Discussion:
[Qemu-devel] [PATCH v9 2/7] i386: Add new property to control cache info
Babu Moger
2018-05-14 16:41:51 UTC
Permalink
The property legacy-cache will be used to control the cache information.
If the user passes "-cpu legacy-cache" then the older information will
be displayed even if the hardware supports new information. Otherwise
use the statically loaded cache definitions if available.

Renamed the previous cache structures to legacy_*. If there is any change in
the cache information, then it needs to be initialized in builtin_x86_defs.

Signed-off-by: Babu Moger <***@amd.com>
Tested-by: Geoffrey McRae <***@hostfission.com>
---
include/hw/i386/pc.h | 5 +++
target/i386/cpu.c | 97 ++++++++++++++++++++++++++++++++------------
target/i386/cpu.h | 5 +++
3 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 69fced9aea..df15deefca 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -306,6 +306,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);

#define PC_COMPAT_2_12 \
HW_COMPAT_2_12 \
+ {\
+ .driver = TYPE_X86_CPU,\
+ .property = "legacy-cache",\
+ .value = "on",\
+ },

#define PC_COMPAT_2_11 \
HW_COMPAT_2_11 \
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f9ebfcfe79..e1daff37ab 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -336,10 +336,14 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
}
}

-/* Definitions of the hardcoded cache entries we expose: */
+/*
+ * Definitions of the hardcoded cache entries we expose:
+ * These are legacy cache values. If there is a need to change any
+ * of these values please use builtin_x86_defs
+ */

/* L1 data cache: */
-static CPUCacheInfo l1d_cache = {
+static CPUCacheInfo legacy_l1d_cache = {
.type = DCACHE,
.level = 1,
.size = 32 * KiB,
@@ -352,7 +356,7 @@ static CPUCacheInfo l1d_cache = {
};

/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo l1d_cache_amd = {
+static CPUCacheInfo legacy_l1d_cache_amd = {
.type = DCACHE,
.level = 1,
.size = 64 * KiB,
@@ -366,7 +370,7 @@ static CPUCacheInfo l1d_cache_amd = {
};

/* L1 instruction cache: */
-static CPUCacheInfo l1i_cache = {
+static CPUCacheInfo legacy_l1i_cache = {
.type = ICACHE,
.level = 1,
.size = 32 * KiB,
@@ -379,7 +383,7 @@ static CPUCacheInfo l1i_cache = {
};

/*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo l1i_cache_amd = {
+static CPUCacheInfo legacy_l1i_cache_amd = {
.type = ICACHE,
.level = 1,
.size = 64 * KiB,
@@ -393,7 +397,7 @@ static CPUCacheInfo l1i_cache_amd = {
};

/* Level 2 unified cache: */
-static CPUCacheInfo l2_cache = {
+static CPUCacheInfo legacy_l2_cache = {
.type = UNIFIED_CACHE,
.level = 2,
.size = 4 * MiB,
@@ -406,7 +410,7 @@ static CPUCacheInfo l2_cache = {
};

/*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */
-static CPUCacheInfo l2_cache_cpuid2 = {
+static CPUCacheInfo legacy_l2_cache_cpuid2 = {
.type = UNIFIED_CACHE,
.level = 2,
.size = 2 * MiB,
@@ -416,7 +420,7 @@ static CPUCacheInfo l2_cache_cpuid2 = {


/*FIXME: CPUID leaf 0x80000006 is inconsistent with leaves 2 & 4 */
-static CPUCacheInfo l2_cache_amd = {
+static CPUCacheInfo legacy_l2_cache_amd = {
.type = UNIFIED_CACHE,
.level = 2,
.size = 512 * KiB,
@@ -428,7 +432,7 @@ static CPUCacheInfo l2_cache_amd = {
};

/* Level 3 unified cache: */
-static CPUCacheInfo l3_cache = {
+static CPUCacheInfo legacy_l3_cache = {
.type = UNIFIED_CACHE,
.level = 3,
.size = 16 * MiB,
@@ -3337,6 +3341,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
env->features[w] = def->features[w];
}

+ /* Store Cache information from the X86CPUDefinition if available */
+ env->cache_info = def->cache_info;
+ cpu->legacy_cache = def->cache_info ? 0 : 1;
+
/* Special cases not set in the X86CPUDefinition structs: */
/* TODO: in-kernel irqchip for hvf */
if (kvm_enabled()) {
@@ -3686,11 +3694,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (!cpu->enable_l3_cache) {
*ecx = 0;
} else {
- *ecx = cpuid2_cache_descriptor(&l3_cache);
+ if (env->cache_info && !cpu->legacy_cache) {
+ *ecx = cpuid2_cache_descriptor(&env->cache_info->l3_cache);
+ } else {
+ *ecx = cpuid2_cache_descriptor(&legacy_l3_cache);
+ }
+ }
+ if (env->cache_info && !cpu->legacy_cache) {
+ *edx = (cpuid2_cache_descriptor(&env->cache_info->l1d_cache) << 16) |
+ (cpuid2_cache_descriptor(&env->cache_info->l1i_cache) << 8) |
+ (cpuid2_cache_descriptor(&env->cache_info->l2_cache));
+ } else {
+ *edx = (cpuid2_cache_descriptor(&legacy_l1d_cache) << 16) |
+ (cpuid2_cache_descriptor(&legacy_l1i_cache) << 8) |
+ (cpuid2_cache_descriptor(&legacy_l2_cache_cpuid2));
}
- *edx = (cpuid2_cache_descriptor(&l1d_cache) << 16) |
- (cpuid2_cache_descriptor(&l1i_cache) << 8) |
- (cpuid2_cache_descriptor(&l2_cache_cpuid2));
break;
case 4:
/* cache info: needed for Core compatibility */
@@ -3703,27 +3721,35 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
} else {
*eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache;
+ l1i = &legacy_l1i_cache;
+ l2 = &legacy_l2_cache;
+ l3 = &legacy_l3_cache;
+ }
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid4(&l1d_cache,
- 1, cs->nr_cores,
+ encode_cache_cpuid4(l1d, 1, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- encode_cache_cpuid4(&l1i_cache,
- 1, cs->nr_cores,
+ encode_cache_cpuid4(l1i, 1, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- encode_cache_cpuid4(&l2_cache,
- cs->nr_threads, cs->nr_cores,
+ encode_cache_cpuid4(l2, cs->nr_threads, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads);
if (cpu->enable_l3_cache) {
- encode_cache_cpuid4(&l3_cache,
- (1 << pkg_offset), cs->nr_cores,
+ encode_cache_cpuid4(l3, (1 << pkg_offset), cs->nr_cores,
eax, ebx, ecx, edx);
break;
}
@@ -3936,8 +3962,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
(L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES);
*ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \
(L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES);
- *ecx = encode_cache_cpuid80000005(&l1d_cache_amd);
- *edx = encode_cache_cpuid80000005(&l1i_cache_amd);
+ if (env->cache_info && !cpu->legacy_cache) {
+ *ecx = encode_cache_cpuid80000005(&env->cache_info->l1d_cache);
+ *edx = encode_cache_cpuid80000005(&env->cache_info->l1i_cache);
+ } else {
+ *ecx = encode_cache_cpuid80000005(&legacy_l1d_cache_amd);
+ *edx = encode_cache_cpuid80000005(&legacy_l1i_cache_amd);
+ }
break;
case 0x80000006:
/* cache info (L2 cache) */
@@ -3953,9 +3984,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
(L2_DTLB_4K_ENTRIES << 16) | \
(AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \
(L2_ITLB_4K_ENTRIES);
- encode_cache_cpuid80000006(&l2_cache_amd,
- cpu->enable_l3_cache ? &l3_cache : NULL,
- ecx, edx);
+ if (env->cache_info && !cpu->legacy_cache) {
+ encode_cache_cpuid80000006(&env->cache_info->l2_cache,
+ cpu->enable_l3_cache ?
+ &env->cache_info->l3_cache : NULL,
+ ecx, edx);
+ } else {
+ encode_cache_cpuid80000006(&legacy_l2_cache_amd,
+ cpu->enable_l3_cache ?
+ &legacy_l3_cache : NULL,
+ ecx, edx);
+ }
break;
case 0x80000007:
*eax = 0;
@@ -5133,6 +5172,12 @@ static Property x86_cpu_properties[] = {
false),
DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true),
DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true),
+ /*
+ * legacy_cache defaults to CPU model being chosen. This is set in
+ * x86_cpu_load_def based on cache_info which is initialized in
+ * builtin_x86_defs
+ */
+ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false),

/*
* From "Requirements for Implementing the Microsoft
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 318e66108a..1f9ccd47d9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1395,6 +1395,11 @@ struct X86CPU {
*/
bool enable_l3_cache;

+ /* Compatibility bits for old machine types.
+ * If true present the old cache topology information
+ */
+ bool legacy_cache;
+
/* Compatibility bits for old machine types: */
bool enable_cpuid_0xb;
--
2.17.0
Babu Moger
2018-05-14 16:41:50 UTC
Permalink
Add pc-q35-2.13 and pc-i440fx-2.13 machine types

Signed-off-by: Babu Moger <***@amd.com>
---
hw/i386/pc_piix.c | 15 ++++++++++++---
hw/i386/pc_q35.c | 13 +++++++++++--
include/hw/i386/pc.h | 3 +++
3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 729a0508aa..e36c7bbb40 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -425,21 +425,30 @@ static void pc_i440fx_machine_options(MachineClass *m)
m->default_display = "std";
}

-static void pc_i440fx_2_12_machine_options(MachineClass *m)
+static void pc_i440fx_2_13_machine_options(MachineClass *m)
{
pc_i440fx_machine_options(m);
m->alias = "pc";
m->is_default = 1;
}

+DEFINE_I440FX_MACHINE(v2_13, "pc-i440fx-2.13", NULL,
+ pc_i440fx_2_13_machine_options);
+
+static void pc_i440fx_2_12_machine_options(MachineClass *m)
+{
+ pc_i440fx_2_13_machine_options(m);
+ m->is_default = 0;
+ m->alias = NULL;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12);
+}
+
DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL,
pc_i440fx_2_12_machine_options);

static void pc_i440fx_2_11_machine_options(MachineClass *m)
{
pc_i440fx_2_12_machine_options(m);
- m->is_default = 0;
- m->alias = NULL;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_11);
}

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 9ae916327e..2372457c6a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -308,12 +308,22 @@ static void pc_q35_machine_options(MachineClass *m)
m->max_cpus = 288;
}

-static void pc_q35_2_12_machine_options(MachineClass *m)
+static void pc_q35_2_13_machine_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
}

+DEFINE_Q35_MACHINE(v2_13, "pc-q35-2.13", NULL,
+ pc_q35_2_13_machine_options);
+
+static void pc_q35_2_12_machine_options(MachineClass *m)
+{
+ pc_q35_2_13_machine_options(m);
+ m->alias = NULL;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12);
+}
+
DEFINE_Q35_MACHINE(v2_12, "pc-q35-2.12", NULL,
pc_q35_2_12_machine_options);

@@ -323,7 +333,6 @@ static void pc_q35_2_11_machine_options(MachineClass *m)

pc_q35_2_12_machine_options(m);
pcmc->default_nic_model = "e1000";
- m->alias = NULL;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_11);
}

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 2e834e6ded..69fced9aea 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -304,6 +304,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);

+#define PC_COMPAT_2_12 \
+ HW_COMPAT_2_12 \
+
#define PC_COMPAT_2_11 \
HW_COMPAT_2_11 \
{\
--
2.17.0
Eduardo Habkost
2018-05-14 19:25:21 UTC
Permalink
Post by Babu Moger
Add pc-q35-2.13 and pc-i440fx-2.13 machine types
Reviewed-by: Eduardo Habkost <***@redhat.com>

Queued, thanks.
--
Eduardo
Babu Moger
2018-05-14 16:41:54 UTC
Permalink
Populate threads/core_id/apic_ids/socket_id when CPUID_EXT3_TOPOEXT
feature is supported. This is required to support hyperthreading feature
on AMD CPUs. This is supported via CPUID_8000_001E extended functions.

Signed-off-by: Babu Moger <***@amd.com>
Tested-by: Geoffrey McRae <***@hostfission.com>
---
include/hw/i386/topology.h | 6 ++++++
target/i386/cpu.c | 8 ++++++++
2 files changed, 14 insertions(+)

diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
index 1ebaee0f76..4af746e50c 100644
--- a/include/hw/i386/topology.h
+++ b/include/hw/i386/topology.h
@@ -145,4 +145,10 @@ static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores,
return apicid_from_topo_ids(nr_cores, nr_threads, &topo);
}

+/* Definitions used on CPUID Leaf 0x8000001E */
+#define EXTENDED_APIC_ID(threads, socket_id, core_id, thread_id) \
+ ((threads) ? \
+ ((socket_id << 6) | (core_id << 1) | thread_id) : \
+ ((socket_id << 6) | core_id))
+
#endif /* HW_I386_TOPOLOGY_H */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c06a9f5ebe..18a7872b7d 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4071,6 +4071,14 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
}
break;
+ case 0x8000001E:
+ assert(cpu->core_id <= 255);
+ *eax = EXTENDED_APIC_ID((cs->nr_threads - 1),
+ cpu->socket_id, cpu->core_id, cpu->thread_id);
+ *ebx = (cs->nr_threads - 1) << 8 | cpu->core_id;
+ *ecx = cpu->socket_id;
+ *edx = 0;
+ break;
case 0xC0000000:
*eax = env->cpuid_xlevel2;
*ebx = 0;
--
2.17.0
Babu Moger
2018-05-14 16:41:56 UTC
Permalink
Remove generic non-intel check while validating hyperthreading support.
Certain AMD CPUs can support hyperthreading now.

CPU family with TOPOEXT feature can support hyperthreading now.

Signed-off-by: Babu Moger <***@amd.com>
Tested-by: Geoffrey McRae <***@hostfission.com>
Reviewed-by: Eduardo Habkost <***@redhat.com>
---
target/i386/cpu.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index de2749ae81..d28b51c0a3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4821,17 +4821,20 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)

qemu_init_vcpu(cs);

- /* Only Intel CPUs support hyperthreading. Even though QEMU fixes this
- * issue by adjusting CPUID_0000_0001_EBX and CPUID_8000_0008_ECX
- * based on inputs (sockets,cores,threads), it is still better to gives
+ /* Most Intel and certain AMD CPUs support hyperthreading. Even though QEMU
+ * fixes this issue by adjusting CPUID_0000_0001_EBX and CPUID_8000_0008_ECX
+ * based on inputs (sockets,cores,threads), it is still better to give
* users a warning.
*
* NOTE: the following code has to follow qemu_init_vcpu(). Otherwise
* cs->nr_threads hasn't be populated yet and the checking is incorrect.
*/
- if (!IS_INTEL_CPU(env) && cs->nr_threads > 1 && !ht_warned) {
- error_report("AMD CPU doesn't support hyperthreading. Please configure"
- " -smp options properly.");
+ if (IS_AMD_CPU(env) &&
+ !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
+ cs->nr_threads > 1 && !ht_warned) {
+ error_report("This family of AMD CPU doesn't support "
+ "hyperthreading. Please configure -smp "
+ "options properly.");
ht_warned = true;
}
--
2.17.0
Babu Moger
2018-05-14 16:41:52 UTC
Permalink
Add information for cpuid 0x8000001D leaf. Populate cache topology information
for different cache types (Data Cache, Instruction Cache, L2 and L3) supported
by 0x8000001D leaf. Please refer to the Processor Programming Reference (PPR)
for AMD Family 17h Models for more details.

Signed-off-by: Babu Moger <***@amd.com>
Tested-by: Geoffrey McRae <***@hostfission.com>
---
target/i386/cpu.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++
target/i386/kvm.c | 29 +++++++++++++++--
2 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
a == ASSOC_FULL ? 0xF : \
0 /* invalid value */)

+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX 4
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads) ((threads > 1) ? \
+ (((CORES_IN_CMPLX - 1) * threads) + 1) : \
+ (CORES_IN_CMPLX - 1))
+
/*
* Encode cache info for CPUID[0x80000006].ECX and CPUID[0x80000006].EDX
* @l3 can be NULL.
@@ -336,6 +344,41 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
}
}

+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int nr_threads,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ assert(cache->size == cache->line_size * cache->associativity *
+ cache->partitions * cache->sets);
+
+ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+ (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+ /* L3 is shared among multiple cores */
+ if (cache->level == 3) {
+ *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+ } else {
+ *eax |= ((nr_threads - 1) << 14);
+ }
+
+ assert(cache->line_size > 0);
+ assert(cache->partitions > 0);
+ assert(cache->associativity > 0);
+ /* We don't implement fully-associative caches */
+ assert(cache->associativity < cache->sets);
+ *ebx = (cache->line_size - 1) |
+ ((cache->partitions - 1) << 12) |
+ ((cache->associativity - 1) << 22);
+
+ assert(cache->sets > 0);
+ *ecx = cache->sets - 1;
+
+ *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
+ (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+ (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
/*
* Definitions of the hardcoded cache entries we expose:
* These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = 0;
}
break;
+ case 0x8000001D:
+ *eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache_amd;
+ l1i = &legacy_l1i_cache_amd;
+ l2 = &legacy_l2_cache_amd;
+ l3 = &legacy_l3_cache;
+ }
+ switch (count) {
+ case 0: /* L1 dcache info */
+ encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 1: /* L1 icache info */
+ encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 2: /* L2 cache info */
+ encode_cache_cpuid8000001d(l2, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 3: /* L3 cache info */
+ encode_cache_cpuid8000001d(l3, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ default: /* end of info */
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+ break;
case 0xC0000000:
*eax = env->cpuid_xlevel2;
*ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
c = &cpuid_data.entries[cpuid_i++];

- c->function = i;
- c->flags = 0;
- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ switch (i) {
+ case 0x8000001d:
+ /* Query for all AMD cache information leaves */
+ for (j = 0; ; j++) {
+ c->function = i;
+ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+ if (c->eax == 0) {
+ break;
+ }
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ default:
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
}

/* Call Centaur's CPUID instructions they are supported. */
--
2.17.0
Eduardo Habkost
2018-05-14 19:47:05 UTC
Permalink
Post by Babu Moger
Add information for cpuid 0x8000001D leaf. Populate cache topology information
for different cache types(Data Cache, Instruction Cache, L2 and L3) supported
by 0x8000001D leaf. Please refer Processor Programming Reference (PPR) for AMD
Family 17h Model for more details.
---
target/i386/cpu.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++
target/i386/kvm.c | 29 +++++++++++++++--
2 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
a == ASSOC_FULL ? 0xF : \
0 /* invalid value */)
+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX 4
Number of cores is configurable in QEMU, so we can't hardcode
this.

I understand you want to make it match the hardware as close as
possible (as you noted in your reply on v7), but this should be
done by simply configuring QEMU as closely to the hardware as
possible.
Post by Babu Moger
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads) ((threads > 1) ? \
+ (((CORES_IN_CMPLX - 1) * threads) + 1) : \
+ (CORES_IN_CMPLX - 1))
I don't see why the check for threads > 1, here. Why not simply
write this as:

((nr_cores * nr_threads) - 1))

which will work for any cores/threads value?

(Or the function could just get nr_logical_cpus argument like I
suggested on v7, to make the code here simpler.)
Post by Babu Moger
+
/*
* Encode cache info for CPUID[0x80000006].ECX and CPUID[0x80000006].EDX
@@ -336,6 +344,41 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
}
}
+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int nr_threads,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ assert(cache->size == cache->line_size * cache->associativity *
+ cache->partitions * cache->sets);
+
+ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+ (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+ /* L3 is shared among multiple cores */
+ if (cache->level == 3) {
+ *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+ } else {
+ *eax |= ((nr_threads - 1) << 14);
+ }
+
+ assert(cache->line_size > 0);
+ assert(cache->partitions > 0);
+ assert(cache->associativity > 0);
+ /* We don't implement fully-associative caches */
+ assert(cache->associativity < cache->sets);
+ *ebx = (cache->line_size - 1) |
+ ((cache->partitions - 1) << 12) |
+ ((cache->associativity - 1) << 22);
+
+ assert(cache->sets > 0);
+ *ecx = cache->sets - 1;
+
+ *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
+ (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+ (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
/*
* These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = 0;
}
break;
+ *eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache_amd;
+ l1i = &legacy_l1i_cache_amd;
+ l2 = &legacy_l2_cache_amd;
+ l3 = &legacy_l3_cache;
+ }
+ switch (count) {
+ case 0: /* L1 dcache info */
+ encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 1: /* L1 icache info */
+ encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 2: /* L2 cache info */
+ encode_cache_cpuid8000001d(l2, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 3: /* L3 cache info */
+ encode_cache_cpuid8000001d(l3, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ default: /* end of info */
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+ break;
*eax = env->cpuid_xlevel2;
*ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
c = &cpuid_data.entries[cpuid_i++];
- c->function = i;
- c->flags = 0;
- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ switch (i) {
+ /* Query for all AMD cache information leaves */
+ for (j = 0; ; j++) {
+ c->function = i;
+ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+ if (c->eax == 0) {
+ break;
+ }
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
}
/* Call Centaur's CPUID instructions they are supported. */
--
2.17.0
--
Eduardo
Moger, Babu
2018-05-14 23:49:30 UTC
Permalink
-----Original Message-----
Sent: Monday, May 14, 2018 2:47 PM
Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
for cpuid 0x8000001D
Post by Babu Moger
Add information for cpuid 0x8000001D leaf. Populate cache topology
information
Post by Babu Moger
for different cache types(Data Cache, Instruction Cache, L2 and L3)
supported
Post by Babu Moger
by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
for AMD
Post by Babu Moger
Family 17h Model for more details.
---
target/i386/cpu.c | 79
+++++++++++++++++++++++++++++++++++++++++++++++
Post by Babu Moger
target/i386/kvm.c | 29 +++++++++++++++--
2 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@ static uint32_t
encode_cache_cpuid80000005(CPUCacheInfo *cache)
Post by Babu Moger
a == ASSOC_FULL ? 0xF : \
0 /* invalid value */)
+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX 4
Number of cores is configurable in QEMU, so we can't hardcode
this.
In EPYC architecture, in a single die we have 2 core complexes.
Each core complex has 4 cores at max(CORES_IN_CMPLX).
Without SMT(thread=1), L3 is shared between 4(4x1) cores.
NUM_SHARING_CACHE should be 3.
With SMT(thread=2), L3 is shared between 8(4x2) cores.
NUM_SHARING_CACHE should be 7.
This is what we are trying to achieve here. This is a fixed h/w configuration.
I understand you want to make it match the hardware as close as
possible (as you noted in your reply on v7), but this should be
done by simply configuring QEMU as closely to the hardware as
possible.
Post by Babu Moger
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads) ((threads > 1) ? \
+ (((CORES_IN_CMPLX - 1) * threads) + 1) : \
+ (CORES_IN_CMPLX - 1))
I don't see why the check for threads > 1, here. Why not simply
((nr_cores * nr_threads) - 1))
which will work for any cores/threads value?
We cannot achieve the above numbers if we use this logic.
For example.. with nr_cores = 8, nr_threads=2.
This will report (8x2)-1=15 which is not what we want.
(Or the function could just get nr_logical_cpus argument like I
suggested on v7, to make the code here simpler.)
Post by Babu Moger
+
/*
* Encode cache info for CPUID[0x80000006].ECX and
CPUID[0x80000006].EDX
Post by Babu Moger
@@ -336,6 +344,41 @@ static void
encode_cache_cpuid80000006(CPUCacheInfo *l2,
Post by Babu Moger
}
}
+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
nr_threads,
Post by Babu Moger
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ assert(cache->size == cache->line_size * cache->associativity *
+ cache->partitions * cache->sets);
+
+ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+ (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+ /* L3 is shared among multiple cores */
+ if (cache->level == 3) {
+ *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+ } else {
+ *eax |= ((nr_threads - 1) << 14);
+ }
+
+ assert(cache->line_size > 0);
+ assert(cache->partitions > 0);
+ assert(cache->associativity > 0);
+ /* We don't implement fully-associative caches */
+ assert(cache->associativity < cache->sets);
+ *ebx = (cache->line_size - 1) |
+ ((cache->partitions - 1) << 12) |
+ ((cache->associativity - 1) << 22);
+
+ assert(cache->sets > 0);
+ *ecx = cache->sets - 1;
+
+ *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
+ (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+ (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
/*
* These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
uint32_t index, uint32_t count,
Post by Babu Moger
*edx = 0;
}
break;
+ *eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache_amd;
+ l1i = &legacy_l1i_cache_amd;
+ l2 = &legacy_l2_cache_amd;
+ l3 = &legacy_l3_cache;
+ }
+ switch (count) {
+ case 0: /* L1 dcache info */
+ encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 1: /* L1 icache info */
+ encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 2: /* L2 cache info */
+ encode_cache_cpuid8000001d(l2, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 3: /* L3 cache info */
+ encode_cache_cpuid8000001d(l3, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ default: /* end of info */
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+ break;
*eax = env->cpuid_xlevel2;
*ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
c = &cpuid_data.entries[cpuid_i++];
- c->function = i;
- c->flags = 0;
- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ switch (i) {
+ /* Query for all AMD cache information leaves */
+ for (j = 0; ; j++) {
+ c->function = i;
+ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+ if (c->eax == 0) {
+ break;
+ }
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
}
/* Call Centaur's CPUID instructions they are supported. */
--
2.17.0
--
Eduardo
Eduardo Habkost
2018-05-16 12:52:24 UTC
Permalink
Post by Moger, Babu
-----Original Message-----
Sent: Monday, May 14, 2018 2:47 PM
Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
for cpuid 0x8000001D
Post by Babu Moger
Add information for cpuid 0x8000001D leaf. Populate cache topology
information
Post by Babu Moger
for different cache types(Data Cache, Instruction Cache, L2 and L3)
supported
Post by Babu Moger
by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
for AMD
Post by Babu Moger
Family 17h Model for more details.
---
target/i386/cpu.c | 79
+++++++++++++++++++++++++++++++++++++++++++++++
Post by Babu Moger
target/i386/kvm.c | 29 +++++++++++++++--
2 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@ static uint32_t
encode_cache_cpuid80000005(CPUCacheInfo *cache)
Post by Babu Moger
a == ASSOC_FULL ? 0xF : \
0 /* invalid value */)
+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX 4
Number of cores is configurable in QEMU, so we can't hardcode
this.
In EPYC architecture, in a single die we have 2 core complexes.
Each core complex has 4 cores at max(CORES_IN_CMPLX).
Without SMT(thread=1), L3 is shared between 4(4x1) cores.
NUM_SHARING_CACHE should be 3.
With SMT(thread=2), L3 is shared between 8(4x2) cores.
NUM_SHARING_CACHE should be 7.
This is what we are trying to achieve here. This is a fixed h/w configuration.
There's nothing in this part of the code that makes it specific
to the EPYC CPU model, so it has to be more generic. But
probably my suggestion wasn't correct either. See my question
Post by Moger, Babu
I understand you want to make it match the hardware as close as
possible (as you noted in your reply on v7), but this should be
done by simply configuring QEMU as closely to the hardware as
possible.
Post by Babu Moger
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads) ((threads > 1) ? \
+ (((CORES_IN_CMPLX - 1) * threads) + 1) : \
+ (CORES_IN_CMPLX - 1))
I don't see why the check for threads > 1, here. Why not simply
((nr_cores * nr_threads) - 1))
which will work for any cores/threads value?
We cannot achieve the above numbers if we use this logic.
For example.. with nr_cores = 8, nr_threads=2.
This will report (8x2)-1=15 which is not what we want.
I'm confused. What would be the correct value for
Fn8000_001D_EAX_x[25:14] for a 8-core 2-threads-per-core CPU?

I assumed the L3 cache would be shared by the whole socket, but
it's shared only by a core complex (which has 4 cores in EPYC).
Is that right?

So, what would be a reasonable value for Fn8000_001D_EAX_3[25:14]
for the following configurations?

-cpu EPYC,cores=2,threads=1
-cpu EPYC,cores=2,threads=2
-cpu EPYC,cores=3,threads=1
-cpu EPYC,cores=3,threads=2
-cpu EPYC,cores=4,threads=1
-cpu EPYC,cores=4,threads=2
-cpu EPYC,cores=5,threads=1
-cpu EPYC,cores=5,threads=2
-cpu EPYC,cores=6,threads=1
-cpu EPYC,cores=6,threads=2
-cpu EPYC,cores=7,threads=1
-cpu EPYC,cores=7,threads=2
-cpu EPYC,cores=8,threads=1
-cpu EPYC,cores=8,threads=2
-cpu EPYC,cores=9,threads=1
-cpu EPYC,cores=9,threads=2
Post by Moger, Babu
(Or the function could just get nr_logical_cpus argument like I
suggested on v7, to make the code here simpler.)
Post by Babu Moger
+
/*
* Encode cache info for CPUID[0x80000006].ECX and
CPUID[0x80000006].EDX
Post by Babu Moger
@@ -336,6 +344,41 @@ static void
encode_cache_cpuid80000006(CPUCacheInfo *l2,
Post by Babu Moger
}
}
+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
nr_threads,
Post by Babu Moger
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ assert(cache->size == cache->line_size * cache->associativity *
+ cache->partitions * cache->sets);
+
+ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+ (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+ /* L3 is shared among multiple cores */
+ if (cache->level == 3) {
+ *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+ } else {
+ *eax |= ((nr_threads - 1) << 14);
+ }
+
+ assert(cache->line_size > 0);
+ assert(cache->partitions > 0);
+ assert(cache->associativity > 0);
+ /* We don't implement fully-associative caches */
+ assert(cache->associativity < cache->sets);
+ *ebx = (cache->line_size - 1) |
+ ((cache->partitions - 1) << 12) |
+ ((cache->associativity - 1) << 22);
+
+ assert(cache->sets > 0);
+ *ecx = cache->sets - 1;
+
+ *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
+ (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+ (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
/*
* These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
uint32_t index, uint32_t count,
Post by Babu Moger
*edx = 0;
}
break;
+ *eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache_amd;
+ l1i = &legacy_l1i_cache_amd;
+ l2 = &legacy_l2_cache_amd;
+ l3 = &legacy_l3_cache;
+ }
+ switch (count) {
+ case 0: /* L1 dcache info */
+ encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 1: /* L1 icache info */
+ encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 2: /* L2 cache info */
+ encode_cache_cpuid8000001d(l2, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 3: /* L3 cache info */
+ encode_cache_cpuid8000001d(l3, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ default: /* end of info */
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+ break;
*eax = env->cpuid_xlevel2;
*ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
c = &cpuid_data.entries[cpuid_i++];
- c->function = i;
- c->flags = 0;
- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ switch (i) {
+ /* Query for all AMD cache information leaves */
+ for (j = 0; ; j++) {
+ c->function = i;
+ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+ if (c->eax == 0) {
+ break;
+ }
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
}
/* Call Centaur's CPUID instructions they are supported. */
--
2.17.0
--
Eduardo
--
Eduardo
Moger, Babu
2018-05-16 19:25:53 UTC
Permalink
-----Original Message-----
Sent: Wednesday, May 16, 2018 7:52 AM
Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
for cpuid 0x8000001D
Post by Moger, Babu
-----Original Message-----
Sent: Monday, May 14, 2018 2:47 PM
Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache
Information
Post by Moger, Babu
for cpuid 0x8000001D
Post by Babu Moger
Add information for cpuid 0x8000001D leaf. Populate cache topology
information
Post by Babu Moger
for different cache types(Data Cache, Instruction Cache, L2 and L3)
supported
Post by Babu Moger
by 0x8000001D leaf. Please refer Processor Programming Reference
(PPR)
Post by Moger, Babu
for AMD
Post by Babu Moger
Family 17h Model for more details.
---
target/i386/cpu.c | 79
+++++++++++++++++++++++++++++++++++++++++++++++
Post by Babu Moger
target/i386/kvm.c | 29 +++++++++++++++--
2 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@ static uint32_t
encode_cache_cpuid80000005(CPUCacheInfo *cache)
Post by Babu Moger
a == ASSOC_FULL ? 0xF : \
0 /* invalid value */)
+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX 4
Number of cores is configurable in QEMU, so we can't hardcode
this.
In EPYC architecture, in a single die we have 2 core complexes.
Each core complex has 4 cores at max(CORES_IN_CMPLX).
Without SMT(thread=1), L3 is shared between 4(4x1) cores.
NUM_SHARING_CACHE should be 3.
With SMT(thread=2), L3 is shared between 8(4x2) cores.
NUM_SHARING_CACHE should be 7.
This is what we are trying to achieve here. This is a fixed h/w configuration.
There's nothing in this part of the code that makes it specific
to the EPYC CPU model, so it has to be more generic. But
probably my suggestion wasn't correct either. See my question
Post by Moger, Babu
I understand you want to make it match the hardware as close as
possible (as you noted in your reply on v7), but this should be
done by simply configuring QEMU as closely to the hardware as
possible.
Post by Babu Moger
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads) ((threads > 1) ? \
+ (((CORES_IN_CMPLX - 1) * threads) + 1) : \
+ (CORES_IN_CMPLX - 1))
I don't see why the check for threads > 1, here. Why not simply
((nr_cores * nr_threads) - 1))
which will work for any cores/threads value?
We cannot achieve the above numbers if we use this logic.
For example.. with nr_cores = 8, nr_threads=2.
This will report (8x2)-1=15 which is not what we want.
I'm confused. What would be the correct value for
Fn8000_001D_EAX_x[25:14] for a 8-core 2-threads-per-core CPU?
I assumed the L3 cache would be shared by the whole socket, but
it's shared only by a core complex (which has 4 cores in EPYC).
Is that right?
That is correct.
So, what would be a reasonable value for Fn8000_001D_EAX_3[25:14]
for the following configurations?
-cpu EPYC,cores=2,threads=1
1
-cpu EPYC,cores=2,threads=2
3
-cpu EPYC,cores=3,threads=1
2
-cpu EPYC,cores=3,threads=2
5
-cpu EPYC,cores=4,threads=1
3
-cpu EPYC,cores=4,threads=2
7
-cpu EPYC,cores=5,threads=1
-cpu EPYC,cores=5,threads=2
-cpu EPYC,cores=6,threads=1
-cpu EPYC,cores=6,threads=2
-cpu EPYC,cores=7,threads=1
-cpu EPYC,cores=7,threads=2
-cpu EPYC,cores=8,threads=1
-cpu EPYC,cores=8,threads=2
-cpu EPYC,cores=9,threads=1
-cpu EPYC,cores=9,threads=2
Some of these combinations are not valid. We are thinking of coming up with a statically
defined data model and pick up the model that best fits the above parameters, or something like that.
We may have to report Invalid for some of the combinations. Still thinking. Let me know if you think
of any better way to handle it, or if there are similar cases already handled elsewhere that we could base this on.
Post by Moger, Babu
(Or the function could just get nr_logical_cpus argument like I
suggested on v7, to make the code here simpler.)
Post by Babu Moger
+
/*
* Encode cache info for CPUID[0x80000006].ECX and
CPUID[0x80000006].EDX
Post by Babu Moger
@@ -336,6 +344,41 @@ static void
encode_cache_cpuid80000006(CPUCacheInfo *l2,
Post by Babu Moger
}
}
+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
nr_threads,
Post by Babu Moger
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ assert(cache->size == cache->line_size * cache->associativity *
+ cache->partitions * cache->sets);
+
+ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+ (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+ /* L3 is shared among multiple cores */
+ if (cache->level == 3) {
+ *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+ } else {
+ *eax |= ((nr_threads - 1) << 14);
+ }
+
+ assert(cache->line_size > 0);
+ assert(cache->partitions > 0);
+ assert(cache->associativity > 0);
+ /* We don't implement fully-associative caches */
+ assert(cache->associativity < cache->sets);
+ *ebx = (cache->line_size - 1) |
+ ((cache->partitions - 1) << 12) |
+ ((cache->associativity - 1) << 22);
+
+ assert(cache->sets > 0);
+ *ecx = cache->sets - 1;
+
+ *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0)
|
Post by Moger, Babu
Post by Babu Moger
+ (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+ (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
/*
* These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
uint32_t index, uint32_t count,
Post by Babu Moger
*edx = 0;
}
break;
+ *eax = 0;
+ CPUCacheInfo *l1d, *l1i, *l2, *l3;
+ if (env->cache_info && !cpu->legacy_cache) {
+ l1d = &env->cache_info->l1d_cache;
+ l1i = &env->cache_info->l1i_cache;
+ l2 = &env->cache_info->l2_cache;
+ l3 = &env->cache_info->l3_cache;
+ } else {
+ l1d = &legacy_l1d_cache_amd;
+ l1i = &legacy_l1i_cache_amd;
+ l2 = &legacy_l2_cache_amd;
+ l3 = &legacy_l3_cache;
+ }
+ switch (count) {
+ case 0: /* L1 dcache info */
+ encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 1: /* L1 icache info */
+ encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 2: /* L2 cache info */
+ encode_cache_cpuid8000001d(l2, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ case 3: /* L3 cache info */
+ encode_cache_cpuid8000001d(l3, cs->nr_threads,
+ eax, ebx, ecx, edx);
+ break;
+ default: /* end of info */
+ *eax = *ebx = *ecx = *edx = 0;
+ break;
+ }
+ break;
*eax = env->cpuid_xlevel2;
*ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
c = &cpuid_data.entries[cpuid_i++];
- c->function = i;
- c->flags = 0;
- cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ switch (i) {
+ /* Query for all AMD cache information leaves */
+ for (j = 0; ; j++) {
+ c->function = i;
+ c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ c->index = j;
+ cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c-
edx);
Post by Babu Moger
+
+ if (c->eax == 0) {
+ break;
+ }
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
+ c = &cpuid_data.entries[cpuid_i++];
+ }
+ break;
+ c->function = i;
+ c->flags = 0;
+ cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+ break;
+ }
}
/* Call Centaur's CPUID instructions they are supported. */
--
2.17.0
--
Eduardo
--
Eduardo
Babu Moger
2018-05-14 16:41:53 UTC
Permalink
From: Eduardo Habkost <***@redhat.com>

Always initialize CPUCaches structs with cache information, even
if legacy_cache=true. Use different CPUCaches struct for
CPUID[2], CPUID[4], and the AMD CPUID leaves.

This will simplify a lot the logic inside cpu_x86_cpuid().

Signed-off-by: Eduardo Habkost <***@redhat.com>
Signed-off-by: Babu Moger <***@amd.com>
---
target/i386/cpu.c | 145 +++++++++++++++++++++-------------------------
target/i386/cpu.h | 14 +++--
2 files changed, 75 insertions(+), 84 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7f40241786..c06a9f5ebe 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1156,7 +1156,7 @@ struct X86CPUDefinition {
};

static CPUCaches epyc_cache_info = {
- .l1d_cache = {
+ .l1d_cache = &(CPUCacheInfo) {
.type = DCACHE,
.level = 1,
.size = 32 * KiB,
@@ -1168,7 +1168,7 @@ static CPUCaches epyc_cache_info = {
.self_init = 1,
.no_invd_sharing = true,
},
- .l1i_cache = {
+ .l1i_cache = &(CPUCacheInfo) {
.type = ICACHE,
.level = 1,
.size = 64 * KiB,
@@ -1180,7 +1180,7 @@ static CPUCaches epyc_cache_info = {
.self_init = 1,
.no_invd_sharing = true,
},
- .l2_cache = {
+ .l2_cache = &(CPUCacheInfo) {
.type = UNIFIED_CACHE,
.level = 2,
.size = 512 * KiB,
@@ -1190,7 +1190,7 @@ static CPUCaches epyc_cache_info = {
.sets = 1024,
.lines_per_tag = 1,
},
- .l3_cache = {
+ .l3_cache = &(CPUCacheInfo) {
.type = UNIFIED_CACHE,
.level = 3,
.size = 8 * MiB,
@@ -3384,9 +3384,8 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
env->features[w] = def->features[w];
}

- /* Store Cache information from the X86CPUDefinition if available */
- env->cache_info = def->cache_info;
- cpu->legacy_cache = def->cache_info ? 0 : 1;
+ /* legacy-cache defaults to 'off' if CPU model provides cache info */
+ cpu->legacy_cache = !def->cache_info;

/* Special cases not set in the X86CPUDefinition structs: */
/* TODO: in-kernel irqchip for hvf */
@@ -3737,21 +3736,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (!cpu->enable_l3_cache) {
*ecx = 0;
} else {
- if (env->cache_info && !cpu->legacy_cache) {
- *ecx = cpuid2_cache_descriptor(&env->cache_info->l3_cache);
- } else {
- *ecx = cpuid2_cache_descriptor(&legacy_l3_cache);
- }
- }
- if (env->cache_info && !cpu->legacy_cache) {
- *edx = (cpuid2_cache_descriptor(&env->cache_info->l1d_cache) << 16) |
- (cpuid2_cache_descriptor(&env->cache_info->l1i_cache) << 8) |
- (cpuid2_cache_descriptor(&env->cache_info->l2_cache));
- } else {
- *edx = (cpuid2_cache_descriptor(&legacy_l1d_cache) << 16) |
- (cpuid2_cache_descriptor(&legacy_l1i_cache) << 8) |
- (cpuid2_cache_descriptor(&legacy_l2_cache_cpuid2));
+ *ecx = cpuid2_cache_descriptor(env->cache_info_cpuid2.l3_cache);
}
+ *edx = (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1d_cache) << 16) |
+ (cpuid2_cache_descriptor(env->cache_info_cpuid2.l1i_cache) << 8) |
+ (cpuid2_cache_descriptor(env->cache_info_cpuid2.l2_cache));
break;
case 4:
/* cache info: needed for Core compatibility */
@@ -3764,35 +3753,27 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
} else {
*eax = 0;
- CPUCacheInfo *l1d, *l1i, *l2, *l3;
- if (env->cache_info && !cpu->legacy_cache) {
- l1d = &env->cache_info->l1d_cache;
- l1i = &env->cache_info->l1i_cache;
- l2 = &env->cache_info->l2_cache;
- l3 = &env->cache_info->l3_cache;
- } else {
- l1d = &legacy_l1d_cache;
- l1i = &legacy_l1i_cache;
- l2 = &legacy_l2_cache;
- l3 = &legacy_l3_cache;
- }
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid4(l1d, 1, cs->nr_cores,
+ encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
+ 1, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- encode_cache_cpuid4(l1i, 1, cs->nr_cores,
+ encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
+ 1, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- encode_cache_cpuid4(l2, cs->nr_threads, cs->nr_cores,
+ encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
+ cs->nr_threads, cs->nr_cores,
eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads);
if (cpu->enable_l3_cache) {
- encode_cache_cpuid4(l3, (1 << pkg_offset), cs->nr_cores,
+ encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
+ (1 << pkg_offset), cs->nr_cores,
eax, ebx, ecx, edx);
break;
}
@@ -4005,13 +3986,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
(L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES);
*ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \
(L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES);
- if (env->cache_info && !cpu->legacy_cache) {
- *ecx = encode_cache_cpuid80000005(&env->cache_info->l1d_cache);
- *edx = encode_cache_cpuid80000005(&env->cache_info->l1i_cache);
- } else {
- *ecx = encode_cache_cpuid80000005(&legacy_l1d_cache_amd);
- *edx = encode_cache_cpuid80000005(&legacy_l1i_cache_amd);
- }
+ *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache);
+ *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache);
break;
case 0x80000006:
/* cache info (L2 cache) */
@@ -4027,17 +4003,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
(L2_DTLB_4K_ENTRIES << 16) | \
(AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \
(L2_ITLB_4K_ENTRIES);
- if (env->cache_info && !cpu->legacy_cache) {
- encode_cache_cpuid80000006(&env->cache_info->l2_cache,
- cpu->enable_l3_cache ?
- &env->cache_info->l3_cache : NULL,
- ecx, edx);
- } else {
- encode_cache_cpuid80000006(&legacy_l2_cache_amd,
- cpu->enable_l3_cache ?
- &legacy_l3_cache : NULL,
- ecx, edx);
- }
+ encode_cache_cpuid80000006(env->cache_info_amd.l2_cache,
+ cpu->enable_l3_cache ?
+ env->cache_info_amd.l3_cache : NULL,
+ ecx, edx);
break;
case 0x80000007:
*eax = 0;
@@ -4080,34 +4049,22 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x8000001D:
*eax = 0;
- CPUCacheInfo *l1d, *l1i, *l2, *l3;
- if (env->cache_info && !cpu->legacy_cache) {
- l1d = &env->cache_info->l1d_cache;
- l1i = &env->cache_info->l1i_cache;
- l2 = &env->cache_info->l2_cache;
- l3 = &env->cache_info->l3_cache;
- } else {
- l1d = &legacy_l1d_cache_amd;
- l1i = &legacy_l1i_cache_amd;
- l2 = &legacy_l2_cache_amd;
- l3 = &legacy_l3_cache;
- }
switch (count) {
case 0: /* L1 dcache info */
- encode_cache_cpuid8000001d(l1d, cs->nr_threads,
- eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache,
+ cs->nr_threads, eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- encode_cache_cpuid8000001d(l1i, cs->nr_threads,
- eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache,
+ cs->nr_threads, eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- encode_cache_cpuid8000001d(l2, cs->nr_threads,
- eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache,
+ cs->nr_threads, eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
- encode_cache_cpuid8000001d(l3, cs->nr_threads,
- eax, ebx, ecx, edx);
+ encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache,
+ cs->nr_threads, eax, ebx, ecx, edx);
break;
default: /* end of info */
*eax = *ebx = *ecx = *edx = 0;
@@ -4770,6 +4727,37 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
cpu->phys_bits = 32;
}
}
+
+ /* Cache information initialization */
+ if (!cpu->legacy_cache) {
+ if (!xcc->cpu_def || !xcc->cpu_def->cache_info) {
+ char *name = x86_cpu_class_get_model_name(xcc);
+ error_setg(errp,
+ "CPU model '%s' doesn't support legacy-cache=off", name);
+ g_free(name);
+ return;
+ }
+ env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd =
+ *xcc->cpu_def->cache_info;
+ } else {
+ /* Build legacy cache information */
+ env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache;
+ env->cache_info_cpuid2.l1i_cache = &legacy_l1i_cache;
+ env->cache_info_cpuid2.l2_cache = &legacy_l2_cache_cpuid2;
+ env->cache_info_cpuid2.l3_cache = &legacy_l3_cache;
+
+ env->cache_info_cpuid4.l1d_cache = &legacy_l1d_cache;
+ env->cache_info_cpuid4.l1i_cache = &legacy_l1i_cache;
+ env->cache_info_cpuid4.l2_cache = &legacy_l2_cache;
+ env->cache_info_cpuid4.l3_cache = &legacy_l3_cache;
+
+ env->cache_info_amd.l1d_cache = &legacy_l1d_cache_amd;
+ env->cache_info_amd.l1i_cache = &legacy_l1i_cache_amd;
+ env->cache_info_amd.l2_cache = &legacy_l2_cache_amd;
+ env->cache_info_amd.l3_cache = &legacy_l3_cache;
+ }
+
+
cpu_exec_realizefn(cs, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -5252,11 +5240,10 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true),
DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true),
/*
- * lecacy_cache defaults to CPU model being chosen. This is set in
- * x86_cpu_load_def based on cache_info which is initialized in
- * builtin_x86_defs
+ * lecacy_cache defaults to true unless the CPU model provides its
+ * own cache information (see x86_cpu_load_def()).
*/
- DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, false),
+ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true),

/*
* From "Requirements for Implementing the Microsoft
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1f9ccd47d9..e64b7da997 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1098,10 +1098,10 @@ typedef struct CPUCacheInfo {


typedef struct CPUCaches {
- CPUCacheInfo l1d_cache;
- CPUCacheInfo l1i_cache;
- CPUCacheInfo l2_cache;
- CPUCacheInfo l3_cache;
+ CPUCacheInfo *l1d_cache;
+ CPUCacheInfo *l1i_cache;
+ CPUCacheInfo *l2_cache;
+ CPUCacheInfo *l3_cache;
} CPUCaches;

typedef struct CPUX86State {
@@ -1289,7 +1289,11 @@ typedef struct CPUX86State {
/* Features that were explicitly enabled/disabled */
FeatureWordArray user_features;
uint32_t cpuid_model[12];
- CPUCaches *cache_info;
+ /* Cache information for CPUID. When legacy-cache=on, the cache data
+ * on each CPUID leaf will be different, because we keep compatibility
+ * with old QEMU versions.
+ */
+ CPUCaches cache_info_cpuid2, cache_info_cpuid4, cache_info_amd;

/* MTRRs */
uint64_t mtrr_fixed[11];
--
2.17.0
Babu Moger
2018-05-14 16:41:55 UTC
Permalink
Enable TOPOEXT feature on EPYC CPU. This is required to support
hyperthreading on VM guests. Also extend xlevel to 0x8000001E.

Disable TOPOEXT feature for legacy (2.12 or older) machine types.

Signed-off-by: Babu Moger <***@amd.com>
Tested-by: Geoffrey McRae <***@hostfission.com>
Reviewed-by: Eduardo Habkost <***@redhat.com>
---
include/hw/i386/pc.h | 4 ++++
target/i386/cpu.c | 11 +++++++++--
2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index df15deefca..c01adc92ba 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -310,6 +310,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
.driver = TYPE_X86_CPU,\
.property = "legacy-cache",\
.value = "on",\
+ },{\
+ .driver = "EPYC-" TYPE_X86_CPU,\
+ .property = "topoext",\
+ .value = "off",\
},

#define PC_COMPAT_2_11 \
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 18a7872b7d..de2749ae81 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2424,7 +2424,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
- CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+ CPUID_EXT3_TOPOEXT,
.features[FEAT_7_0_EBX] =
CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED |
@@ -2469,7 +2470,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
- CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+ CPUID_EXT3_TOPOEXT,
.features[FEAT_8000_0008_EBX] =
CPUID_8000_0008_EBX_IBPB,
.features[FEAT_7_0_EBX] =
@@ -4540,6 +4542,11 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A);
}

+ /* TOPOEXT feature requires 0x8000001E */
+ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) {
+ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001E);
+ }
+
/* SEV requires CPUID[0x8000001F] */
if (sev_enabled()) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F);
--
2.17.0
Eduardo Habkost
2018-05-14 19:28:19 UTC
Permalink
Post by Babu Moger
The property legacy-cache will be used to control the cache information.
If user passes "-cpu legacy-cache" then older information will
be displayed even if the hardware supports new information. Otherwise
use the statically loaded cache definitions if available.
Renamed the previous cache structures to legacy_*. If there is any change in
the cache information, then it needs to be initialized in builtin_x86_defs.
Reviewed-by: Eduardo Habkost <***@redhat.com>

Queued, thanks.
--
Eduardo
Loading...