diff options
author | Richard Henderson <rth@twiddle.net> | 2015-07-02 14:53:40 +0100 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2016-02-13 07:59:59 +1100 |
commit | 19dc85dba23c0db1ca932c62e453c37e00761628 (patch) | |
tree | 9ac16b50d5d1b11c9533c6a85e336c1274d1ba38 /target-i386 | |
parent | 121f3157887f92268a3d6169e2d4601f9292020b (diff) | |
download | qemu-19dc85dba23c0db1ca932c62e453c37e00761628.tar.gz qemu-19dc85dba23c0db1ca932c62e453c37e00761628.tar.bz2 qemu-19dc85dba23c0db1ca932c62e453c37e00761628.zip |
target-i386: Add XSAVE extension
This includes XSAVE, XRSTOR, XGETBV, XSETBV, which are all related,
as well as the associate cpuid bits.
Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'target-i386')
-rw-r--r-- | target-i386/cpu.c | 39 | ||||
-rw-r--r-- | target-i386/fpu_helper.c | 145 | ||||
-rw-r--r-- | target-i386/helper.c | 12 | ||||
-rw-r--r-- | target-i386/helper.h | 4 | ||||
-rw-r--r-- | target-i386/kvm.c | 11 | ||||
-rw-r--r-- | target-i386/translate.c | 54 |
6 files changed, 241 insertions, 24 deletions
diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 3fa14bf171..fb8a646a20 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -331,14 +331,14 @@ static const char *cpuid_6_feature_name[] = { #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \ + CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA, - CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE, - CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C, - CPUID_EXT_RDRAND */ + CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX, + CPUID_EXT_F16C, CPUID_EXT_RDRAND */ #ifdef TARGET_X86_64 #define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM) @@ -2323,10 +2323,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx = (cpu->apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */ *ecx = env->features[FEAT_1_ECX]; + if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) { + *ecx |= CPUID_EXT_OSXSAVE; + } *edx = env->features[FEAT_1_EDX]; if (cs->nr_cores * cs->nr_threads > 1) { *ebx |= (cs->nr_cores * cs->nr_threads) << 16; - *edx |= 1 << 28; /* HTT bit */ + *edx |= CPUID_HT; } break; case 2: @@ -2450,7 +2453,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0xD: { KVMState *s = cs->kvm_state; - uint64_t kvm_mask; + uint64_t ena_mask; int i; /* Processor Extended State */ @@ -2458,35 +2461,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx = 0; *ecx = 0; *edx = 0; - if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) { + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { break; } - kvm_mask = - kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) | - ((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32); + if (kvm_enabled()) { + ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX); + ena_mask <<= 32; + ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); + } else { + ena_mask = -1; + } if (count == 0) { *ecx = 0x240; for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) { const ExtSaveArea *esa = &ext_save_areas[i]; - if ((env->features[esa->feature] & esa->bits) == esa->bits && - (kvm_mask & (1 << i)) != 0) { + if ((env->features[esa->feature] & esa->bits) == esa->bits + && ((ena_mask >> i) & 1) != 0) { if (i < 32) { - *eax |= 1 << i; + *eax |= 1u << i; } else { - *edx |= 1 << (i - 32); + *edx |= 1u << (i - 32); } *ecx = MAX(*ecx, esa->offset + esa->size); } } - *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE); + *eax |= ena_mask & (XSTATE_FP | XSTATE_SSE); *ebx = *ecx; } else if (count == 1) { *eax = env->features[FEAT_XSAVE]; } else if (count < ARRAY_SIZE(ext_save_areas)) { const ExtSaveArea *esa = &ext_save_areas[count]; - if ((env->features[esa->feature] & esa->bits) == esa->bits && - (kvm_mask & (1 << count)) != 0) { + if ((env->features[esa->feature] & esa->bits) == esa->bits + && ((ena_mask >> count) & 1) != 0) { *eax = esa->size; *ebx = esa->offset; } diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c index a7da370c5d..159e1df185 100644 --- a/target-i386/fpu_helper.c +++ b/target-i386/fpu_helper.c @@ -1190,6 +1190,45 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr) } } +static uint64_t get_xinuse(CPUX86State *env) +{ + /* We don't track XINUSE. We could calculate it here, but it's + probably less work to simply indicate all components in use. */ + return -1; +} + +void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + uintptr_t ra = GETPC(); + uint64_t old_bv, new_bv; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* Never save anything not enabled by XCR0. */ + rfbm &= env->xcr0; + + if (rfbm & XSTATE_FP) { + do_xsave_fpu(env, ptr, ra); + } + if (rfbm & XSTATE_SSE) { + do_xsave_mxcsr(env, ptr, ra); + do_xsave_sse(env, ptr, ra); + } + + /* Update the XSTATE_BV field. */ + old_bv = cpu_ldq_data_ra(env, ptr + 512, ra); + new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm); + cpu_stq_data_ra(env, ptr + 512, new_bv, ra); +} + static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) { int i, fpus, fptag; @@ -1259,6 +1298,112 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) } } +void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + uintptr_t ra = GETPC(); + uint64_t xstate_bv, xcomp_bv0, xcomp_bv1; + + rfbm &= env->xcr0; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra); + + if ((int64_t)xstate_bv < 0) { + /* FIXME: Compact form. */ + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* Standard form. */ + + /* The XSTATE field must not set bits not present in XCR0. */ + if (xstate_bv & ~env->xcr0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* The XCOMP field must be zero. */ + xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra); + xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra); + if (xcomp_bv0 || xcomp_bv1) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + if (rfbm & XSTATE_FP) { + if (xstate_bv & XSTATE_FP) { + do_xrstor_fpu(env, ptr, ra); + } else { + helper_fninit(env); + memset(env->fpregs, 0, sizeof(env->fpregs)); + } + } + if (rfbm & XSTATE_SSE) { + /* Note that the standard form of XRSTOR loads MXCSR from memory + whether or not the XSTATE_BV bit is set. */ + do_xrstor_mxcsr(env, ptr, ra); + if (xstate_bv & XSTATE_SSE) { + do_xrstor_sse(env, ptr, ra); + } else { + /* ??? When AVX is implemented, we may have to be more + selective in the clearing. */ + memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); + } + } +} + +uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) +{ + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } + + switch (ecx) { + case 0: + return env->xcr0; + case 1: + /* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2]. */ + return env->xcr0 & get_xinuse(env); + } + raise_exception_ra(env, EXCP0D_GPF, GETPC()); +} + +void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) +{ + uint32_t dummy, ena_lo, ena_hi; + uint64_t ena; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } + + /* Only XCR0 is defined at present; the FPU may not be disabled. */ + if (ecx != 0 || (mask & XSTATE_FP) == 0) { + goto do_gpf; + } + + /* Disallow enabling unimplemented features. */ + cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); + ena = ((uint64_t)ena_hi << 32) | ena_lo; + if (mask & ~ena) { + goto do_gpf; + } + + env->xcr0 = mask; + return; + + do_gpf: + raise_exception_ra(env, EXCP0D_GPF, GETPC()); +} + void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f) { CPU_LDoubleU temp; diff --git a/target-i386/helper.c b/target-i386/helper.c index 3802ed9359..618bcff695 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -647,6 +647,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) { X86CPU *cpu = x86_env_get_cpu(env); + uint32_t hflags; #if defined(DEBUG_MMU) printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]); @@ -656,24 +657,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) CR4_SMEP_MASK | CR4_SMAP_MASK)) { tlb_flush(CPU(cpu), 1); } + + /* Clear bits we're going to recompute. */ + hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK); + /* SSE handling */ if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) { new_cr4 &= ~CR4_OSFXSR_MASK; } - env->hflags &= ~HF_OSFXSR_MASK; if (new_cr4 & CR4_OSFXSR_MASK) { - env->hflags |= HF_OSFXSR_MASK; + hflags |= HF_OSFXSR_MASK; } if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) { new_cr4 &= ~CR4_SMAP_MASK; } - env->hflags &= ~HF_SMAP_MASK; if (new_cr4 & CR4_SMAP_MASK) { - env->hflags |= HF_SMAP_MASK; + hflags |= HF_SMAP_MASK; } env->cr[4] = new_cr4; + env->hflags = hflags; } #if defined(CONFIG_USER_ONLY) diff --git a/target-i386/helper.h b/target-i386/helper.h index 6109e466fb..9dfc735429 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -187,6 +187,10 @@ DEF_HELPER_3(fsave, void, env, tl, int) DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl) +DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64) +DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64) +DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32) +DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64) DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 94024bc1b1..fca0314c3a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1855,13 +1855,16 @@ static int kvm_get_sregs(X86CPU *cpu) HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) - hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; + hflags = env->hflags & HFLAG_COPY_MASK; + hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); - hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << - (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); + + if (env->cr[4] & CR4_OSFXSR_MASK) { + hflags |= HF_OSFXSR_MASK; + } if (env->efer & MSR_EFER_LMA) { hflags |= HF_LMA_MASK; @@ -1882,7 +1885,7 @@ static int kvm_get_sregs(X86CPU *cpu) env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT; } } - env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; + env->hflags = hflags; return 0; } diff --git a/target-i386/translate.c b/target-i386/translate.c index 647757843f..7571e850d5 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -7079,6 +7079,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0); break; + case 0xd0: /* xgetbv */ + if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 + || (s->prefix & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ))) { + goto illegal_op; + } + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); + gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32); + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64); + break; + + case 0xd1: /* xsetbv */ + if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 + || (s->prefix & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ))) { + goto illegal_op; + } + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } + tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + cpu_regs[R_EDX]); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]); + gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64); + /* End TB because translation flags may change. */ + gen_jmp_im(s->pc - pc_start); + gen_eob(s); + break; + case 0xd8: /* VMRUN */ if (!(s->flags & HF_SVME_MASK) || !s->pe) { goto illegal_op; @@ -7580,6 +7610,30 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); break; + CASE_MEM_OP(4): /* xsave */ + if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 + || (prefixes & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ))) { + goto illegal_op; + } + gen_lea_modrm(env, s, modrm); + tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + cpu_regs[R_EDX]); + gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64); + break; + + CASE_MEM_OP(5): /* xrstor */ + if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 + || (prefixes & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ))) { + goto illegal_op; + } + gen_lea_modrm(env, s, modrm); + tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX], + cpu_regs[R_EDX]); + gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64); + break; + CASE_MEM_OP(6): /* clwb */ if (prefixes & PREFIX_LOCK) { goto illegal_op; |