Diffstat (limited to 'vpx_ports')
-rw-r--r-- | vpx_ports/arm_cpudetect.c    | 261
-rw-r--r-- | vpx_ports/asm_offsets.h      |   4
-rw-r--r-- | vpx_ports/config.h           |  10
-rw-r--r-- | vpx_ports/emmintrin_compat.h |  55
-rw-r--r-- | vpx_ports/emms.asm           |   2
-rw-r--r-- | vpx_ports/mem.h              |   5
-rw-r--r-- | vpx_ports/mem_ops.h          | 182
-rw-r--r-- | vpx_ports/mem_ops_aligned.h  |  80
-rw-r--r-- | vpx_ports/vpx_once.h         |  97
-rw-r--r-- | vpx_ports/vpx_timer.h        |  63
-rw-r--r-- | vpx_ports/vpxtypes.h         | 167
-rw-r--r-- | vpx_ports/x86.h              | 258
-rw-r--r-- | vpx_ports/x86_abi_support.asm|  27
-rw-r--r-- | vpx_ports/x86_cpuid.c        |  60
14 files changed, 617 insertions, 654 deletions
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c index 8ff95a110..542ff6786 100644 --- a/vpx_ports/arm_cpudetect.c +++ b/vpx_ports/arm_cpudetect.c @@ -12,50 +12,45 @@ #include <string.h> #include "arm.h" -static int arm_cpu_env_flags(int *flags) -{ - char *env; - env = getenv("VPX_SIMD_CAPS"); - if (env && *env) - { - *flags = (int)strtol(env, NULL, 0); - return 0; - } - *flags = 0; - return -1; +static int arm_cpu_env_flags(int *flags) { + char *env; + env = getenv("VPX_SIMD_CAPS"); + if (env && *env) { + *flags = (int)strtol(env, NULL, 0); + return 0; + } + *flags = 0; + return -1; } -static int arm_cpu_env_mask(void) -{ - char *env; - env = getenv("VPX_SIMD_CAPS_MASK"); - return env && *env ? (int)strtol(env, NULL, 0) : ~0; +static int arm_cpu_env_mask(void) { + char *env; + env = getenv("VPX_SIMD_CAPS_MASK"); + return env && *env ? (int)strtol(env, NULL, 0) : ~0; } #if !CONFIG_RUNTIME_CPU_DETECT -int arm_cpu_caps(void) -{ +int arm_cpu_caps(void) { /* This function should actually be a no-op. There is no way to adjust any of * these because the RTCD tables do not exist: the functions are called * statically */ - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); #if HAVE_EDSP - flags |= HAS_EDSP; + flags |= HAS_EDSP; #endif /* HAVE_EDSP */ #if HAVE_MEDIA - flags |= HAS_MEDIA; + flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ #if HAVE_NEON - flags |= HAS_NEON; + flags |= HAS_NEON; #endif /* HAVE_NEON */ - return flags & mask; + return flags & mask; } #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ @@ -64,156 +59,134 @@ int arm_cpu_caps(void) #define WIN32_EXTRA_LEAN #include <windows.h> -int arm_cpu_caps(void) -{ - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); - /* MSVC has no inline __asm support for ARM, but it does let you __emit - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. - */ +int arm_cpu_caps(void) { + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + /* MSVC has no inline __asm support for ARM, but it does let you __emit + * instructions via their assembled hex code. + * All of these instructions should be essentially nops. 
+ */ #if HAVE_EDSP - if (mask & HAS_EDSP) - { - __try - { - /*PLD [r13]*/ - __emit(0xF5DDF000); - flags |= HAS_EDSP; - } - __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) - { - /*Ignore exception.*/ - } + if (mask & HAS_EDSP) { + __try { + /*PLD [r13]*/ + __emit(0xF5DDF000); + flags |= HAS_EDSP; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ } + } #if HAVE_MEDIA - if (mask & HAS_MEDIA) - __try - { - /*SHADD8 r3,r3,r3*/ - __emit(0xE6333F93); - flags |= HAS_MEDIA; - } - __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) - { - /*Ignore exception.*/ - } - } + if (mask & HAS_MEDIA) + __try { + /*SHADD8 r3,r3,r3*/ + __emit(0xE6333F93); + flags |= HAS_MEDIA; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ + } +} #if HAVE_NEON - if (mask & HAS_NEON) - { - __try - { - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags |= HAS_NEON; - } - __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) - { - /*Ignore exception.*/ - } - } +if (mask &HAS_NEON) { + __try { + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags |= HAS_NEON; + } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { + /*Ignore exception.*/ + } +} #endif /* HAVE_NEON */ #endif /* HAVE_MEDIA */ #endif /* HAVE_EDSP */ - return flags & mask; +return flags & mask; } #elif defined(__ANDROID__) /* end _MSC_VER */ #include <cpu-features.h> -int arm_cpu_caps(void) -{ - int flags; - int mask; - uint64_t features; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); +int arm_cpu_caps(void) { + int flags; + int mask; + uint64_t features; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + features = android_getCpuFeatures(); #if HAVE_EDSP - flags |= HAS_EDSP; + flags |= HAS_EDSP; #endif /* HAVE_EDSP */ #if HAVE_MEDIA - flags |= HAS_MEDIA; + flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ #if HAVE_NEON - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; + if (features & ANDROID_CPU_ARM_FEATURE_NEON) + flags |= HAS_NEON; #endif /* HAVE_NEON */ - return flags & mask; + return flags & mask; } #elif defined(__linux__) /* end __ANDROID__ */ + #include <stdio.h> -int arm_cpu_caps(void) -{ - FILE *fin; - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); - /* Reading /proc/self/auxv would be easier, but that doesn't work reliably - * on Android. - * This also means that detection will fail in Scratchbox. +int arm_cpu_caps(void) { + FILE *fin; + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + /* Reading /proc/self/auxv would be easier, but that doesn't work reliably + * on Android. + * This also means that detection will fail in Scratchbox. + */ + fin = fopen("/proc/cpuinfo", "r"); + if (fin != NULL) { + /* 512 should be enough for anybody (it's even enough for all the flags + * that x86 has accumulated... so far). */ - fin = fopen("/proc/cpuinfo","r"); - if(fin != NULL) - { - /* 512 should be enough for anybody (it's even enough for all the flags - * that x86 has accumulated... so far). 
- */ - char buf[512]; - while (fgets(buf, 511, fin) != NULL) - { + char buf[512]; + while (fgets(buf, 511, fin) != NULL) { #if HAVE_EDSP || HAVE_NEON - if (memcmp(buf, "Features", 8) == 0) - { - char *p; + if (memcmp(buf, "Features", 8) == 0) { + char *p; #if HAVE_EDSP - p=strstr(buf, " edsp"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_EDSP; - } + p = strstr(buf, " edsp"); + if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { + flags |= HAS_EDSP; + } #if HAVE_NEON - p = strstr(buf, " neon"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) - { - flags |= HAS_NEON; - } + p = strstr(buf, " neon"); + if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { + flags |= HAS_NEON; + } #endif /* HAVE_NEON */ #endif /* HAVE_EDSP */ - } + } #endif /* HAVE_EDSP || HAVE_NEON */ #if HAVE_MEDIA - if (memcmp(buf, "CPU architecture:",17) == 0){ - int version; - version = atoi(buf+17); - if (version >= 6) - { - flags |= HAS_MEDIA; - } - } -#endif /* HAVE_MEDIA */ + if (memcmp(buf, "CPU architecture:", 17) == 0) { + int version; + version = atoi(buf + 17); + if (version >= 6) { + flags |= HAS_MEDIA; } - fclose(fin); + } +#endif /* HAVE_MEDIA */ } - return flags & mask; + fclose(fin); + } + return flags & mask; } #else /* end __linux__ */ #error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ - "available for your platform. Reconfigure with --disable-runtime-cpu-detect." +"available for your platform. Reconfigure with --disable-runtime-cpu-detect." #endif diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h index 7b6ae4a14..d3a3e5a14 100644 --- a/vpx_ports/asm_offsets.h +++ b/vpx_ports/asm_offsets.h @@ -15,8 +15,8 @@ #include <stddef.h> #define ct_assert(name,cond) \ - static void assert_##name(void) UNUSED;\ - static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} + static void assert_##name(void) UNUSED;\ + static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} #if INLINE_ASM #define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val)) diff --git a/vpx_ports/config.h b/vpx_ports/config.h new file mode 100644 index 000000000..1abe70da9 --- /dev/null +++ b/vpx_ports/config.h @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "vpx_config.h" diff --git a/vpx_ports/emmintrin_compat.h b/vpx_ports/emmintrin_compat.h new file mode 100644 index 000000000..782d603af --- /dev/null +++ b/vpx_ports/emmintrin_compat.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H +#define VPX_PORTS_EMMINTRIN_COMPAT_H + +#if defined(__GNUC__) && __GNUC__ < 4 +/* From emmintrin.h (gcc 4.5.3) */ +/* Casts between various SP, DP, INT vector types. Note that these do no + conversion of values, they just change the type. 
*/ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ps(__m128d __A) +{ + return (__m128) __A; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_si128(__m128d __A) +{ + return (__m128i) __A; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_pd(__m128 __A) +{ + return (__m128d) __A; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_si128(__m128 __A) +{ + return (__m128i) __A; +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ps(__m128i __A) +{ + return (__m128) __A; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_pd(__m128i __A) +{ + return (__m128d) __A; +} +#endif + +#endif diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm index efad1a503..db8da2873 100644 --- a/vpx_ports/emms.asm +++ b/vpx_ports/emms.asm @@ -18,7 +18,7 @@ sym(vpx_reset_mmx_state): ret -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 global sym(vpx_winx64_fldcw) PRIVATE sym(vpx_winx64_fldcw): sub rsp, 8 diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h index 29e507f4f..62b86bb1d 100644 --- a/vpx_ports/mem.h +++ b/vpx_ports/mem.h @@ -11,6 +11,7 @@ #ifndef VPX_PORTS_MEM_H #define VPX_PORTS_MEM_H + #include "vpx_config.h" #include "vpx/vpx_integer.h" @@ -31,8 +32,8 @@ * within the array. */ #define DECLARE_ALIGNED_ARRAY(a,typ,val,n)\ -typ val##_[(n)+(a)/sizeof(typ)+1];\ -typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a))) + typ val##_[(n)+(a)/sizeof(typ)+1];\ + typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a))) /* Indicates that the usage of the specified variable has been audited to assure diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h index dec28d595..2d44a3a58 100644 --- a/vpx_ports/mem_ops.h +++ b/vpx_ports/mem_ops.h @@ -60,88 +60,82 @@ #undef mem_get_be16 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) -static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 8; - val |= mem[1]; - return val; +static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[0] << 8; + val |= mem[1]; + return val; } #undef mem_get_be24 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) -static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 16; - val |= mem[1] << 8; - val |= mem[2]; - return val; +static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[0] << 16; + val |= mem[1] << 8; + val |= mem[2]; + return val; } #undef mem_get_be32 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) -static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 24; - val |= mem[1] << 16; - val |= mem[2] << 8; - val |= mem[3]; - return val; +static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[0] << 24; + val |= mem[1] << 16; + val |= mem[2] << 8; + val |= mem[3]; + return val; } #undef mem_get_le16 #define mem_get_le16 
mem_ops_wrap_symbol(mem_get_le16) -static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[1] << 8; - val |= mem[0]; - return val; +static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[1] << 8; + val |= mem[0]; + return val; } #undef mem_get_le24 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) -static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; +static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[2] << 16; + val |= mem[1] << 8; + val |= mem[0]; + return val; } #undef mem_get_le32 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) -static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) -{ - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[3] << 24; - val |= mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; +static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { + unsigned MEM_VALUE_T val; + const MAU_T *mem = (const MAU_T *)vmem; + + val = mem[3] << 24; + val |= mem[2] << 16; + val |= mem[1] << 8; + val |= mem[0]; + return val; } #define mem_get_s_generic(end,sz) \ - static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ - const MAU_T *mem = (const MAU_T*)vmem;\ - signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ - return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ - } + static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ + const MAU_T *mem = (const MAU_T*)vmem;\ + signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ + return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ + } #undef mem_get_sbe16 #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) @@ -169,66 +163,60 @@ mem_get_s_generic(le, 32) #undef mem_put_be16 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -static void mem_put_be16(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = (MAU_T *)vmem; +static void mem_put_be16(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 8) & 0xff; - mem[1] = (val >> 0) & 0xff; + mem[0] = (val >> 8) & 0xff; + mem[1] = (val >> 0) & 0xff; } #undef mem_put_be24 #define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) -static void mem_put_be24(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = (MAU_T *)vmem; +static void mem_put_be24(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 16) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 0) & 0xff; + mem[0] = (val >> 16) & 0xff; + mem[1] = (val >> 8) & 0xff; + mem[2] = (val >> 0) & 0xff; } #undef mem_put_be32 #define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) -static void mem_put_be32(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (val >> 24) & 0xff; - mem[1] = (val >> 16) & 0xff; - mem[2] = (val >> 8) & 0xff; - mem[3] = (val >> 0) & 0xff; +static void mem_put_be32(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (val >> 24) & 0xff; + mem[1] = (val >> 16) & 0xff; + mem[2] = (val >> 8) & 0xff; + mem[3] = (val >> 0) & 0xff; } #undef mem_put_le16 #define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) -static void mem_put_le16(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = 
(MAU_T *)vmem; +static void mem_put_le16(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; + mem[0] = (val >> 0) & 0xff; + mem[1] = (val >> 8) & 0xff; } #undef mem_put_le24 #define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) -static void mem_put_le24(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = (MAU_T *)vmem; +static void mem_put_le24(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 16) & 0xff; + mem[0] = (val >> 0) & 0xff; + mem[1] = (val >> 8) & 0xff; + mem[2] = (val >> 16) & 0xff; } #undef mem_put_le32 #define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) -static void mem_put_le32(void *vmem, MEM_VALUE_T val) -{ - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (val >> 0) & 0xff; - mem[1] = (val >> 8) & 0xff; - mem[2] = (val >> 16) & 0xff; - mem[3] = (val >> 24) & 0xff; +static void mem_put_le32(void *vmem, MEM_VALUE_T val) { + MAU_T *mem = (MAU_T *)vmem; + + mem[0] = (val >> 0) & 0xff; + mem[1] = (val >> 8) & 0xff; + mem[2] = (val >> 16) & 0xff; + mem[3] = (val >> 24) & 0xff; } diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h index fca653a52..0100300a7 100644 --- a/vpx_ports/mem_ops_aligned.h +++ b/vpx_ports/mem_ops_aligned.h @@ -24,61 +24,61 @@ * could redefine these macros. */ #define swap_endian_16(val,raw) do {\ - val = ((raw>>8) & 0x00ff) \ - | ((raw<<8) & 0xff00);\ - } while(0) + val = ((raw>>8) & 0x00ff) \ + | ((raw<<8) & 0xff00);\ + } while(0) #define swap_endian_32(val,raw) do {\ - val = ((raw>>24) & 0x000000ff) \ - | ((raw>>8) & 0x0000ff00) \ - | ((raw<<8) & 0x00ff0000) \ - | ((raw<<24) & 0xff000000); \ - } while(0) + val = ((raw>>24) & 0x000000ff) \ + | ((raw>>8) & 0x0000ff00) \ + | ((raw<<8) & 0x00ff0000) \ + | ((raw<<24) & 0xff000000); \ + } while(0) #define swap_endian_16_se(val,raw) do {\ - swap_endian_16(val,raw);\ - val = ((val << 16) >> 16);\ - } while(0) + swap_endian_16(val,raw);\ + val = ((val << 16) >> 16);\ + } while(0) #define swap_endian_32_se(val,raw) swap_endian_32(val,raw) #define mem_get_ne_aligned_generic(end,sz) \ - static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ - return *mem;\ - } + static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ + const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ + return *mem;\ + } #define mem_get_sne_aligned_generic(end,sz) \ - static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - return *mem;\ - } + static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ + const int##sz##_t *mem = (const int##sz##_t *)vmem;\ + return *mem;\ + } #define mem_get_se_aligned_generic(end,sz) \ - static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - swap_endian_##sz(val,raw);\ - return val;\ - } + static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ + const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ + unsigned MEM_VALUE_T val, raw = *mem;\ + swap_endian_##sz(val,raw);\ + return val;\ + } #define mem_get_sse_aligned_generic(end,sz) \ - static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - 
swap_endian_##sz##_se(val,raw);\ - return val;\ - } + static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ + const int##sz##_t *mem = (const int##sz##_t *)vmem;\ + unsigned MEM_VALUE_T val, raw = *mem;\ + swap_endian_##sz##_se(val,raw);\ + return val;\ + } #define mem_put_ne_aligned_generic(end,sz) \ - static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem;\ - *mem = (uint##sz##_t)val;\ - } + static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + uint##sz##_t *mem = (uint##sz##_t *)vmem;\ + *mem = (uint##sz##_t)val;\ + } #define mem_put_se_aligned_generic(end,sz) \ - static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ - swap_endian_##sz(raw,val);\ - *mem = (uint##sz##_t)raw;\ - } + static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ + swap_endian_##sz(raw,val);\ + *mem = (uint##sz##_t)raw;\ + } #include "vpx_config.h" #if CONFIG_BIG_ENDIAN diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h new file mode 100644 index 000000000..16a735ccd --- /dev/null +++ b/vpx_ports/vpx_once.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "vpx_config.h" + +#if CONFIG_MULTITHREAD && defined(_WIN32) +#include <windows.h> +#include <stdlib.h> +static void once(void (*func)(void)) +{ + static CRITICAL_SECTION *lock; + static LONG waiters; + static int done; + void *lock_ptr = &lock; + + /* If the initialization is complete, return early. This isn't just an + * optimization, it prevents races on the destruction of the global + * lock. + */ + if(done) + return; + + InterlockedIncrement(&waiters); + + /* Get a lock. We create one and try to make it the one-true-lock, + * throwing it away if we lost the race. + */ + + { + /* Scope to protect access to new_lock */ + CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(new_lock); + if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL) + { + DeleteCriticalSection(new_lock); + free(new_lock); + } + } + + /* At this point, we have a lock that can be synchronized on. We don't + * care which thread actually performed the allocation. + */ + + EnterCriticalSection(lock); + + if (!done) + { + func(); + done = 1; + } + + LeaveCriticalSection(lock); + + /* Last one out should free resources. The destructed objects are + * protected by checking if(done) above. + */ + if(!InterlockedDecrement(&waiters)) + { + DeleteCriticalSection(lock); + free(lock); + lock = NULL; + } +} + + +#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H +#include <pthread.h> +static void once(void (*func)(void)) +{ + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} + + +#else +/* No-op version that performs no synchronization. vp8_rtcd() is idempotent, + * so as long as your platform provides atomic loads/stores of pointers + * no synchronization is strictly necessary. 
+ */ + +static void once(void (*func)(void)) +{ + static int done; + + if(!done) + { + func(); + done = 1; + } +} +#endif diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h index d07e08610..cdad9ef1d 100644 --- a/vpx_ports/vpx_timer.h +++ b/vpx_ports/vpx_timer.h @@ -32,65 +32,61 @@ /* timersub is not provided by msys at this time. */ #ifndef timersub #define timersub(a, b, result) \ - do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ - } while (0) + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) #endif #endif -struct vpx_usec_timer -{ +struct vpx_usec_timer { #if defined(_WIN32) - LARGE_INTEGER begin, end; + LARGE_INTEGER begin, end; #else - struct timeval begin, end; + struct timeval begin, end; #endif }; static void -vpx_usec_timer_start(struct vpx_usec_timer *t) -{ +vpx_usec_timer_start(struct vpx_usec_timer *t) { #if defined(_WIN32) - QueryPerformanceCounter(&t->begin); + QueryPerformanceCounter(&t->begin); #else - gettimeofday(&t->begin, NULL); + gettimeofday(&t->begin, NULL); #endif } static void -vpx_usec_timer_mark(struct vpx_usec_timer *t) -{ +vpx_usec_timer_mark(struct vpx_usec_timer *t) { #if defined(_WIN32) - QueryPerformanceCounter(&t->end); + QueryPerformanceCounter(&t->end); #else - gettimeofday(&t->end, NULL); + gettimeofday(&t->end, NULL); #endif } static int64_t -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) -{ +vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { #if defined(_WIN32) - LARGE_INTEGER freq, diff; + LARGE_INTEGER freq, diff; - diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; + diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; - QueryPerformanceFrequency(&freq); - return diff.QuadPart * 1000000 / freq.QuadPart; + QueryPerformanceFrequency(&freq); + return diff.QuadPart * 1000000 / freq.QuadPart; #else - struct timeval diff; + struct timeval diff; - timersub(&t->end, &t->begin, &diff); - return diff.tv_sec * 1000000 + diff.tv_usec; + timersub(&t->end, &t->begin, &diff); + return diff.tv_sec * 1000000 + diff.tv_usec; #endif } @@ -101,9 +97,8 @@ vpx_usec_timer_elapsed(struct vpx_usec_timer *t) #define timersub(a, b, result) #endif -struct vpx_usec_timer -{ - void *dummy; +struct vpx_usec_timer { + void *dummy; }; static void @@ -113,7 +108,9 @@ static void vpx_usec_timer_mark(struct vpx_usec_timer *t) { } static long -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; } +vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { + return 0; +} #endif /* CONFIG_OS_SUPPORT */ diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h deleted file mode 100644 index f2fb08954..000000000 --- a/vpx_ports/vpxtypes.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __VPXTYPES_H__ -#define __VPXTYPES_H__ - -#include "vpx_config.h" - -//#include <sys/types.h> -#ifdef _MSC_VER -# include <basetsd.h> -typedef SSIZE_T ssize_t; -#endif - -#if defined(HAVE_STDINT_H) && HAVE_STDINT_H -/* C99 types are preferred to vpx integer types */ -# include <stdint.h> -#endif - -/*!\defgroup basetypes Base Types - @{*/ -#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED) -# ifdef STRICTTYPES -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; -# else -typedef char int8_t; -typedef short int16_t; -typedef int int32_t; -# endif -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -#endif - -typedef int8_t vpxs8; -typedef uint8_t vpxu8; -typedef int16_t vpxs16; -typedef uint16_t vpxu16; -typedef int32_t vpxs32; -typedef uint32_t vpxu32; -typedef int32_t vpxbool; - -enum {vpxfalse, vpxtrue}; - -/*!\def OTC - \brief a macro suitable for declaring a constant #vpxtc*/ -/*!\def VPXTC - \brief printf format string suitable for printing an #vpxtc*/ -#ifdef UNICODE -# ifdef NO_WCHAR -# error "no non-wchar support added yet" -# else -# include <wchar.h> -typedef wchar_t vpxtc; -# define OTC(str) L ## str -# define VPXTC "ls" -# endif /*NO_WCHAR*/ -#else -typedef char vpxtc; -# define OTC(str) (vpxtc*)str -# define VPXTC "s" -#endif /*UNICODE*/ -/*@} end - base types*/ - -/*!\addtogroup basetypes - @{*/ -/*!\def VPX64 - \brief printf format string suitable for printing an #vpxs64*/ -#if defined(HAVE_STDINT_H) -# define VPX64 PRId64 -typedef int64_t vpxs64; -#elif defined(HASLONGLONG) -# undef PRId64 -# define PRId64 "lld" -# define VPX64 PRId64 -typedef long long vpxs64; -#elif defined(WIN32) || defined(_WIN32_WCE) -# undef PRId64 -# define PRId64 "I64d" -# define VPX64 PRId64 -typedef __int64 vpxs64; -typedef unsigned __int64 vpxu64; -#elif defined(__uClinux__) && defined(CHIP_DM642) -# include <lddk.h> -# undef PRId64 -# define PRId64 "lld" -# define VPX64 PRId64 -typedef long vpxs64; -#else -# error "64 bit integer type undefined for this platform!" -#endif -#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED) -typedef vpxs64 int64_t; -typedef vpxu64 uint64_t; -#endif -/*!@} end - base types*/ - -/*!\ingroup basetypes - \brief Common return type*/ -typedef enum -{ - VPX_NOT_FOUND = -404, - VPX_BUFFER_EMPTY = -202, - VPX_BUFFER_FULL = -201, - - VPX_CONNREFUSED = -102, - VPX_TIMEDOUT = -101, - VPX_WOULDBLOCK = -100, - - VPX_NET_ERROR = -9, - VPX_INVALID_VERSION = -8, - VPX_INPROGRESS = -7, - VPX_NOT_SUPP = -6, - VPX_NO_MEM = -3, - VPX_INVALID_PARAMS = -2, - VPX_ERROR = -1, - VPX_OK = 0, - VPX_DONE = 1 -} vpxsc; - -#if defined(WIN32) || defined(_WIN32_WCE) -# define DLLIMPORT __declspec(dllimport) -# define DLLEXPORT __declspec(dllexport) -# define DLLLOCAL -#elif defined(LINUX) -# define DLLIMPORT -/*visibility attribute support is available in 3.4 and later. - see: http://gcc.gnu.org/wiki/Visibility for more info*/ -# if defined(__GNUC__) && ((__GNUC__<<16|(__GNUC_MINOR__&0xff)) >= (3<<16|4)) -# define GCC_HASCLASSVISIBILITY -# endif /*defined(__GNUC__) && __GNUC_PREREQ(3,4)*/ -# ifdef GCC_HASCLASSVISIBILITY -# define DLLEXPORT __attribute__ ((visibility("default"))) -# define DLLLOCAL __attribute__ ((visibility("hidden"))) -# else -# define DLLEXPORT -# define DLLLOCAL -# endif /*GCC_HASCLASSVISIBILITY*/ -#endif /*platform ifdefs*/ - -#endif /*__VPXTYPES_H__*/ - -#undef VPXAPI -/*!\def VPXAPI - \brief library calling convention/storage class attributes. 
- - Specifies whether the function is imported through a dll - or is from a static library.*/ -#ifdef VPXDLL -# ifdef VPXDLLEXPORT -# define VPXAPI DLLEXPORT -# else -# define VPXAPI DLLIMPORT -# endif /*VPXDLLEXPORT*/ -#else -# define VPXAPI -#endif /*VPXDLL*/ diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h index 9dd8c4b59..299058347 100644 --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@ -14,133 +14,137 @@ #include <stdlib.h> #include "vpx_config.h" -typedef enum -{ - VPX_CPU_UNKNOWN = -1, - VPX_CPU_AMD, - VPX_CPU_AMD_OLD, - VPX_CPU_CENTAUR, - VPX_CPU_CYRIX, - VPX_CPU_INTEL, - VPX_CPU_NEXGEN, - VPX_CPU_NSC, - VPX_CPU_RISE, - VPX_CPU_SIS, - VPX_CPU_TRANSMETA, - VPX_CPU_TRANSMETA_OLD, - VPX_CPU_UMC, - VPX_CPU_VIA, - - VPX_CPU_LAST +typedef enum { + VPX_CPU_UNKNOWN = -1, + VPX_CPU_AMD, + VPX_CPU_AMD_OLD, + VPX_CPU_CENTAUR, + VPX_CPU_CYRIX, + VPX_CPU_INTEL, + VPX_CPU_NEXGEN, + VPX_CPU_NSC, + VPX_CPU_RISE, + VPX_CPU_SIS, + VPX_CPU_TRANSMETA, + VPX_CPU_TRANSMETA_OLD, + VPX_CPU_UMC, + VPX_CPU_VIA, + + VPX_CPU_LAST } vpx_cpu_t; -#if defined(__GNUC__) && __GNUC__ +#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) #if ARCH_X86_64 #define cpuid(func,ax,bx,cx,dx)\ - __asm__ __volatile__ (\ - "cpuid \n\t" \ - : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func)); + __asm__ __volatile__ (\ + "cpuid \n\t" \ + : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); #else #define cpuid(func,ax,bx,cx,dx)\ - __asm__ __volatile__ (\ - "mov %%ebx, %%edi \n\t" \ - "cpuid \n\t" \ - "xchg %%edi, %%ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func)); + __asm__ __volatile__ (\ + "mov %%ebx, %%edi \n\t" \ + "cpuid \n\t" \ + "xchg %%edi, %%ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); #endif -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ #if ARCH_X86_64 #define cpuid(func,ax,bx,cx,dx)\ - asm volatile (\ - "xchg %rsi, %rbx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "xchg %rsi, %rbx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func)); + asm volatile (\ + "xchg %rsi, %rbx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "xchg %rsi, %rbx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); #else #define cpuid(func,ax,bx,cx,dx)\ - asm volatile (\ - "pushl %ebx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "popl %ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func)); + asm volatile (\ + "pushl %ebx \n\t" \ + "cpuid \n\t" \ + "movl %ebx, %edi \n\t" \ + "popl %ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); #endif -#else +#else /* end __SUNPRO__ */ #if ARCH_X86_64 void __cpuid(int CPUInfo[4], int info_type); #pragma intrinsic(__cpuid) #define cpuid(func,a,b,c,d) do{\ - int regs[4];\ - __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ - } while(0) + int regs[4];\ + __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ + } while(0) #else #define cpuid(func,a,b,c,d)\ - __asm mov eax, func\ - __asm cpuid\ - __asm mov a, eax\ - __asm mov b, ebx\ - __asm mov c, ecx\ - __asm mov d, edx + __asm mov eax, func\ + __asm cpuid\ + __asm mov a, eax\ + __asm mov b, ebx\ + __asm mov c, ecx\ + __asm mov d, edx #endif -#endif - -#define HAS_MMX 0x01 -#define HAS_SSE 0x02 -#define HAS_SSE2 0x04 -#define HAS_SSE3 0x08 -#define HAS_SSSE3 0x10 -#define HAS_SSE4_1 0x20 +#endif /* end others */ + +#define HAS_MMX 0x01 +#define HAS_SSE 0x02 
+#define HAS_SSE2 0x04 +#define HAS_SSE3 0x08 +#define HAS_SSSE3 0x10 +#define HAS_SSE4_1 0x20 +#define HAS_AVX 0x40 +#define HAS_AVX2 0x80 #ifndef BIT #define BIT(n) (1<<n) #endif static int -x86_simd_caps(void) -{ - unsigned int flags = 0; - unsigned int mask = ~0; - unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; - char *env; - (void)reg_ebx; +x86_simd_caps(void) { + unsigned int flags = 0; + unsigned int mask = ~0; + unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + char *env; + (void)reg_ebx; - /* See if the CPU capabilities are being overridden by the environment */ - env = getenv("VPX_SIMD_CAPS"); + /* See if the CPU capabilities are being overridden by the environment */ + env = getenv("VPX_SIMD_CAPS"); - if (env && *env) - return (int)strtol(env, NULL, 0); + if (env && *env) + return (int)strtol(env, NULL, 0); - env = getenv("VPX_SIMD_CAPS_MASK"); + env = getenv("VPX_SIMD_CAPS_MASK"); - if (env && *env) - mask = strtol(env, NULL, 0); + if (env && *env) + mask = strtol(env, NULL, 0); - /* Ensure that the CPUID instruction supports extended features */ - cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); + /* Ensure that the CPUID instruction supports extended features */ + cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); - if (reg_eax < 1) - return 0; + if (reg_eax < 1) + return 0; - /* Get the standard feature flags */ - cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); + /* Get the standard feature flags */ + cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); - if (reg_edx & BIT(23)) flags |= HAS_MMX; + if (reg_edx & BIT(23)) flags |= HAS_MMX; - if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ + if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ - if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ + if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ - if (reg_ecx & BIT(0)) flags |= HAS_SSE3; + if (reg_ecx & BIT(0)) flags |= HAS_SSE3; - if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; + if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; - if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; + if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; - return flags & mask; + if (reg_ecx & BIT(28)) flags |= HAS_AVX; + + if (reg_ebx & BIT(5)) flags |= HAS_AVX2; + + return flags & mask; } vpx_cpu_t vpx_x86_vendor(void); @@ -150,21 +154,20 @@ unsigned __int64 __rdtsc(void); #pragma intrinsic(__rdtsc) #endif static unsigned int -x86_readtsc(void) -{ +x86_readtsc(void) { #if defined(__GNUC__) && __GNUC__ - unsigned int tsc; - __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); - return tsc; + unsigned int tsc; + __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); + return tsc; #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - unsigned int tsc; - asm volatile("rdtsc\n\t":"=a"(tsc):); - return tsc; + unsigned int tsc; + asm volatile("rdtsc\n\t":"=a"(tsc):); + return tsc; #else #if ARCH_X86_64 - return (unsigned int)__rdtsc(); + return (unsigned int)__rdtsc(); #else - __asm rdtsc; + __asm rdtsc; #endif #endif } @@ -172,45 +175,41 @@ x86_readtsc(void) #if defined(__GNUC__) && __GNUC__ #define x86_pause_hint()\ - __asm__ __volatile__ ("pause \n\t") + __asm__ __volatile__ ("pause \n\t") #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) #define x86_pause_hint()\ - asm volatile ("pause \n\t") + asm volatile ("pause \n\t") #else #if ARCH_X86_64 #define x86_pause_hint()\ - _mm_pause(); + _mm_pause(); #else #define x86_pause_hint()\ - __asm pause + __asm pause #endif #endif #if defined(__GNUC__) && __GNUC__ static void -x87_set_control_word(unsigned short mode) -{ - __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); 
+x87_set_control_word(unsigned short mode) { + __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); } static unsigned short -x87_get_control_word(void) -{ - unsigned short mode; - __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); +x87_get_control_word(void) { + unsigned short mode; + __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); return mode; } #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) static void -x87_set_control_word(unsigned short mode) -{ - asm volatile("fldcw %0" : : "m"(*&mode)); +x87_set_control_word(unsigned short mode) { + asm volatile("fldcw %0" : : "m"(*&mode)); } static unsigned short -x87_get_control_word(void) -{ - unsigned short mode; - asm volatile("fstcw %0\n\t":"=m"(*&mode):); - return mode; +x87_get_control_word(void) { + unsigned short mode; + asm volatile("fstcw %0\n\t":"=m"(*&mode):); + return mode; } #elif ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ @@ -220,25 +219,22 @@ extern unsigned short vpx_winx64_fstcw(void); #define x87_get_control_word vpx_winx64_fstcw #else static void -x87_set_control_word(unsigned short mode) -{ - __asm { fldcw mode } +x87_set_control_word(unsigned short mode) { + __asm { fldcw mode } } static unsigned short -x87_get_control_word(void) -{ - unsigned short mode; - __asm { fstcw mode } - return mode; +x87_get_control_word(void) { + unsigned short mode; + __asm { fstcw mode } + return mode; } #endif static unsigned short -x87_set_double_precision(void) -{ - unsigned short mode = x87_get_control_word(); - x87_set_control_word((mode&~0x300) | 0x200); - return mode; +x87_set_double_precision(void) { + unsigned short mode = x87_get_control_word(); + x87_set_control_word((mode&~0x300) | 0x200); + return mode; } diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm index 0c9fe3774..eccbfa35c 100644 --- a/vpx_ports/x86_abi_support.asm +++ b/vpx_ports/x86_abi_support.asm @@ -78,6 +78,17 @@ %endif +; LIBVPX_YASM_WIN64 +; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 +; or win64 is defined on the Yasm command line. +%ifidn __OUTPUT_FORMAT__,win64 +%define LIBVPX_YASM_WIN64 1 +%elifidn __OUTPUT_FORMAT__,x64 +%define LIBVPX_YASM_WIN64 1 +%else +%define LIBVPX_YASM_WIN64 0 +%endif + ; sym() ; Return the proper symbol name for the target ABI. ; @@ -90,7 +101,7 @@ %define sym(x) x %elifidn __OUTPUT_FORMAT__,elfx32 %define sym(x) x -%elifidn __OUTPUT_FORMAT__,x64 +%elif LIBVPX_YASM_WIN64 %define sym(x) x %else %define sym(x) _ %+ x @@ -114,7 +125,7 @@ %define PRIVATE :hidden %elifidn __OUTPUT_FORMAT__,elfx32 %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,x64 + %elif LIBVPX_YASM_WIN64 %define PRIVATE %else %define PRIVATE :private_extern @@ -131,7 +142,7 @@ %else ; 64 bit ABI passes arguments in registers. This is a workaround to get up ; and running quickly. 
Relies on SHADOW_ARGS_TO_STACK - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 %define arg(x) [rbp+16+8*x] %else %define arg(x) [rbp-8-8*x] @@ -230,6 +241,12 @@ %elifidn __OUTPUT_FORMAT__,elfx32 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden + %elifidn __OUTPUT_FORMAT__,macho64 + %ifdef CHROMIUM + %define HIDDEN_DATA(x) x:private_extern + %else + %define HIDDEN_DATA(x) x + %endif %else %define HIDDEN_DATA(x) x %endif @@ -251,7 +268,7 @@ %endm %define UNSHADOW_ARGS %else -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 %macro SHADOW_ARGS_TO_STACK 1 ; argc %if %1 > 0 mov arg(0),rcx @@ -307,7 +324,7 @@ ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return ; value. Typically we follow this up with 'push rbp' - re-aligning the stack - ; but in some cases this is not done and unaligned movs must be used. -%ifidn __OUTPUT_FORMAT__,x64 +%if LIBVPX_YASM_WIN64 %macro SAVE_XMM 1-2 a %if %1 < 6 %error Only xmm registers 6-15 must be preserved diff --git a/vpx_ports/x86_cpuid.c b/vpx_ports/x86_cpuid.c index ce6403374..fe86cfc7b 100644 --- a/vpx_ports/x86_cpuid.c +++ b/vpx_ports/x86_cpuid.c @@ -11,43 +11,39 @@ #include <string.h> #include "x86.h" -struct cpuid_vendors -{ - char vendor_string[12]; - vpx_cpu_t vendor_id; +struct cpuid_vendors { + char vendor_string[12]; + vpx_cpu_t vendor_id; }; -static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] = -{ - { "AuthenticAMD", VPX_CPU_AMD }, - { "AMDisbetter!", VPX_CPU_AMD_OLD }, - { "CentaurHauls", VPX_CPU_CENTAUR }, - { "CyrixInstead", VPX_CPU_CYRIX }, - { "GenuineIntel", VPX_CPU_INTEL }, - { "NexGenDriven", VPX_CPU_NEXGEN }, - { "Geode by NSC", VPX_CPU_NSC }, - { "RiseRiseRise", VPX_CPU_RISE }, - { "SiS SiS SiS ", VPX_CPU_SIS }, - { "GenuineTMx86", VPX_CPU_TRANSMETA }, - { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD }, - { "UMC UMC UMC ", VPX_CPU_UMC }, - { "VIA VIA VIA ", VPX_CPU_VIA }, +static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] = { + { "AuthenticAMD", VPX_CPU_AMD }, + { "AMDisbetter!", VPX_CPU_AMD_OLD }, + { "CentaurHauls", VPX_CPU_CENTAUR }, + { "CyrixInstead", VPX_CPU_CYRIX }, + { "GenuineIntel", VPX_CPU_INTEL }, + { "NexGenDriven", VPX_CPU_NEXGEN }, + { "Geode by NSC", VPX_CPU_NSC }, + { "RiseRiseRise", VPX_CPU_RISE }, + { "SiS SiS SiS ", VPX_CPU_SIS }, + { "GenuineTMx86", VPX_CPU_TRANSMETA }, + { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD }, + { "UMC UMC UMC ", VPX_CPU_UMC }, + { "VIA VIA VIA ", VPX_CPU_VIA }, }; -vpx_cpu_t vpx_x86_vendor(void) -{ - unsigned int reg_eax; - unsigned int vs[3]; - int i; +vpx_cpu_t vpx_x86_vendor(void) { + unsigned int reg_eax; + unsigned int vs[3]; + int i; - /* Get the Vendor String from the CPU */ - cpuid(0, reg_eax, vs[0], vs[2], vs[1]); + /* Get the Vendor String from the CPU */ + cpuid(0, reg_eax, vs[0], vs[2], vs[1]); - for (i = 0; i < VPX_CPU_LAST; i++) - { - if (strncmp ((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0) - return (cpuid_vendor_list[i].vendor_id); - } + for (i = 0; i < VPX_CPU_LAST; i++) { + if (strncmp((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0) + return (cpuid_vendor_list[i].vendor_id); + } - return VPX_CPU_UNKNOWN; + return VPX_CPU_UNKNOWN; } |
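
Both the ARM and x86 detectors touched by this change follow the same environment-override convention: a non-empty VPX_SIMD_CAPS replaces the detected capability flags outright, while VPX_SIMD_CAPS_MASK only filters bits out of whatever was detected. The following is a minimal standalone sketch of that pattern, not the library code itself; the detect_caps() stub is hypothetical, and the HAS_* values are borrowed from x86.h above for illustration.

/* simd_caps_sketch.c - illustrative only, assumes a detect_caps() stub. */
#include <stdio.h>
#include <stdlib.h>

#define HAS_MMX  0x01
#define HAS_SSE  0x02
#define HAS_SSE2 0x04

/* Stand-in for the real cpuid / /proc/cpuinfo probing. */
static int detect_caps(void) {
  return HAS_MMX | HAS_SSE | HAS_SSE2;
}

static int simd_caps(void) {
  int mask = ~0;
  const char *env;

  /* A non-empty VPX_SIMD_CAPS overrides detection entirely. */
  env = getenv("VPX_SIMD_CAPS");
  if (env && *env)
    return (int)strtol(env, NULL, 0);

  /* VPX_SIMD_CAPS_MASK can only remove bits from the detected set. */
  env = getenv("VPX_SIMD_CAPS_MASK");
  if (env && *env)
    mask = (int)strtol(env, NULL, 0);

  return detect_caps() & mask;
}

int main(void) {
  /* e.g. VPX_SIMD_CAPS_MASK=0x3 ./a.out keeps only MMX and SSE. */
  printf("caps: 0x%x\n", simd_caps());
  return 0;
}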