summaryrefslogtreecommitdiff
path: root/vpx_ports
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_ports')
-rw-r--r--vpx_ports/arm_cpudetect.c261
-rw-r--r--vpx_ports/asm_offsets.h4
-rw-r--r--vpx_ports/config.h10
-rw-r--r--vpx_ports/emmintrin_compat.h55
-rw-r--r--vpx_ports/emms.asm2
-rw-r--r--vpx_ports/mem.h5
-rw-r--r--vpx_ports/mem_ops.h182
-rw-r--r--vpx_ports/mem_ops_aligned.h80
-rw-r--r--vpx_ports/vpx_once.h97
-rw-r--r--vpx_ports/vpx_timer.h63
-rw-r--r--vpx_ports/vpxtypes.h167
-rw-r--r--vpx_ports/x86.h258
-rw-r--r--vpx_ports/x86_abi_support.asm27
-rw-r--r--vpx_ports/x86_cpuid.c60
14 files changed, 617 insertions, 654 deletions
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
index 8ff95a110..542ff6786 100644
--- a/vpx_ports/arm_cpudetect.c
+++ b/vpx_ports/arm_cpudetect.c
@@ -12,50 +12,45 @@
#include <string.h>
#include "arm.h"
-static int arm_cpu_env_flags(int *flags)
-{
- char *env;
- env = getenv("VPX_SIMD_CAPS");
- if (env && *env)
- {
- *flags = (int)strtol(env, NULL, 0);
- return 0;
- }
- *flags = 0;
- return -1;
+static int arm_cpu_env_flags(int *flags) {
+ char *env;
+ env = getenv("VPX_SIMD_CAPS");
+ if (env && *env) {
+ *flags = (int)strtol(env, NULL, 0);
+ return 0;
+ }
+ *flags = 0;
+ return -1;
}
-static int arm_cpu_env_mask(void)
-{
- char *env;
- env = getenv("VPX_SIMD_CAPS_MASK");
- return env && *env ? (int)strtol(env, NULL, 0) : ~0;
+static int arm_cpu_env_mask(void) {
+ char *env;
+ env = getenv("VPX_SIMD_CAPS_MASK");
+ return env && *env ? (int)strtol(env, NULL, 0) : ~0;
}
#if !CONFIG_RUNTIME_CPU_DETECT
-int arm_cpu_caps(void)
-{
+int arm_cpu_caps(void) {
/* This function should actually be a no-op. There is no way to adjust any of
* these because the RTCD tables do not exist: the functions are called
* statically */
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- flags |= HAS_NEON;
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
@@ -64,156 +59,134 @@ int arm_cpu_caps(void)
#define WIN32_EXTRA_LEAN
#include <windows.h>
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- /* MSVC has no inline __asm support for ARM, but it does let you __emit
- * instructions via their assembled hex code.
- * All of these instructions should be essentially nops.
- */
+int arm_cpu_caps(void) {
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ /* MSVC has no inline __asm support for ARM, but it does let you __emit
+ * instructions via their assembled hex code.
+ * All of these instructions should be essentially nops.
+ */
#if HAVE_EDSP
- if (mask & HAS_EDSP)
- {
- __try
- {
- /*PLD [r13]*/
- __emit(0xF5DDF000);
- flags |= HAS_EDSP;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
+ if (mask & HAS_EDSP) {
+ __try {
+ /*PLD [r13]*/
+ __emit(0xF5DDF000);
+ flags |= HAS_EDSP;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
}
+ }
#if HAVE_MEDIA
- if (mask & HAS_MEDIA)
- __try
- {
- /*SHADD8 r3,r3,r3*/
- __emit(0xE6333F93);
- flags |= HAS_MEDIA;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
- }
+ if (mask & HAS_MEDIA)
+ __try {
+ /*SHADD8 r3,r3,r3*/
+ __emit(0xE6333F93);
+ flags |= HAS_MEDIA;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
+ }
+}
#if HAVE_NEON
- if (mask & HAS_NEON)
- {
- __try
- {
- /*VORR q0,q0,q0*/
- __emit(0xF2200150);
- flags |= HAS_NEON;
- }
- __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- /*Ignore exception.*/
- }
- }
+if (mask &HAS_NEON) {
+ __try {
+ /*VORR q0,q0,q0*/
+ __emit(0xF2200150);
+ flags |= HAS_NEON;
+ } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ /*Ignore exception.*/
+ }
+}
#endif /* HAVE_NEON */
#endif /* HAVE_MEDIA */
#endif /* HAVE_EDSP */
- return flags & mask;
+return flags & mask;
}
#elif defined(__ANDROID__) /* end _MSC_VER */
#include <cpu-features.h>
-int arm_cpu_caps(void)
-{
- int flags;
- int mask;
- uint64_t features;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- features = android_getCpuFeatures();
+int arm_cpu_caps(void) {
+ int flags;
+ int mask;
+ uint64_t features;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ features = android_getCpuFeatures();
#if HAVE_EDSP
- flags |= HAS_EDSP;
+ flags |= HAS_EDSP;
#endif /* HAVE_EDSP */
#if HAVE_MEDIA
- flags |= HAS_MEDIA;
+ flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON
- if (features & ANDROID_CPU_ARM_FEATURE_NEON)
- flags |= HAS_NEON;
+ if (features & ANDROID_CPU_ARM_FEATURE_NEON)
+ flags |= HAS_NEON;
#endif /* HAVE_NEON */
- return flags & mask;
+ return flags & mask;
}
#elif defined(__linux__) /* end __ANDROID__ */
+
#include <stdio.h>
-int arm_cpu_caps(void)
-{
- FILE *fin;
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags))
- {
- return flags;
- }
- mask = arm_cpu_env_mask();
- /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
- * on Android.
- * This also means that detection will fail in Scratchbox.
+int arm_cpu_caps(void) {
+ FILE *fin;
+ int flags;
+ int mask;
+ if (!arm_cpu_env_flags(&flags)) {
+ return flags;
+ }
+ mask = arm_cpu_env_mask();
+ /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+ * on Android.
+ * This also means that detection will fail in Scratchbox.
+ */
+ fin = fopen("/proc/cpuinfo", "r");
+ if (fin != NULL) {
+ /* 512 should be enough for anybody (it's even enough for all the flags
+ * that x86 has accumulated... so far).
*/
- fin = fopen("/proc/cpuinfo","r");
- if(fin != NULL)
- {
- /* 512 should be enough for anybody (it's even enough for all the flags
- * that x86 has accumulated... so far).
- */
- char buf[512];
- while (fgets(buf, 511, fin) != NULL)
- {
+ char buf[512];
+ while (fgets(buf, 511, fin) != NULL) {
#if HAVE_EDSP || HAVE_NEON
- if (memcmp(buf, "Features", 8) == 0)
- {
- char *p;
+ if (memcmp(buf, "Features", 8) == 0) {
+ char *p;
#if HAVE_EDSP
- p=strstr(buf, " edsp");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
- {
- flags |= HAS_EDSP;
- }
+ p = strstr(buf, " edsp");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
+ flags |= HAS_EDSP;
+ }
#if HAVE_NEON
- p = strstr(buf, " neon");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
- {
- flags |= HAS_NEON;
- }
+ p = strstr(buf, " neon");
+ if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
+ flags |= HAS_NEON;
+ }
#endif /* HAVE_NEON */
#endif /* HAVE_EDSP */
- }
+ }
#endif /* HAVE_EDSP || HAVE_NEON */
#if HAVE_MEDIA
- if (memcmp(buf, "CPU architecture:",17) == 0){
- int version;
- version = atoi(buf+17);
- if (version >= 6)
- {
- flags |= HAS_MEDIA;
- }
- }
-#endif /* HAVE_MEDIA */
+ if (memcmp(buf, "CPU architecture:", 17) == 0) {
+ int version;
+ version = atoi(buf + 17);
+ if (version >= 6) {
+ flags |= HAS_MEDIA;
}
- fclose(fin);
+ }
+#endif /* HAVE_MEDIA */
}
- return flags & mask;
+ fclose(fin);
+ }
+ return flags & mask;
}
#else /* end __linux__ */
#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
- "available for your platform. Reconfigure with --disable-runtime-cpu-detect."
+"available for your platform. Reconfigure with --disable-runtime-cpu-detect."
#endif
diff --git a/vpx_ports/asm_offsets.h b/vpx_ports/asm_offsets.h
index 7b6ae4a14..d3a3e5a14 100644
--- a/vpx_ports/asm_offsets.h
+++ b/vpx_ports/asm_offsets.h
@@ -15,8 +15,8 @@
#include <stddef.h>
#define ct_assert(name,cond) \
- static void assert_##name(void) UNUSED;\
- static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
+ static void assert_##name(void) UNUSED;\
+ static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
#if INLINE_ASM
#define DEFINE(sym, val) asm("\n" #sym " EQU %0" : : "i" (val))
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
new file mode 100644
index 000000000..1abe70da9
--- /dev/null
+++ b/vpx_ports/config.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
diff --git a/vpx_ports/emmintrin_compat.h b/vpx_ports/emmintrin_compat.h
new file mode 100644
index 000000000..782d603af
--- /dev/null
+++ b/vpx_ports/emmintrin_compat.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H
+#define VPX_PORTS_EMMINTRIN_COMPAT_H
+
+#if defined(__GNUC__) && __GNUC__ < 4
+/* From emmintrin.h (gcc 4.5.3) */
+/* Casts between various SP, DP, INT vector types. Note that these do no
+ conversion of values, they just change the type. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
+#endif
+
+#endif
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index efad1a503..db8da2873 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -18,7 +18,7 @@ sym(vpx_reset_mmx_state):
ret
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
global sym(vpx_winx64_fldcw) PRIVATE
sym(vpx_winx64_fldcw):
sub rsp, 8
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 29e507f4f..62b86bb1d 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -11,6 +11,7 @@
#ifndef VPX_PORTS_MEM_H
#define VPX_PORTS_MEM_H
+
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -31,8 +32,8 @@
* within the array.
*/
#define DECLARE_ALIGNED_ARRAY(a,typ,val,n)\
-typ val##_[(n)+(a)/sizeof(typ)+1];\
-typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a)))
+ typ val##_[(n)+(a)/sizeof(typ)+1];\
+ typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a)))
/* Indicates that the usage of the specified variable has been audited to assure
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index dec28d595..2d44a3a58 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -60,88 +60,82 @@
#undef mem_get_be16
#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
-static unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[0] << 8;
- val |= mem[1];
- return val;
+static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[0] << 8;
+ val |= mem[1];
+ return val;
}
#undef mem_get_be24
#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
-static unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[0] << 16;
- val |= mem[1] << 8;
- val |= mem[2];
- return val;
+static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[0] << 16;
+ val |= mem[1] << 8;
+ val |= mem[2];
+ return val;
}
#undef mem_get_be32
#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
-static unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[0] << 24;
- val |= mem[1] << 16;
- val |= mem[2] << 8;
- val |= mem[3];
- return val;
+static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[0] << 24;
+ val |= mem[1] << 16;
+ val |= mem[2] << 8;
+ val |= mem[3];
+ return val;
}
#undef mem_get_le16
#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
-static unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[1] << 8;
- val |= mem[0];
- return val;
+static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#undef mem_get_le24
#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
-static unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#undef mem_get_le32
#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
-static unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
-{
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[3] << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
+static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
+ unsigned MEM_VALUE_T val;
+ const MAU_T *mem = (const MAU_T *)vmem;
+
+ val = mem[3] << 24;
+ val |= mem[2] << 16;
+ val |= mem[1] << 8;
+ val |= mem[0];
+ return val;
}
#define mem_get_s_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
- const MAU_T *mem = (const MAU_T*)vmem;\
- signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
- return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
+ const MAU_T *mem = (const MAU_T*)vmem;\
+ signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
+ return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
+ }
#undef mem_get_sbe16
#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16)
@@ -169,66 +163,60 @@ mem_get_s_generic(le, 32)
#undef mem_put_be16
#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
-static void mem_put_be16(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_be16(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 8) & 0xff;
- mem[1] = (val >> 0) & 0xff;
+ mem[0] = (val >> 8) & 0xff;
+ mem[1] = (val >> 0) & 0xff;
}
#undef mem_put_be24
#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24)
-static void mem_put_be24(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_be24(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 16) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 0) & 0xff;
+ mem[0] = (val >> 16) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 0) & 0xff;
}
#undef mem_put_be32
#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32)
-static void mem_put_be32(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (val >> 24) & 0xff;
- mem[1] = (val >> 16) & 0xff;
- mem[2] = (val >> 8) & 0xff;
- mem[3] = (val >> 0) & 0xff;
+static void mem_put_be32(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
+
+ mem[0] = (val >> 24) & 0xff;
+ mem[1] = (val >> 16) & 0xff;
+ mem[2] = (val >> 8) & 0xff;
+ mem[3] = (val >> 0) & 0xff;
}
#undef mem_put_le16
#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16)
-static void mem_put_le16(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_le16(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
}
#undef mem_put_le24
#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24)
-static void mem_put_le24(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
+static void mem_put_le24(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 16) & 0xff;
}
#undef mem_put_le32
#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32)
-static void mem_put_le32(void *vmem, MEM_VALUE_T val)
-{
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
- mem[3] = (val >> 24) & 0xff;
+static void mem_put_le32(void *vmem, MEM_VALUE_T val) {
+ MAU_T *mem = (MAU_T *)vmem;
+
+ mem[0] = (val >> 0) & 0xff;
+ mem[1] = (val >> 8) & 0xff;
+ mem[2] = (val >> 16) & 0xff;
+ mem[3] = (val >> 24) & 0xff;
}
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index fca653a52..0100300a7 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -24,61 +24,61 @@
* could redefine these macros.
*/
#define swap_endian_16(val,raw) do {\
- val = ((raw>>8) & 0x00ff) \
- | ((raw<<8) & 0xff00);\
- } while(0)
+ val = ((raw>>8) & 0x00ff) \
+ | ((raw<<8) & 0xff00);\
+ } while(0)
#define swap_endian_32(val,raw) do {\
- val = ((raw>>24) & 0x000000ff) \
- | ((raw>>8) & 0x0000ff00) \
- | ((raw<<8) & 0x00ff0000) \
- | ((raw<<24) & 0xff000000); \
- } while(0)
+ val = ((raw>>24) & 0x000000ff) \
+ | ((raw>>8) & 0x0000ff00) \
+ | ((raw<<8) & 0x00ff0000) \
+ | ((raw<<24) & 0xff000000); \
+ } while(0)
#define swap_endian_16_se(val,raw) do {\
- swap_endian_16(val,raw);\
- val = ((val << 16) >> 16);\
- } while(0)
+ swap_endian_16(val,raw);\
+ val = ((val << 16) >> 16);\
+ } while(0)
#define swap_endian_32_se(val,raw) swap_endian_32(val,raw)
#define mem_get_ne_aligned_generic(end,sz) \
- static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
- return *mem;\
- }
+ static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
+ return *mem;\
+ }
#define mem_get_sne_aligned_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
- const int##sz##_t *mem = (const int##sz##_t *)vmem;\
- return *mem;\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+ const int##sz##_t *mem = (const int##sz##_t *)vmem;\
+ return *mem;\
+ }
#define mem_get_se_aligned_generic(end,sz) \
- static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
- unsigned MEM_VALUE_T val, raw = *mem;\
- swap_endian_##sz(val,raw);\
- return val;\
- }
+ static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
+ unsigned MEM_VALUE_T val, raw = *mem;\
+ swap_endian_##sz(val,raw);\
+ return val;\
+ }
#define mem_get_sse_aligned_generic(end,sz) \
- static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
- const int##sz##_t *mem = (const int##sz##_t *)vmem;\
- unsigned MEM_VALUE_T val, raw = *mem;\
- swap_endian_##sz##_se(val,raw);\
- return val;\
- }
+ static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+ const int##sz##_t *mem = (const int##sz##_t *)vmem;\
+ unsigned MEM_VALUE_T val, raw = *mem;\
+ swap_endian_##sz##_se(val,raw);\
+ return val;\
+ }
#define mem_put_ne_aligned_generic(end,sz) \
- static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
- uint##sz##_t *mem = (uint##sz##_t *)vmem;\
- *mem = (uint##sz##_t)val;\
- }
+ static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+ uint##sz##_t *mem = (uint##sz##_t *)vmem;\
+ *mem = (uint##sz##_t)val;\
+ }
#define mem_put_se_aligned_generic(end,sz) \
- static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
- uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
- swap_endian_##sz(raw,val);\
- *mem = (uint##sz##_t)raw;\
- }
+ static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+ uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
+ swap_endian_##sz(raw,val);\
+ *mem = (uint##sz##_t)raw;\
+ }
#include "vpx_config.h"
#if CONFIG_BIG_ENDIAN
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
new file mode 100644
index 000000000..16a735ccd
--- /dev/null
+++ b/vpx_ports/vpx_once.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+
+#if CONFIG_MULTITHREAD && defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h>
+static void once(void (*func)(void))
+{
+ static CRITICAL_SECTION *lock;
+ static LONG waiters;
+ static int done;
+ void *lock_ptr = &lock;
+
+ /* If the initialization is complete, return early. This isn't just an
+ * optimization, it prevents races on the destruction of the global
+ * lock.
+ */
+ if(done)
+ return;
+
+ InterlockedIncrement(&waiters);
+
+ /* Get a lock. We create one and try to make it the one-true-lock,
+ * throwing it away if we lost the race.
+ */
+
+ {
+ /* Scope to protect access to new_lock */
+ CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(new_lock);
+ if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
+ {
+ DeleteCriticalSection(new_lock);
+ free(new_lock);
+ }
+ }
+
+ /* At this point, we have a lock that can be synchronized on. We don't
+ * care which thread actually performed the allocation.
+ */
+
+ EnterCriticalSection(lock);
+
+ if (!done)
+ {
+ func();
+ done = 1;
+ }
+
+ LeaveCriticalSection(lock);
+
+ /* Last one out should free resources. The destructed objects are
+ * protected by checking if(done) above.
+ */
+ if(!InterlockedDecrement(&waiters))
+ {
+ DeleteCriticalSection(lock);
+ free(lock);
+ lock = NULL;
+ }
+}
+
+
+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
+#include <pthread.h>
+static void once(void (*func)(void))
+{
+ static pthread_once_t lock = PTHREAD_ONCE_INIT;
+ pthread_once(&lock, func);
+}
+
+
+#else
+/* No-op version that performs no synchronization. vp8_rtcd() is idempotent,
+ * so as long as your platform provides atomic loads/stores of pointers
+ * no synchronization is strictly necessary.
+ */
+
+static void once(void (*func)(void))
+{
+ static int done;
+
+ if(!done)
+ {
+ func();
+ done = 1;
+ }
+}
+#endif
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index d07e08610..cdad9ef1d 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -32,65 +32,61 @@
/* timersub is not provided by msys at this time. */
#ifndef timersub
#define timersub(a, b, result) \
- do { \
- (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((result)->tv_usec < 0) { \
- --(result)->tv_sec; \
- (result)->tv_usec += 1000000; \
- } \
- } while (0)
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
#endif
#endif
-struct vpx_usec_timer
-{
+struct vpx_usec_timer {
#if defined(_WIN32)
- LARGE_INTEGER begin, end;
+ LARGE_INTEGER begin, end;
#else
- struct timeval begin, end;
+ struct timeval begin, end;
#endif
};
static void
-vpx_usec_timer_start(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_start(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- QueryPerformanceCounter(&t->begin);
+ QueryPerformanceCounter(&t->begin);
#else
- gettimeofday(&t->begin, NULL);
+ gettimeofday(&t->begin, NULL);
#endif
}
static void
-vpx_usec_timer_mark(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_mark(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- QueryPerformanceCounter(&t->end);
+ QueryPerformanceCounter(&t->end);
#else
- gettimeofday(&t->end, NULL);
+ gettimeofday(&t->end, NULL);
#endif
}
static int64_t
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
-{
+vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
#if defined(_WIN32)
- LARGE_INTEGER freq, diff;
+ LARGE_INTEGER freq, diff;
- diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
+ diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
- QueryPerformanceFrequency(&freq);
- return diff.QuadPart * 1000000 / freq.QuadPart;
+ QueryPerformanceFrequency(&freq);
+ return diff.QuadPart * 1000000 / freq.QuadPart;
#else
- struct timeval diff;
+ struct timeval diff;
- timersub(&t->end, &t->begin, &diff);
- return diff.tv_sec * 1000000 + diff.tv_usec;
+ timersub(&t->end, &t->begin, &diff);
+ return diff.tv_sec * 1000000 + diff.tv_usec;
#endif
}
@@ -101,9 +97,8 @@ vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
#define timersub(a, b, result)
#endif
-struct vpx_usec_timer
-{
- void *dummy;
+struct vpx_usec_timer {
+ void *dummy;
};
static void
@@ -113,7 +108,9 @@ static void
vpx_usec_timer_mark(struct vpx_usec_timer *t) { }
static long
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; }
+vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
+ return 0;
+}
#endif /* CONFIG_OS_SUPPORT */
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
deleted file mode 100644
index f2fb08954..000000000
--- a/vpx_ports/vpxtypes.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __VPXTYPES_H__
-#define __VPXTYPES_H__
-
-#include "vpx_config.h"
-
-//#include <sys/types.h>
-#ifdef _MSC_VER
-# include <basetsd.h>
-typedef SSIZE_T ssize_t;
-#endif
-
-#if defined(HAVE_STDINT_H) && HAVE_STDINT_H
-/* C99 types are preferred to vpx integer types */
-# include <stdint.h>
-#endif
-
-/*!\defgroup basetypes Base Types
- @{*/
-#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED)
-# ifdef STRICTTYPES
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef signed int int32_t;
-# else
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-# endif
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#endif
-
-typedef int8_t vpxs8;
-typedef uint8_t vpxu8;
-typedef int16_t vpxs16;
-typedef uint16_t vpxu16;
-typedef int32_t vpxs32;
-typedef uint32_t vpxu32;
-typedef int32_t vpxbool;
-
-enum {vpxfalse, vpxtrue};
-
-/*!\def OTC
- \brief a macro suitable for declaring a constant #vpxtc*/
-/*!\def VPXTC
- \brief printf format string suitable for printing an #vpxtc*/
-#ifdef UNICODE
-# ifdef NO_WCHAR
-# error "no non-wchar support added yet"
-# else
-# include <wchar.h>
-typedef wchar_t vpxtc;
-# define OTC(str) L ## str
-# define VPXTC "ls"
-# endif /*NO_WCHAR*/
-#else
-typedef char vpxtc;
-# define OTC(str) (vpxtc*)str
-# define VPXTC "s"
-#endif /*UNICODE*/
-/*@} end - base types*/
-
-/*!\addtogroup basetypes
- @{*/
-/*!\def VPX64
- \brief printf format string suitable for printing an #vpxs64*/
-#if defined(HAVE_STDINT_H)
-# define VPX64 PRId64
-typedef int64_t vpxs64;
-#elif defined(HASLONGLONG)
-# undef PRId64
-# define PRId64 "lld"
-# define VPX64 PRId64
-typedef long long vpxs64;
-#elif defined(WIN32) || defined(_WIN32_WCE)
-# undef PRId64
-# define PRId64 "I64d"
-# define VPX64 PRId64
-typedef __int64 vpxs64;
-typedef unsigned __int64 vpxu64;
-#elif defined(__uClinux__) && defined(CHIP_DM642)
-# include <lddk.h>
-# undef PRId64
-# define PRId64 "lld"
-# define VPX64 PRId64
-typedef long vpxs64;
-#else
-# error "64 bit integer type undefined for this platform!"
-#endif
-#if !defined(HAVE_STDINT_H) && !defined(INT_T_DEFINED)
-typedef vpxs64 int64_t;
-typedef vpxu64 uint64_t;
-#endif
-/*!@} end - base types*/
-
-/*!\ingroup basetypes
- \brief Common return type*/
-typedef enum
-{
- VPX_NOT_FOUND = -404,
- VPX_BUFFER_EMPTY = -202,
- VPX_BUFFER_FULL = -201,
-
- VPX_CONNREFUSED = -102,
- VPX_TIMEDOUT = -101,
- VPX_WOULDBLOCK = -100,
-
- VPX_NET_ERROR = -9,
- VPX_INVALID_VERSION = -8,
- VPX_INPROGRESS = -7,
- VPX_NOT_SUPP = -6,
- VPX_NO_MEM = -3,
- VPX_INVALID_PARAMS = -2,
- VPX_ERROR = -1,
- VPX_OK = 0,
- VPX_DONE = 1
-} vpxsc;
-
-#if defined(WIN32) || defined(_WIN32_WCE)
-# define DLLIMPORT __declspec(dllimport)
-# define DLLEXPORT __declspec(dllexport)
-# define DLLLOCAL
-#elif defined(LINUX)
-# define DLLIMPORT
-/*visibility attribute support is available in 3.4 and later.
- see: http://gcc.gnu.org/wiki/Visibility for more info*/
-# if defined(__GNUC__) && ((__GNUC__<<16|(__GNUC_MINOR__&0xff)) >= (3<<16|4))
-# define GCC_HASCLASSVISIBILITY
-# endif /*defined(__GNUC__) && __GNUC_PREREQ(3,4)*/
-# ifdef GCC_HASCLASSVISIBILITY
-# define DLLEXPORT __attribute__ ((visibility("default")))
-# define DLLLOCAL __attribute__ ((visibility("hidden")))
-# else
-# define DLLEXPORT
-# define DLLLOCAL
-# endif /*GCC_HASCLASSVISIBILITY*/
-#endif /*platform ifdefs*/
-
-#endif /*__VPXTYPES_H__*/
-
-#undef VPXAPI
-/*!\def VPXAPI
- \brief library calling convention/storage class attributes.
-
- Specifies whether the function is imported through a dll
- or is from a static library.*/
-#ifdef VPXDLL
-# ifdef VPXDLLEXPORT
-# define VPXAPI DLLEXPORT
-# else
-# define VPXAPI DLLIMPORT
-# endif /*VPXDLLEXPORT*/
-#else
-# define VPXAPI
-#endif /*VPXDLL*/
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 9dd8c4b59..299058347 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -14,133 +14,137 @@
#include <stdlib.h>
#include "vpx_config.h"
-typedef enum
-{
- VPX_CPU_UNKNOWN = -1,
- VPX_CPU_AMD,
- VPX_CPU_AMD_OLD,
- VPX_CPU_CENTAUR,
- VPX_CPU_CYRIX,
- VPX_CPU_INTEL,
- VPX_CPU_NEXGEN,
- VPX_CPU_NSC,
- VPX_CPU_RISE,
- VPX_CPU_SIS,
- VPX_CPU_TRANSMETA,
- VPX_CPU_TRANSMETA_OLD,
- VPX_CPU_UMC,
- VPX_CPU_VIA,
-
- VPX_CPU_LAST
+typedef enum {
+ VPX_CPU_UNKNOWN = -1,
+ VPX_CPU_AMD,
+ VPX_CPU_AMD_OLD,
+ VPX_CPU_CENTAUR,
+ VPX_CPU_CYRIX,
+ VPX_CPU_INTEL,
+ VPX_CPU_NEXGEN,
+ VPX_CPU_NSC,
+ VPX_CPU_RISE,
+ VPX_CPU_SIS,
+ VPX_CPU_TRANSMETA,
+ VPX_CPU_TRANSMETA_OLD,
+ VPX_CPU_UMC,
+ VPX_CPU_VIA,
+
+ VPX_CPU_LAST
} vpx_cpu_t;
-#if defined(__GNUC__) && __GNUC__
+#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func,ax,bx,cx,dx)\
- __asm__ __volatile__ (\
- "cpuid \n\t" \
- : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ __asm__ __volatile__ (\
+ "cpuid \n\t" \
+ : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#else
#define cpuid(func,ax,bx,cx,dx)\
- __asm__ __volatile__ (\
- "mov %%ebx, %%edi \n\t" \
- "cpuid \n\t" \
- "xchg %%edi, %%ebx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ __asm__ __volatile__ (\
+ "mov %%ebx, %%edi \n\t" \
+ "cpuid \n\t" \
+ "xchg %%edi, %%ebx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#endif
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
#define cpuid(func,ax,bx,cx,dx)\
- asm volatile (\
- "xchg %rsi, %rbx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "xchg %rsi, %rbx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ asm volatile (\
+ "xchg %rsi, %rbx \n\t" \
+ "cpuid \n\t" \
+ "movl %ebx, %edi \n\t" \
+ "xchg %rsi, %rbx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#else
#define cpuid(func,ax,bx,cx,dx)\
- asm volatile (\
- "pushl %ebx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "popl %ebx \n\t" \
- : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
- : "a" (func));
+ asm volatile (\
+ "pushl %ebx \n\t" \
+ "cpuid \n\t" \
+ "movl %ebx, %edi \n\t" \
+ "popl %ebx \n\t" \
+ : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+ : "a" (func));
#endif
-#else
+#else /* end __SUNPRO__ */
#if ARCH_X86_64
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func,a,b,c,d) do{\
- int regs[4];\
- __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\
- } while(0)
+ int regs[4];\
+ __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\
+ } while(0)
#else
#define cpuid(func,a,b,c,d)\
- __asm mov eax, func\
- __asm cpuid\
- __asm mov a, eax\
- __asm mov b, ebx\
- __asm mov c, ecx\
- __asm mov d, edx
+ __asm mov eax, func\
+ __asm cpuid\
+ __asm mov a, eax\
+ __asm mov b, ebx\
+ __asm mov c, ecx\
+ __asm mov d, edx
#endif
-#endif
-
-#define HAS_MMX 0x01
-#define HAS_SSE 0x02
-#define HAS_SSE2 0x04
-#define HAS_SSE3 0x08
-#define HAS_SSSE3 0x10
-#define HAS_SSE4_1 0x20
+#endif /* end others */
+
+#define HAS_MMX 0x01
+#define HAS_SSE 0x02
+#define HAS_SSE2 0x04
+#define HAS_SSE3 0x08
+#define HAS_SSSE3 0x10
+#define HAS_SSE4_1 0x20
+#define HAS_AVX 0x40
+#define HAS_AVX2 0x80
#ifndef BIT
#define BIT(n) (1<<n)
#endif
static int
-x86_simd_caps(void)
-{
- unsigned int flags = 0;
- unsigned int mask = ~0;
- unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
- char *env;
- (void)reg_ebx;
+x86_simd_caps(void) {
+ unsigned int flags = 0;
+ unsigned int mask = ~0;
+ unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
+ char *env;
+ (void)reg_ebx;
- /* See if the CPU capabilities are being overridden by the environment */
- env = getenv("VPX_SIMD_CAPS");
+ /* See if the CPU capabilities are being overridden by the environment */
+ env = getenv("VPX_SIMD_CAPS");
- if (env && *env)
- return (int)strtol(env, NULL, 0);
+ if (env && *env)
+ return (int)strtol(env, NULL, 0);
- env = getenv("VPX_SIMD_CAPS_MASK");
+ env = getenv("VPX_SIMD_CAPS_MASK");
- if (env && *env)
- mask = strtol(env, NULL, 0);
+ if (env && *env)
+ mask = strtol(env, NULL, 0);
- /* Ensure that the CPUID instruction supports extended features */
- cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ /* Ensure that CPUID supports the standard feature leaf (function 1) */
+ cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx);
- if (reg_eax < 1)
- return 0;
+ if (reg_eax < 1)
+ return 0;
- /* Get the standard feature flags */
- cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ /* Get the standard feature flags */
+ cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx);
- if (reg_edx & BIT(23)) flags |= HAS_MMX;
+ if (reg_edx & BIT(23)) flags |= HAS_MMX;
- if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
+ if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
- if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
+ if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
- if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
+ if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
- if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
+ if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
- if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
+ if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
- return flags & mask;
+ if (reg_ecx & BIT(28)) flags |= HAS_AVX;
+
+ if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+
+ return flags & mask;
}
vpx_cpu_t vpx_x86_vendor(void);
@@ -150,21 +154,20 @@ unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
#endif
static unsigned int
-x86_readtsc(void)
-{
+x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
- unsigned int tsc;
- __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
- return tsc;
+ unsigned int tsc;
+ __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
+ return tsc;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- unsigned int tsc;
- asm volatile("rdtsc\n\t":"=a"(tsc):);
- return tsc;
+ unsigned int tsc;
+ asm volatile("rdtsc\n\t":"=a"(tsc):);
+ return tsc;
#else
#if ARCH_X86_64
- return (unsigned int)__rdtsc();
+ return (unsigned int)__rdtsc();
#else
- __asm rdtsc;
+ __asm rdtsc;
#endif
#endif
}
@@ -172,45 +175,41 @@ x86_readtsc(void)
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
- __asm__ __volatile__ ("pause \n\t")
+ __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint()\
- asm volatile ("pause \n\t")
+ asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint()\
- _mm_pause();
+ _mm_pause();
#else
#define x86_pause_hint()\
- __asm pause
+ __asm pause
#endif
#endif
#if defined(__GNUC__) && __GNUC__
static void
-x87_set_control_word(unsigned short mode)
-{
- __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
+x87_set_control_word(unsigned short mode) {
+ __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
+x87_get_control_word(void) {
+ unsigned short mode;
+ __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void
-x87_set_control_word(unsigned short mode)
-{
- asm volatile("fldcw %0" : : "m"(*&mode));
+x87_set_control_word(unsigned short mode) {
+ asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- asm volatile("fstcw %0\n\t":"=m"(*&mode):);
- return mode;
+x87_get_control_word(void) {
+ unsigned short mode;
+ asm volatile("fstcw %0\n\t":"=m"(*&mode):);
+ return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
@@ -220,25 +219,22 @@ extern unsigned short vpx_winx64_fstcw(void);
#define x87_get_control_word vpx_winx64_fstcw
#else
static void
-x87_set_control_word(unsigned short mode)
-{
- __asm { fldcw mode }
+x87_set_control_word(unsigned short mode) {
+ __asm { fldcw mode }
}
static unsigned short
-x87_get_control_word(void)
-{
- unsigned short mode;
- __asm { fstcw mode }
- return mode;
+x87_get_control_word(void) {
+ unsigned short mode;
+ __asm { fstcw mode }
+ return mode;
}
#endif
static unsigned short
-x87_set_double_precision(void)
-{
- unsigned short mode = x87_get_control_word();
- x87_set_control_word((mode&~0x300) | 0x200);
- return mode;
+x87_set_double_precision(void) {
+ unsigned short mode = x87_get_control_word();
+ x87_set_control_word((mode&~0x300) | 0x200);
+ return mode;
}
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 0c9fe3774..eccbfa35c 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -78,6 +78,17 @@
%endif
+; LIBVPX_YASM_WIN64
+; Set LIBVPX_YASM_WIN64 to 1 when the output format is 64-bit Windows, so the
+; code works whether x64 or win64 is given as the Yasm output format.
+%ifidn __OUTPUT_FORMAT__,win64
+%define LIBVPX_YASM_WIN64 1
+%elifidn __OUTPUT_FORMAT__,x64
+%define LIBVPX_YASM_WIN64 1
+%else
+%define LIBVPX_YASM_WIN64 0
+%endif
+
; sym()
; Return the proper symbol name for the target ABI.
;
@@ -90,7 +101,7 @@
%define sym(x) x
%elifidn __OUTPUT_FORMAT__,elfx32
%define sym(x) x
-%elifidn __OUTPUT_FORMAT__,x64
+%elif LIBVPX_YASM_WIN64
%define sym(x) x
%else
%define sym(x) _ %+ x
@@ -114,7 +125,7 @@
%define PRIVATE :hidden
%elifidn __OUTPUT_FORMAT__,elfx32
%define PRIVATE :hidden
- %elifidn __OUTPUT_FORMAT__,x64
+ %elif LIBVPX_YASM_WIN64
%define PRIVATE
%else
%define PRIVATE :private_extern
@@ -131,7 +142,7 @@
%else
; 64 bit ABI passes arguments in registers. This is a workaround to get up
; and running quickly. Relies on SHADOW_ARGS_TO_STACK
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
%define arg(x) [rbp+16+8*x]
%else
%define arg(x) [rbp-8-8*x]
@@ -230,6 +241,12 @@
%elifidn __OUTPUT_FORMAT__,elfx32
%define WRT_PLT wrt ..plt
%define HIDDEN_DATA(x) x:data hidden
+ %elifidn __OUTPUT_FORMAT__,macho64
+ %ifdef CHROMIUM
+ %define HIDDEN_DATA(x) x:private_extern
+ %else
+ %define HIDDEN_DATA(x) x
+ %endif
%else
%define HIDDEN_DATA(x) x
%endif
@@ -251,7 +268,7 @@
%endm
%define UNSHADOW_ARGS
%else
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
%macro SHADOW_ARGS_TO_STACK 1 ; argc
%if %1 > 0
mov arg(0),rcx
@@ -307,7 +324,7 @@
; Win64 ABI requires 16 byte stack alignment, but the call then pushes an 8 byte
; return address. Typically we follow this up with 'push rbp' - re-aligning the
; stack - but in some cases this is not done and unaligned movs must be used.
-%ifidn __OUTPUT_FORMAT__,x64
+%if LIBVPX_YASM_WIN64
%macro SAVE_XMM 1-2 a
%if %1 < 6
%error Only xmm registers 6-15 must be preserved
diff --git a/vpx_ports/x86_cpuid.c b/vpx_ports/x86_cpuid.c
index ce6403374..fe86cfc7b 100644
--- a/vpx_ports/x86_cpuid.c
+++ b/vpx_ports/x86_cpuid.c
@@ -11,43 +11,39 @@
#include <string.h>
#include "x86.h"
-struct cpuid_vendors
-{
- char vendor_string[12];
- vpx_cpu_t vendor_id;
+struct cpuid_vendors {
+ char vendor_string[12];
+ vpx_cpu_t vendor_id;
};
-static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] =
-{
- { "AuthenticAMD", VPX_CPU_AMD },
- { "AMDisbetter!", VPX_CPU_AMD_OLD },
- { "CentaurHauls", VPX_CPU_CENTAUR },
- { "CyrixInstead", VPX_CPU_CYRIX },
- { "GenuineIntel", VPX_CPU_INTEL },
- { "NexGenDriven", VPX_CPU_NEXGEN },
- { "Geode by NSC", VPX_CPU_NSC },
- { "RiseRiseRise", VPX_CPU_RISE },
- { "SiS SiS SiS ", VPX_CPU_SIS },
- { "GenuineTMx86", VPX_CPU_TRANSMETA },
- { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
- { "UMC UMC UMC ", VPX_CPU_UMC },
- { "VIA VIA VIA ", VPX_CPU_VIA },
+static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] = {
+ { "AuthenticAMD", VPX_CPU_AMD },
+ { "AMDisbetter!", VPX_CPU_AMD_OLD },
+ { "CentaurHauls", VPX_CPU_CENTAUR },
+ { "CyrixInstead", VPX_CPU_CYRIX },
+ { "GenuineIntel", VPX_CPU_INTEL },
+ { "NexGenDriven", VPX_CPU_NEXGEN },
+ { "Geode by NSC", VPX_CPU_NSC },
+ { "RiseRiseRise", VPX_CPU_RISE },
+ { "SiS SiS SiS ", VPX_CPU_SIS },
+ { "GenuineTMx86", VPX_CPU_TRANSMETA },
+ { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
+ { "UMC UMC UMC ", VPX_CPU_UMC },
+ { "VIA VIA VIA ", VPX_CPU_VIA },
};
-vpx_cpu_t vpx_x86_vendor(void)
-{
- unsigned int reg_eax;
- unsigned int vs[3];
- int i;
+vpx_cpu_t vpx_x86_vendor(void) {
+ unsigned int reg_eax;
+ unsigned int vs[3];
+ int i;
- /* Get the Vendor String from the CPU */
- cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
+ /* Get the Vendor String from the CPU */
+ cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
- for (i = 0; i < VPX_CPU_LAST; i++)
- {
- if (strncmp ((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
- return (cpuid_vendor_list[i].vendor_id);
- }
+ for (i = 0; i < VPX_CPU_LAST; i++) {
+ if (strncmp((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
+ return (cpuid_vendor_list[i].vendor_id);
+ }
- return VPX_CPU_UNKNOWN;
+ return VPX_CPU_UNKNOWN;
}