summary | refs | log | tree | commit | diff
path: root/common_x86.h
diff options
context:
space:
mode:
author: Hank Anderson <hank.p.anderson@gmail.com>, 2015-02-25 11:52:51 -0600
committer: Hank Anderson <hank.p.anderson@gmail.com>, 2015-02-25 11:52:51 -0600
commit: 5ae8993752886033161ef74184f333a2401c8ba9 (patch)
tree: c8a620dcd8de9b58820d68c86823e4301649d1a1 /common_x86.h
parent: 84d90d6ed85853eecb3ea17f1f23d3a5d7e8d264 (diff)
download: openblas-5ae8993752886033161ef74184f333a2401c8ba9.tar.gz
download: openblas-5ae8993752886033161ef74184f333a2401c8ba9.tar.bz2
download: openblas-5ae8993752886033161ef74184f333a2401c8ba9.zip
Added intrinsics for MSVC.
Diffstat (limited to 'common_x86.h')
-rw-r--r-- common_x86.h 29
1 file changed, 29 insertions, 0 deletions
diff --git a/common_x86.h b/common_x86.h
index f096e9074..0cb242c4e 100644
--- a/common_x86.h
+++ b/common_x86.h
@@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};
+#if defined(_MSC_VER) && !defined(__clang__)
+ // use intrinsic instead of inline assembly
+ ret = _InterlockedExchange(address, 1);
+ // inline assembly
+ /*__asm {
+ mov eax, address
+ mov ebx, 1
+ xchg [eax], ebx
+ mov ret, ebx
+ }*/
+#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
+#endif
} while (ret);
}
/*
 * rpcc: return the 64-bit value produced by the x86 `rdtsc` instruction.
 *
 * Takes no arguments. When built with MSVC (and not clang) the value comes
 * from the __rdtsc() intrinsic; every other compiler takes the inline
 * assembly path below.
 */
static __inline unsigned long long rpcc(void){
+#if defined(_MSC_VER) && !defined(__clang__)
+ return __rdtsc(); // use MSVC intrinsic
+#else
unsigned int a, d;
/* "=a" binds `a` to EAX and "=d" binds `d` to EDX, the two output registers of rdtsc. */
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
/* `a` supplies the low 32 bits and `d` the high 32 bits of the result. */
return ((unsigned long long)a + ((unsigned long long)d << 32));
+#endif
};
/*
 * getstackaddr: return an address as an unsigned long.
 *
 * Non-MSVC builds (and clang) copy the %esp register into the result.
 * MSVC builds return the value of the _ReturnAddress() intrinsic instead.
 *
 * NOTE(review): Microsoft documents _ReturnAddress() as yielding the address
 * of the instruction the current function will return to, i.e. a code
 * address rather than a stack location. The two branches therefore do not
 * appear to report the same quantity; _AddressOfReturnAddress() may have
 * been intended. Confirm against the callers of this function.
 */
static __inline unsigned long getstackaddr(void){
+#if defined(_MSC_VER) && !defined(__clang__)
+ return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
+#else
unsigned long addr;
/* Copy the current value of %esp into addr. */
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
return addr;
+#endif
};
/*
 * sqrt_long: square root of a long double.
 *
 * val     - operand.
 * returns - the square root, typed as long double.
 *
 * Non-MSVC builds (and clang) run the x87 sequence fldt / fsqrt / fstpt on
 * memory operands. MSVC builds call sqrt() instead.
 *
 * NOTE(review): the standard C sqrt() takes and returns double, so on the
 * MSVC path val is presumably converted to double before the call. Whether
 * that loses precision depends on how MSVC represents long double; verify.
 */
static __inline long double sqrt_long(long double val) {
+#if defined(_MSC_VER) && !defined(__clang__)
+ return sqrt(val); // NOTE(review): unverified whether MSVC lowers this call to fsqrt
+#else
long double result;
/* Load val onto the x87 stack, take its square root, then store and pop into result. */
__asm__ __volatile__ ("fldt %1\n"
"fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val));
return result;
+#endif
}
/* SQRT(a) expands to a call to sqrt_long(a). */
#define SQRT(a) sqrt_long(a)
@@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
y = blas_quick_divide_table[y];
+#if defined(_MSC_VER) && !defined(__clang__)
+ (void*)result;
+ return x*y;
+#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
+#endif
}
#endif