summaryrefslogtreecommitdiff
path: root/src/util/bitscan.h
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2017-11-13 13:11:09 -0800
committer Ian Romanick <ian.d.romanick@intel.com> 2018-03-29 14:09:29 -0700
commit ef7a4c90155cb180a4f0d289594ef11da9a2b21a (patch)
tree 01d8ad661bd8e380bffe568c6cc8db472f76404c /src/util/bitscan.h
parent cd18aa1e50e4668c8d992816bcdb74376dd1064c (diff)
download mesa-ef7a4c90155cb180a4f0d289594ef11da9a2b21a.tar.gz
mesa-ef7a4c90155cb180a4f0d289594ef11da9a2b21a.tar.bz2
mesa-ef7a4c90155cb180a4f0d289594ef11da9a2b21a.zip
util: Optimize util_is_power_of_two_nonzero
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Suggested-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Diffstat (limited to 'src/util/bitscan.h')
-rw-r--r-- src/util/bitscan.h 17
1 files changed, 17 insertions, 0 deletions
diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index a3f2d414bd6..5cc75f0beba 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -38,6 +38,10 @@
#include <intrin.h>
#endif
+#if defined(__POPCNT__)
+#include <popcntintrin.h>
+#endif
+
#include "c99_compat.h"
#ifdef __cplusplus
@@ -127,7 +131,20 @@ util_is_power_of_two_or_zero(unsigned v)
static inline bool
util_is_power_of_two_nonzero(unsigned v)
{
+ /* A nonzero power of two has exactly one set bit, so its popcount is 1.
+ * __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT. The latter
+ * indicates the existence of the __builtin_popcount function. The former
+ * indicates that _mm_popcnt_u32 exists and is a native instruction.
+ * The other alternative is to use SSE 4.2 compile-time flags. This has
+ * two drawbacks. First, there is currently no build infrastructure for
+ * SSE 4.2 (only 4.1), so that would have to be added. Second, some AMD
+ * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
+ */
+#ifdef __POPCNT__
+ return _mm_popcnt_u32(v) == 1;
+#else
return v != 0 && (v & (v - 1)) == 0;
+#endif
}
/* For looping over a bitmask when you want to loop over consecutive bits