summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gc/env/gcenv.base.h52
-rw-r--r--src/gc/gc.cpp119
-rw-r--r--src/gc/gcrecord.h6
-rw-r--r--src/pal/inc/pal.h49
-rw-r--r--src/pal/src/include/pal/palinternal.h1
5 files changed, 161 insertions, 66 deletions
diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h
index da66e9b8a3..187d9ca723 100644
--- a/src/gc/env/gcenv.base.h
+++ b/src/gc/env/gcenv.base.h
@@ -220,8 +220,10 @@ typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter);
#ifdef _MSC_VER
#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
#if _WIN64
#pragma intrinsic(_BitScanForward64)
+ #pragma intrinsic(_BitScanReverse64)
#endif
#endif // _MSC_VER
@@ -281,6 +283,56 @@ inline uint8_t BitScanForward64(uint32_t *bitIndex, uint64_t mask)
#endif // _MSC_VER
}
+// Cross-platform wrapper for the _BitScanReverse compiler intrinsic.
+// Stores the 0-based index of the most-significant set bit of mask in *bitIndex
+// and returns nonzero; returns 0 (with *bitIndex undefined) when mask is zero.
+inline uint8_t BitScanReverse(uint32_t *bitIndex, uint32_t mask)
+{
+#ifdef _MSC_VER
+ return _BitScanReverse((unsigned long*)bitIndex, mask);
+#else // _MSC_VER
+ // The result of __builtin_clz is undefined when mask is zero, but it's still
+ // OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling it this way lets the compiler emit branchless code.
+ // Note: this must be __builtin_clz, not __builtin_clzl -- 'unsigned long' is
+ // 64 bits wide on LP64 platforms, so __builtin_clzl would count 32 extra
+ // leading zeros and make (31 - lzcount) wrong for every nonzero mask.
+ int lzcount = __builtin_clz(mask);
+ *bitIndex = static_cast<uint32_t>(31 - lzcount);
+ return mask != 0 ? TRUE : FALSE;
+#endif // _MSC_VER
+}
+
+// Cross-platform wrapper for the _BitScanReverse64 compiler intrinsic.
+inline uint8_t BitScanReverse64(uint32_t *bitIndex, uint64_t mask)
+{
+#ifdef _MSC_VER
+ #if _WIN64
+ return _BitScanReverse64((unsigned long*)bitIndex, mask);
+ #else
+ // MSVC targeting a 32-bit target does not support this intrinsic.
+ // We can emulate it by checking whether the upper 32 bits are zero (or not),
+ // then calling _BitScanReverse() on either the upper or lower 32 bits.
+ uint32_t upper = static_cast<uint32_t>(mask >> 32);
+
+ if (upper != 0)
+ {
+ uint8_t result = _BitScanReverse((unsigned long*)bitIndex, upper);
+ *bitIndex += 32;
+ return result;
+ }
+
+ return _BitScanReverse((unsigned long*)bitIndex, static_cast<uint32_t>(mask));
+ #endif // _WIN64
+#else
+ // The result of __builtin_clzll is undefined when mask is zero,
+ // but it's still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible (depending on how the
+ // intrinsic is implemented for the target platform).
+ int lzcount = __builtin_clzll(mask);
+ *bitIndex = static_cast<uint32_t>(63 - lzcount);
+ return mask != 0 ? TRUE : FALSE;
+#endif // _MSC_VER
+}
+
// Aligns a size_t to the specified alignment. Alignment must be a power
// of two.
inline size_t ALIGN_UP(size_t val, size_t alignment)
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index c5c178b943..a8a47c7788 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -308,78 +308,69 @@ void GCStatistics::DisplayAndUpdate()
#endif // GC_STATS
-#ifdef BIT64
-#define TOTAL_TIMES_TO_SHIFT 6
-#else
-#define TOTAL_TIMES_TO_SHIFT 5
-#endif // BIT64
-
+inline
size_t round_up_power2 (size_t size)
{
- unsigned short shift = 1;
- size_t shifted = 0;
-
- size--;
- for (unsigned short i = 0; i < TOTAL_TIMES_TO_SHIFT; i++)
- {
- shifted = size | (size >> shift);
- if (shifted == size)
- {
- break;
- }
-
- size = shifted;
- shift <<= 1;
- }
- shifted++;
+ // Get the 0-based index of the most-significant bit in size-1.
+ // If the call fails (i.e. size-1 is zero), size must have been 1;
+ // return 1, since 1 is already a power of two and rounds up to itself.
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size - 1)) { return 1; }
- return shifted;
+ // The size == 0 case (which would have overflowed to SIZE_MAX when decremented)
+ // is handled here by relying on the fact that highest_set_bit_index is the maximum value
+ // (31 or 63, depending on sizeof(size_t)), so left-shifting a value >= 2 by that
+ // number of bits shifts all set bits out, making round_up_power2(0) return zero.
+ return static_cast<size_t>(2) << highest_set_bit_index;
}
inline
size_t round_down_power2 (size_t size)
{
- size_t power2 = round_up_power2 (size);
-
- if (power2 != size)
- {
- power2 >>= 1;
- }
+ // Get the 0-based index of the most-significant bit in size.
+ // If the call fails (size has no set bits), size is zero; return 0.
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size)) { return 0; }
- return power2;
+ // Left-shift 1 by highest_set_bit_index to get back a value containing only
+ // the most-significant set bit of size, i.e. size rounded down
+ // to the next power-of-two value.
+ return static_cast<size_t>(1) << highest_set_bit_index;
}
-// the index starts from 0.
-int index_of_set_bit (size_t power2)
+// Get the 0-based index of the most-significant bit in the value, or -1 if zero.
+// NOTE(review): also declared in gcrecord.h; 'inline' here assumes every caller shares this TU -- confirm.
+inline
+int index_of_highest_set_bit (size_t value)
{
- int low = 0;
- int high = sizeof (size_t) * 8 - 1;
- int mid;
- while (low <= high)
- {
- mid = ((low + high)/2);
- size_t temp = (size_t)1 << mid;
- if (power2 & temp)
- {
- return mid;
- }
- else if (power2 < temp)
- {
- high = mid - 1;
- }
- else
- {
- low = mid + 1;
- }
- }
-
- return -1;
+ // If BitScanReverse reports failure (value is zero), return -1;
+ // otherwise return the reported 0-based index of the highest set bit.
+ DWORD highest_set_bit_index;
+ return (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, value)) ? -1 : static_cast<int>(highest_set_bit_index);
}
inline
int relative_index_power2_plug (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? 0 : (index - MIN_INDEX_POWER2));
@@ -388,7 +379,7 @@ int relative_index_power2_plug (size_t power2)
inline
int relative_index_power2_free_space (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? -1 : (index - MIN_INDEX_POWER2));
@@ -8688,7 +8679,7 @@ public:
}
}
- int bucket_power2 = index_of_set_bit (round_down_power2 (size));
+ int bucket_power2 = index_of_highest_set_bit (size);
if (bucket_power2 < base_power2)
{
return;
@@ -8791,7 +8782,7 @@ public:
plug_size_to_fit += (pad_in_front ? Align(min_obj_size) : 0);
#endif //SHORT_PLUGS
- int plug_power2 = index_of_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
+ int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
ptrdiff_t i;
uint8_t* new_address = 0;
@@ -8873,9 +8864,9 @@ retry:
(plug_size - pad),
pad,
pinned_plug (m),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
(pinned_plug (m) - pinned_len (m)),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
#ifdef SHORT_PLUGS
@@ -8919,9 +8910,9 @@ retry:
old_loc,
new_address,
(plug_size - pad),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
heap_segment_plan_allocated (seg),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
if (realign_padding_p)
@@ -8953,7 +8944,7 @@ retry:
(!chosen_power2) && (i < free_space_count));
}
- int new_bucket_power2 = index_of_set_bit (round_down_power2 (new_free_space_size));
+ int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size);
if (new_bucket_power2 < base_power2)
{
@@ -33465,7 +33456,7 @@ HRESULT GCHeap::Initialize ()
gc_heap::min_loh_segment_size = large_seg_size;
gc_heap::min_segment_size = min (seg_size, large_seg_size);
#ifdef SEG_MAPPING_TABLE
- gc_heap::min_segment_size_shr = index_of_set_bit (gc_heap::min_segment_size);
+ gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_heap::min_segment_size);
#endif //SEG_MAPPING_TABLE
#ifdef MULTIPLE_HEAPS
diff --git a/src/gc/gcrecord.h b/src/gc/gcrecord.h
index 9c3282b2bd..30966953e2 100644
--- a/src/gc/gcrecord.h
+++ b/src/gc/gcrecord.h
@@ -311,7 +311,9 @@ static gc_mechanism_descr gc_mechanisms_descr[max_mechanism_per_heap] =
};
#endif //DT_LOG
-int index_of_set_bit (size_t power2);
+// Get the 0-based index of the most-significant bit in the value.
+// Returns -1 if the input value is zero (i.e. has no set bits).
+int index_of_highest_set_bit (size_t value);
#define mechanism_mask (1 << (sizeof (uint32_t) * 8 - 1))
// interesting per heap data we want to record for each GC.
@@ -372,7 +374,7 @@ public:
if (mechanism & mechanism_mask)
{
- int index = index_of_set_bit ((size_t)(mechanism & (~mechanism_mask)));
+ int index = index_of_highest_set_bit ((size_t)(mechanism & (~mechanism_mask)));
assert (index != -1);
return index;
}
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index 2a51d584ad..c4a1d64a29 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -3302,6 +3302,55 @@ BitScanForward64(
return qwMask != 0 ? TRUE : FALSE;
}
+// Define BitScanReverse64 and BitScanReverse
+// Per MSDN, BitScanReverse64 will search the mask data from MSB to LSB for a set bit.
+// If one is found, its bit position is stored in the out PDWORD argument and 1 is returned.
+// Otherwise, an undefined value is stored in the out PDWORD argument and 0 is returned.
+//
+// GCC/clang don't have a directly equivalent intrinsic; they do provide the __builtin_clzll
+// intrinsic, which returns the number of leading 0-bits in x starting at the most significant
+// bit position (the result is undefined when x = 0).
+//
+// The same is true for BitScanReverse, except that the GCC function is __builtin_clz.
+
+EXTERN_C
+PALIMPORT
+inline
+unsigned char
+PALAPI
+BitScanReverse(
+ IN OUT PDWORD Index,
+ IN UINT qwMask)
+{
+ // The result of __builtin_clz is undefined when qwMask is zero, but it's still
+ // OK to call the intrinsic in that case (just don't use the output). Note that
+ // this must be __builtin_clz, not __builtin_clzl: 'unsigned long' is 64 bits
+ // wide on LP64 Unix platforms (where the PAL builds), so __builtin_clzl would
+ // count 32 extra leading zeros and make (31 - lzcount) wrong for every input.
+ int lzcount = __builtin_clz(qwMask);
+ *Index = (DWORD)(31 - lzcount);
+ return qwMask != 0;
+}
+
+EXTERN_C
+PALIMPORT
+inline
+unsigned char
+PALAPI
+BitScanReverse64(
+ IN OUT PDWORD Index,
+ IN UINT64 qwMask)
+{
+ // The result of __builtin_clzll is undefined when qwMask is zero, but it's
+ // still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible. __builtin_clzll takes
+ // unsigned long long, which is 64 bits wide on all supported platforms.
+ int lzcount = __builtin_clzll(qwMask);
+ *Index = (DWORD)(63 - lzcount);
+ return qwMask != 0;
+}
+
FORCEINLINE void PAL_ArmInterlockedOperationBarrier()
{
#ifdef _ARM64_
diff --git a/src/pal/src/include/pal/palinternal.h b/src/pal/src/include/pal/palinternal.h
index a66ef7e18e..67236aaa49 100644
--- a/src/pal/src/include/pal/palinternal.h
+++ b/src/pal/src/include/pal/palinternal.h
@@ -342,6 +342,7 @@ function_name() to call the system's implementation
#if !defined(_MSC_VER) && defined(_WIN64)
#undef _BitScanForward64
+#undef _BitScanReverse64
#endif
/* pal.h defines alloca(3) as a compiler builtin.