diff options
author | Steinar H. Gunderson <sesse@google.com> | 2016-01-04 12:51:31 +0100 |
---|---|---|
committer | Steinar H. Gunderson <sesse@google.com> | 2016-01-04 12:51:31 +0100 |
commit | ef5598aa0e867db5243188751054c9b2223b0e5f (patch) | |
tree | 1907bdacf4a6eddc463c58b108e2bb4192b3540e | |
parent | 96a2e340f381ac4dae270f0fe0a8d79fad748c4d (diff) | |
download | snappy-ef5598aa0e867db5243188751054c9b2223b0e5f.tar.gz snappy-ef5598aa0e867db5243188751054c9b2223b0e5f.tar.bz2 snappy-ef5598aa0e867db5243188751054c9b2223b0e5f.zip |
Make UNALIGNED_LOAD16/32 on ARMv7 go through an explicitly unaligned struct,
to avoid the compiler coalescing multiple loads into a single load instruction
(which only works for aligned accesses).
A typical example where GCC would coalesce:
uint8* p = ...;
uint32 a = UNALIGNED_LOAD32(p);
uint32 b = UNALIGNED_LOAD32(p + 4);
uint32 c = a | b;
-rw-r--r-- | snappy-stubs-internal.h | 41 |
1 files changed, 36 insertions, 5 deletions
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h index ddca1a8..9b2c11f 100644 --- a/snappy-stubs-internal.h +++ b/snappy-stubs-internal.h @@ -116,6 +116,15 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL); // sub-architectures. // // This is a mess, but there's not much we can do about it. +// +// To further complicate matters, only LDR instructions (single reads) are +// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we +// explicitly tell the compiler that these accesses can be unaligned, it can and +// will combine accesses. On armcc, the way to signal this is done by accessing +// through the type (uint32 __packed *), but GCC has no such attribute +// (it ignores __attribute__((packed)) on individual variables). However, +// we can tell it that a _struct_ is unaligned, which has the same effect, +// so we do that. #elif defined(__arm__) && \ !defined(__ARM_ARCH_4__) && \ @@ -131,11 +140,33 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL); !defined(__ARM_ARCH_6ZK__) && \ !defined(__ARM_ARCH_6T2__) -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) +namespace base { +namespace internal { + +struct Unaligned16Struct { + uint16 value; + uint8 dummy; // To make the size non-power-of-two. +} ATTRIBUTE_PACKED; + +struct Unaligned32Struct { + uint32 value; + uint8 dummy; // To make the size non-power-of-two. 
+} ATTRIBUTE_PACKED; + +} // namespace internal +} // namespace base + +#define UNALIGNED_LOAD16(_p) \ + ((reinterpret_cast<const ::base::internal::Unaligned16Struct *>(_p))->value) +#define UNALIGNED_LOAD32(_p) \ + ((reinterpret_cast<const ::base::internal::Unaligned32Struct *>(_p))->value) + +#define UNALIGNED_STORE16(_p, _val) \ + ((reinterpret_cast<::base::internal::Unaligned16Struct *>(_p))->value = \ + (_val)) +#define UNALIGNED_STORE32(_p, _val) \ + ((reinterpret_cast<::base::internal::Unaligned32Struct *>(_p))->value = \ + (_val)) // TODO(user): NEON supports unaligned 64-bit loads and stores. // See if that would be more efficient on platforms supporting it, |