author     Steinar H. Gunderson <sesse@google.com>  2016-01-04 12:51:31 +0100
committer  Steinar H. Gunderson <sesse@google.com>  2016-01-04 12:51:31 +0100
commit     ef5598aa0e867db5243188751054c9b2223b0e5f
tree       1907bdacf4a6eddc463c58b108e2bb4192b3540e
parent     96a2e340f381ac4dae270f0fe0a8d79fad748c4d
Make UNALIGNED_LOAD16/32 on ARMv7 go through an explicitly unaligned struct,
to avoid the compiler coalescing multiple loads into a single load instruction
(which only works for aligned accesses). A typical example where GCC would
coalesce:

    uint8* p = ...;
    uint32 a = UNALIGNED_LOAD32(p);
    uint32 b = UNALIGNED_LOAD32(p + 4);
    uint32 c = a | b;
-rw-r--r--  snappy-stubs-internal.h  41
1 file changed, 36 insertions(+), 5 deletions(-)
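To make the failure mode concrete before the diff itself, here is a minimal
standalone sketch of both approaches (illustrative only, not part of the
patch; plain uint32_t and a bare __attribute__((packed)) stand in for
snappy's uint32 and ATTRIBUTE_PACKED):

    #include <stdint.h>

    // Old approach: a plain cast. On ARMv7, GCC assumes uint32_t alignment
    // here and may fuse two adjacent loads into a single LDRD, which only
    // works for aligned addresses.
    static inline uint32_t LoadCast(const void* p) {
      return *reinterpret_cast<const uint32_t*>(p);
    }

    // Patched approach: a packed struct tells GCC the pointee may be
    // misaligned, so every access stays an individual, unalignment-safe LDR.
    struct Unaligned32 {
      uint32_t value;
      uint8_t dummy;  // To make the size non-power-of-two.
    } __attribute__((packed));

    static inline uint32_t LoadPacked(const void* p) {
      return reinterpret_cast<const Unaligned32*>(p)->value;
    }

    uint32_t ReadPair(const uint8_t* p) {
      uint32_t a = LoadPacked(p);      // one LDR
      uint32_t b = LoadPacked(p + 4);  // a separate LDR; no LDRD fusion
      return a | b;                    // the pattern from the commit message
    }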
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
index ddca1a8..9b2c11f 100644
--- a/snappy-stubs-internal.h
+++ b/snappy-stubs-internal.h
@@ -116,6 +116,15 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
 // sub-architectures.
 //
 // This is a mess, but there's not much we can do about it.
+//
+// To further complicate matters, only LDR instructions (single reads) are
+// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
+// explicitly tell the compiler that these accesses can be unaligned, it can and
+// will combine accesses. On armcc, the way to signal this is to access
+// through the type (uint32 __packed *), but GCC has no such attribute
+// (it ignores __attribute__((packed)) on individual variables). However,
+// we can tell it that a _struct_ is unaligned, which has the same effect,
+// so we do that.
 #elif defined(__arm__) && \
       !defined(__ARM_ARCH_4__) && \
@@ -131,11 +140,33 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
       !defined(__ARM_ARCH_6ZK__) && \
       !defined(__ARM_ARCH_6T2__)
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
-#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
-
-#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
-#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+namespace base {
+namespace internal {
+
+struct Unaligned16Struct {
+  uint16 value;
+  uint8 dummy;  // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+struct Unaligned32Struct {
+  uint32 value;
+  uint8 dummy;  // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+}  // namespace internal
+}  // namespace base
+
+#define UNALIGNED_LOAD16(_p) \
+    ((reinterpret_cast<const ::base::internal::Unaligned16Struct *>(_p))->value)
+#define UNALIGNED_LOAD32(_p) \
+    ((reinterpret_cast<const ::base::internal::Unaligned32Struct *>(_p))->value)
+
+#define UNALIGNED_STORE16(_p, _val) \
+    ((reinterpret_cast<::base::internal::Unaligned16Struct *>(_p))->value = \
+     (_val))
+#define UNALIGNED_STORE32(_p, _val) \
+    ((reinterpret_cast<::base::internal::Unaligned32Struct *>(_p))->value = \
+     (_val))
 // TODO(user): NEON supports unaligned 64-bit loads and stores.
 // See if that would be more efficient on platforms supporting it,
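For completeness, a short usage sketch of the macros as redefined above (the
function is invented for illustration and assumes at least nine valid bytes
at buf):

    #include "snappy-stubs-internal.h"

    uint32 Demo(char* buf) {  // buf may be aligned or not
      UNALIGNED_STORE32(buf + 1, 0x12345678);  // store at an odd address
      uint32 a = UNALIGNED_LOAD32(buf + 1);    // a single LDR on ARMv7
      uint32 b = UNALIGNED_LOAD32(buf + 5);    // stays a separate load; no LDRD/LDM
      return a | b;                            // the pattern from the commit message
    }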
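And on the closing TODO: a purely hypothetical sketch of the NEON variant it
contemplates (assumes <arm_neon.h>; the function name is invented, and whether
this would actually be faster is exactly what the TODO leaves open):

    #include <arm_neon.h>
    #include <stdint.h>

    // Hypothetical, not part of this patch: VLD1 tolerates unaligned
    // addresses, so a 64-bit load would need no packed struct at all.
    static inline uint64_t Neon_LoadU64(const void* p) {
      uint8x8_t bytes = vld1_u8(static_cast<const uint8_t*>(p));  // one VLD1.8
      return vget_lane_u64(vreinterpret_u64_u8(bytes), 0);
    }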