diff options
author | torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-01-22 16:13:06 +0000 |
---|---|---|
committer | torvald <torvald@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-01-22 16:13:06 +0000 |
commit | a44dd069f34153291b7c13b649fe3da21d9b5df8 (patch) | |
tree | 11c4d3a4315d5b73971745da630b492a4481574a | |
parent | c0b02c428dafb80a7b223b5ef3d2c29532306256 (diff) | |
download | linaro-gcc-a44dd069f34153291b7c13b649fe3da21d9b5df8.tar.gz linaro-gcc-a44dd069f34153291b7c13b649fe3da21d9b5df8.tar.bz2 linaro-gcc-a44dd069f34153291b7c13b649fe3da21d9b5df8.zip |
libitm: Fix HTM fastpath.
* beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline
boundary.
(htm_fastpath): Remove.
(gtm_thread::begin_transaction): Fix HTM fastpath.
(_ITM_commitTransaction): Adapt.
(_ITM_commitTransactionEH): Adapt.
* libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member
and accessors.
* libitm/config/posix/rwlock.h (gtm_rwlock): Likewise.
* libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt.
* libitm/config/x86/sjlj.S (_ITM_beginTransaction): Fix HTM fastpath.
* libitm/libitm_i.h (htm_fastpath): Remove declaration.
* libitm/method-serial.cc (htm_mg): Adapt.
(gtm_thread::serialirr_mode): Adapt.
* libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@232735 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | libitm/ChangeLog | 18 | ||||
-rw-r--r-- | libitm/beginend.cc | 47 | ||||
-rw-r--r-- | libitm/config/linux/rwlock.h | 42 | ||||
-rw-r--r-- | libitm/config/posix/rwlock.cc | 1 | ||||
-rw-r--r-- | libitm/config/posix/rwlock.h | 37 | ||||
-rw-r--r-- | libitm/config/x86/sjlj.S | 14 | ||||
-rw-r--r-- | libitm/libitm_i.h | 5 | ||||
-rw-r--r-- | libitm/method-serial.cc | 6 | ||||
-rw-r--r-- | libitm/query.cc | 4 |
9 files changed, 128 insertions, 46 deletions
diff --git a/libitm/ChangeLog b/libitm/ChangeLog index 6179fc8d741..2a3554fd6af 100644 --- a/libitm/ChangeLog +++ b/libitm/ChangeLog @@ -1,3 +1,21 @@ +2016-01-22 Torvald Riegel <triegel@redhat.com> + + * beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline + boundary. + (htm_fastpath): Remove. + (gtm_thread::begin_transaction): Fix HTM fastpath. + (_ITM_commitTransaction): Adapt. + (_ITM_commitTransactionEH): Adapt. + * libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member + and accessors. + * libitm/config/posix/rwlock.h (gtm_rwlock): Likewise. + * libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt. + * libitm/config/x86/sjlj.S (_ITM_beginTransaction): Fix HTM fastpath. + * libitm/libitm_i.h (htm_fastpath): Remove declaration. + * libitm/method-serial.cc (htm_mg): Adapt. + (gtm_thread::serialirr_mode): Adapt. + * libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt. + 2016-01-21 Torvald Riegel <triegel@redhat.com> * testsuite/libitm.c++/libstdc++-safeexc.C: Not supported on darwin diff --git a/libitm/beginend.cc b/libitm/beginend.cc index 00d28f4efb6..1a258ad7965 100644 --- a/libitm/beginend.cc +++ b/libitm/beginend.cc @@ -32,7 +32,11 @@ using namespace GTM; extern __thread gtm_thread_tls _gtm_thr_tls; #endif -gtm_rwlock GTM::gtm_thread::serial_lock; +// Put this at the start of a cacheline so that serial_lock's writers and +// htm_fastpath fields are on the same cacheline, so that HW transactions +// only have to pay one cacheline capacity to monitor both. +gtm_rwlock GTM::gtm_thread::serial_lock + __attribute__((aligned(HW_CACHELINE_SIZE))); gtm_thread *GTM::gtm_thread::list_of_threads = 0; unsigned GTM::gtm_thread::number_of_threads = 0; @@ -51,9 +55,6 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t thr_release_key; static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT; -// See gtm_thread::begin_transaction. -uint32_t GTM::htm_fastpath = 0; - /* Allocate a transaction structure. */ void * GTM::gtm_thread::operator new (size_t s) @@ -173,9 +174,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // lock's writer flag and thus abort if another thread is or becomes a // serial transaction. Therefore, if the fastpath is enabled, then a // transaction is not executing as a HW transaction iff the serial lock is - // write-locked. This allows us to use htm_fastpath and the serial lock's - // writer flag to reliable determine whether the current thread runs a HW - // transaction, and thus we do not need to maintain this information in + // write-locked. Also, HW transactions monitor the fastpath control + // variable, so that they will only execute if dispatch_htm is still the + // current method group. This allows us to use htm_fastpath and the serial + // lock's writers flag to reliable determine whether the current thread runs + // a HW transaction, and thus we do not need to maintain this information in // per-thread state. // If an uninstrumented code path is not available, we can still run // instrumented code from a HW transaction because the HTM fastpath kicks @@ -187,9 +190,14 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // for any internal changes (e.g., they never abort visibly to the STM code // and thus do not trigger the standard retry handling). #ifndef HTM_CUSTOM_FASTPATH - if (likely(htm_fastpath && (prop & pr_hasNoAbort))) + if (likely(serial_lock.get_htm_fastpath() && (prop & pr_hasNoAbort))) { - for (uint32_t t = htm_fastpath; t; t--) + // Note that the snapshot of htm_fastpath that we take here could be + // outdated, and a different method group than dispatch_htm may have + // been chosen in the meantime. Therefore, take care not not touch + // anything besides the serial lock, which is independent of method + // groups. + for (uint32_t t = serial_lock.get_htm_fastpath(); t; t--) { uint32_t ret = htm_begin(); if (htm_begin_success(ret)) @@ -197,9 +205,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // We are executing a transaction now. // Monitor the writer flag in the serial-mode lock, and abort // if there is an active or waiting serial-mode transaction. + // Also checks that htm_fastpath is still nonzero and thus + // HW transactions are allowed to run. // Note that this can also happen due to an enclosing // serial-mode transaction; we handle this case below. - if (unlikely(serial_lock.is_write_locked())) + if (unlikely(serial_lock.htm_fastpath_disabled())) htm_abort(); else // We do not need to set a_saveLiveVariables because of HTM. @@ -210,9 +220,12 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // retrying the transaction will be successful. if (!htm_abort_should_retry(ret)) break; + // Check whether the HTM fastpath has been disabled. + if (!serial_lock.get_htm_fastpath()) + break; // Wait until any concurrent serial-mode transactions have finished. // This is an empty critical section, but won't be elided. - if (serial_lock.is_write_locked()) + if (serial_lock.htm_fastpath_disabled()) { tx = gtm_thr(); if (unlikely(tx == NULL)) @@ -247,7 +260,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // HTM fastpath aborted, and that we thus have to decide whether to retry // the fastpath (returning a_tryHTMFastPath) or just proceed with the // fallback method. - if (likely(htm_fastpath && (prop & pr_HTMRetryableAbort))) + if (likely(serial_lock.get_htm_fastpath() && (prop & pr_HTMRetryableAbort))) { tx = gtm_thr(); if (unlikely(tx == NULL)) @@ -261,13 +274,15 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) // other fallback will use serial transactions, which don't use // restart_total but will reset it when committing. if (!(prop & pr_HTMRetriedAfterAbort)) - tx->restart_total = htm_fastpath; + tx->restart_total = gtm_thread::serial_lock.get_htm_fastpath(); if (--tx->restart_total > 0) { // Wait until any concurrent serial-mode transactions have finished. // Essentially the same code as above. - if (serial_lock.is_write_locked()) + if (!serial_lock.get_htm_fastpath()) + goto stop_custom_htm_fastpath; + if (serial_lock.htm_fastpath_disabled()) { if (tx->nesting > 0) goto stop_custom_htm_fastpath; @@ -691,7 +706,7 @@ _ITM_commitTransaction(void) // a serial-mode transaction. If we are, then there will be no other // concurrent serial-mode transaction. // See gtm_thread::begin_transaction. - if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) + if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) { htm_commit(); return; @@ -707,7 +722,7 @@ _ITM_commitTransactionEH(void *exc_ptr) { #if defined(USE_HTM_FASTPATH) // See _ITM_commitTransaction. - if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) + if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) { htm_commit(); return; diff --git a/libitm/config/linux/rwlock.h b/libitm/config/linux/rwlock.h index d9f364cefe0..4dd1dab80cb 100644 --- a/libitm/config/linux/rwlock.h +++ b/libitm/config/linux/rwlock.h @@ -41,19 +41,27 @@ struct gtm_thread; // read-to-write upgrades do not have a higher priority than writers. // // Do not change the layout of this class; it must remain a POD type with -// standard layout, and the WRITERS field must be first (i.e., so the +// standard layout, and the writers field must be first (i.e., so the // assembler code can assume that its address is equal to the address of the -// respective instance of the class). +// respective instance of the class), and htm_fastpath must be second. class gtm_rwlock { - // TODO Put futexes on different cachelines? std::atomic<int> writers; // Writers' futex. + // We put the HTM fastpath control variable here so that HTM fastpath + // transactions can check efficiently whether they are allowed to run. + // This must be accessed atomically because threads can load this value + // when they are neither a registered reader nor writer (i.e., when they + // attempt to execute the HTM fastpath). + std::atomic<uint32_t> htm_fastpath; + // TODO Put these futexes on different cachelines? (writers and htm_fastpath + // should remain on the same cacheline. std::atomic<int> writer_readers;// A confirmed writer waits here for readers. std::atomic<int> readers; // Readers wait here for writers (iff true). public: - gtm_rwlock() : writers(0), writer_readers(0), readers(0) {}; + gtm_rwlock() : writers(0), htm_fastpath(0), writer_readers(0), readers(0) + { } void read_lock (gtm_thread *tx); void read_unlock (gtm_thread *tx); @@ -64,12 +72,28 @@ class gtm_rwlock bool write_upgrade (gtm_thread *tx); void write_upgrade_finish (gtm_thread *tx); - // Returns true iff there is a concurrent active or waiting writer. - // This is primarily useful for simple HyTM approaches, and the value being - // checked is loaded with memory_order_relaxed. - bool is_write_locked() + // Returns true iff there is a concurrent active or waiting writer, or + // htm_fastpath is zero. This is primarily useful for simple HyTM + // approaches, and the values being checked are loaded with + // memory_order_relaxed. + bool htm_fastpath_disabled () { - return writers.load (memory_order_relaxed) != 0; + return writers.load (memory_order_relaxed) != 0 + || htm_fastpath.load (memory_order_relaxed) == 0; + } + + // This does not need to return an exact value, hence relaxed MO is + // sufficient. + uint32_t get_htm_fastpath () + { + return htm_fastpath.load (memory_order_relaxed); + } + // This must only be called while having acquired the write lock, and other + // threads do not need to load an exact value; hence relaxed MO is + // sufficient. + void set_htm_fastpath (uint32_t val) + { + htm_fastpath.store (val, memory_order_relaxed); } protected: diff --git a/libitm/config/posix/rwlock.cc b/libitm/config/posix/rwlock.cc index 1e1eea820f2..86f71058525 100644 --- a/libitm/config/posix/rwlock.cc +++ b/libitm/config/posix/rwlock.cc @@ -31,6 +31,7 @@ namespace GTM HIDDEN { gtm_rwlock::gtm_rwlock() : summary (0), + htm_fastpath (0), mutex (PTHREAD_MUTEX_INITIALIZER), c_readers (PTHREAD_COND_INITIALIZER), c_writers (PTHREAD_COND_INITIALIZER), diff --git a/libitm/config/posix/rwlock.h b/libitm/config/posix/rwlock.h index 81c29a6f426..1e74e64b87d 100644 --- a/libitm/config/posix/rwlock.h +++ b/libitm/config/posix/rwlock.h @@ -46,9 +46,9 @@ struct gtm_thread; // read-to-write upgrades do not have a higher priority than writers. // // Do not change the layout of this class; it must remain a POD type with -// standard layout, and the SUMMARY field must be first (i.e., so the +// standard layout, and the summary field must be first (i.e., so the // assembler code can assume that its address is equal to the address of the -// respective instance of the class). +// respective instance of the class), and htm_fastpath must be second. class gtm_rwlock { @@ -58,6 +58,13 @@ class gtm_rwlock std::atomic<unsigned int> summary; // Bitmask of the above. + // We put the HTM fastpath control variable here so that HTM fastpath + // transactions can check efficiently whether they are allowed to run. + // This must be accessed atomically because threads can load this value + // when they are neither a registered reader nor writer (i.e., when they + // attempt to execute the HTM fastpath). + std::atomic<uint32_t> htm_fastpath; + pthread_mutex_t mutex; // Held if manipulating any field. pthread_cond_t c_readers; // Readers wait here pthread_cond_t c_writers; // Writers wait here for writers @@ -80,12 +87,28 @@ class gtm_rwlock bool write_upgrade (gtm_thread *tx); void write_upgrade_finish (gtm_thread *tx); - // Returns true iff there is a concurrent active or waiting writer. - // This is primarily useful for simple HyTM approaches, and the value being - // checked is loaded with memory_order_relaxed. - bool is_write_locked() + // Returns true iff there is a concurrent active or waiting writer, or + // htm_fastpath is zero. This is primarily useful for simple HyTM + // approaches, and the values being checked are loaded with + // memory_order_relaxed. + bool htm_fastpath_disabled () + { + return (summary.load (memory_order_relaxed) & (a_writer | w_writer)) + || htm_fastpath.load (memory_order_relaxed) == 0; + } + + // This does not need to return an exact value, hence relaxed MO is + // sufficient. + uint32_t get_htm_fastpath () + { + return htm_fastpath.load (memory_order_relaxed); + } + // This must only be called while having acquired the write lock, and other + // threads do not need to load an exact value; hence relaxed MO is + // sufficient. + void set_htm_fastpath (uint32_t val) { - return summary.load (memory_order_relaxed) & (a_writer | w_writer); + htm_fastpath.store (val, memory_order_relaxed); } protected: diff --git a/libitm/config/x86/sjlj.S b/libitm/config/x86/sjlj.S index 4b79db778f7..3d2a922c82c 100644 --- a/libitm/config/x86/sjlj.S +++ b/libitm/config/x86/sjlj.S @@ -81,8 +81,9 @@ SYM(_ITM_beginTransaction): back to another execution method. RTM restores all registers after a HW transaction abort, so we can do the SW setjmp after aborts, and we have to because we might choose a SW fall back. However, - we have to explicitly save/restore the first argument (edi). */ - cmpl $0, SYM(gtm_htm_fastpath)(%rip) + we have to explicitly save/restore the first argument (edi). + The htm_fastpath field is the second int in gtm_rwlock. */ + cmpl $0, (SYM(gtm_serial_lock)+4)(%rip) jz .Lno_htm testl $pr_hasNoAbort, %edi jz .Lno_htm @@ -95,6 +96,10 @@ SYM(_ITM_beginTransaction): this case in the retry policy implementation. */ cmpl $0, SYM(gtm_serial_lock)(%rip) jnz 1f + /* Now also check that HW transactions are still allowed to run (see + gtm_thread::begin_transaction for why this is necessary). */ + cmpl $0, (SYM(gtm_serial_lock)+4)(%rip) + jz 1f /* Everything is good. Run the transaction, preferably using the uninstrumented code path. Note that the following works because pr_uninstrumentedCode == a_runUninstrumentedCode. */ @@ -102,8 +107,9 @@ SYM(_ITM_beginTransaction): mov $a_runInstrumentedCode, %eax cmovnz %edi, %eax ret - /* There is a serial-mode transaction, so abort (see htm_abort() - regarding the abort code). */ + /* There is a serial-mode transaction or HW transactions are not + allowed anymore, so abort (see htm_abort() regarding the abort + code). */ 1: xabort $0xff .Ltxn_abort: /* If it might make sense to retry the HTM fast path, let the C++ diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h index ae88ff0eccc..a8cff575754 100644 --- a/libitm/libitm_i.h +++ b/libitm/libitm_i.h @@ -344,11 +344,6 @@ extern abi_dispatch *dispatch_gl_wt(); extern abi_dispatch *dispatch_ml_wt(); extern abi_dispatch *dispatch_htm(); -// Control variable for the HTM fastpath that uses serial mode as fallback. -// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction. -// Accessed from assembly language, thus the "asm" specifier on -// the name, avoiding complex name mangling. -extern uint32_t htm_fastpath __asm__(UPFX "gtm_htm_fastpath"); } // namespace GTM diff --git a/libitm/method-serial.cc b/libitm/method-serial.cc index 1123e34d56b..a151b669618 100644 --- a/libitm/method-serial.cc +++ b/libitm/method-serial.cc @@ -226,13 +226,13 @@ struct htm_mg : public method_group // Enable the HTM fastpath if the HW is available. The fastpath is // initially disabled. #ifdef USE_HTM_FASTPATH - htm_fastpath = htm_init(); + gtm_thread::serial_lock.set_htm_fastpath(htm_init()); #endif } virtual void fini() { // Disable the HTM fastpath. - htm_fastpath = 0; + gtm_thread::serial_lock.set_htm_fastpath(0); } }; @@ -292,7 +292,7 @@ GTM::gtm_thread::serialirr_mode () #if defined(USE_HTM_FASTPATH) // HTM fastpath. If we are executing a HW transaction, don't go serial but // continue. See gtm_thread::begin_transaction. - if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) + if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) return; #endif diff --git a/libitm/query.cc b/libitm/query.cc index b7a118006ec..ddce8462f2d 100644 --- a/libitm/query.cc +++ b/libitm/query.cc @@ -49,7 +49,7 @@ _ITM_inTransaction (void) // a transaction and thus we can't deduce this by looking at just the serial // lock. This function isn't used in practice currently, so the easiest // way to handle it is to just abort. - if (htm_fastpath && htm_transaction_active()) + if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) htm_abort(); #endif struct gtm_thread *tx = gtm_thr(); @@ -69,7 +69,7 @@ _ITM_getTransactionId (void) { #if defined(USE_HTM_FASTPATH) // See ITM_inTransaction. - if (htm_fastpath && htm_transaction_active()) + if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) htm_abort(); #endif struct gtm_thread *tx = gtm_thr(); |