author     Michelle McDaniel <adiaaida@gmail.com>   2016-06-24 14:10:52 -0700
committer  Michelle McDaniel <adiaaida@gmail.com>   2016-07-29 09:16:39 -0700
commit     a1fa8c14cae981369dccf2f361b01959b215ad8f (patch)
tree       cfbd2c5020bacd9422a64338f513350057e3cae0
parent     a812669c5737a336745f42c099a1a8a6e1aafa4f (diff)
Massage code for clang-format
This change starts the process of updating the JIT code to make it ready to be formatted by clang-format. The changes mostly consist of reflowing comments that go past our column limit and moving comments around ifdefs so that clang-format does not modify their indentation. Additionally, some header files are manually reformatted for pointer alignment and marked as clang-format off so that we do not lose the current formatting.
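One recurring pattern in the diff below is a CLANG_FORMAT_COMMENT_ANCHOR placed between a comment and the #ifdef it used to sit directly above, so that clang-format keeps the comment's indentation instead of re-flowing it against the preprocessor block. A minimal hypothetical sketch of that pattern follows; it is not taken from the patch, and the macro is assumed here to expand to nothing so the trailing semicolon is just an empty statement:

    #include <cstdio>

    // Assumed stand-in for the JIT's real definition, only so this sketch compiles on its own.
    #ifndef CLANG_FORMAT_COMMENT_ANCHOR
    #define CLANG_FORMAT_COMMENT_ANCHOR
    #endif

    void exampleBeforeIfdef(bool verbose)
    {
        (void)verbose; // unused when DEBUG is not defined

        // This comment used to sit directly above the #ifdef; the anchor below gives
        // clang-format an ordinary (empty) statement to attach it to, so the comment's
        // indentation is left alone when the file is reformatted.
        CLANG_FORMAT_COMMENT_ANCHOR;

    #ifdef DEBUG
        if (verbose)
        {
            printf("taking the debug path\n");
        }
    #endif
    }

The other pattern mentioned in the message, fencing hand-formatted tables with clang-format off/on, shows up in the bitset.cpp hunk further down and is illustrated there.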
-rw-r--r--  src/jit/.clang-format | 80
-rwxr-xr-x  src/jit/_typeinfo.h | 1
-rw-r--r--  src/jit/assertionprop.cpp | 24
-rw-r--r--  src/jit/bitset.cpp | 2
-rw-r--r--  src/jit/bitset.h | 13
-rw-r--r--  src/jit/bitsetasuint64inclass.h | 2
-rw-r--r--  src/jit/bitvec.h | 4
-rw-r--r--  src/jit/block.h | 70
-rw-r--r--  src/jit/blockset.h | 4
-rwxr-xr-x  src/jit/codegen.h | 3
-rw-r--r--  src/jit/codegenarm.cpp | 24
-rw-r--r--  src/jit/codegenarm64.cpp | 197
-rw-r--r--  src/jit/codegenclassic.h | 107
-rwxr-xr-x  src/jit/codegencommon.cpp | 261
-rw-r--r--  src/jit/codegenlegacy.cpp | 282
-rwxr-xr-x  src/jit/codegenxarch.cpp | 47
-rw-r--r--  src/jit/compiler.cpp | 37
-rw-r--r--  src/jit/compiler.h | 165
-rw-r--r--  src/jit/compiler.hpp | 20
-rw-r--r--  src/jit/compmemkind.h | 2
-rw-r--r--  src/jit/compphases.h | 2
-rw-r--r--  src/jit/decomposelongs.cpp | 7
-rw-r--r--  src/jit/disasm.cpp | 10
-rw-r--r--  src/jit/disasm.h | 4
-rwxr-xr-x  src/jit/ee_il_dll.cpp | 18
-rw-r--r--  src/jit/emit.cpp | 75
-rw-r--r--  src/jit/emit.h | 20
-rw-r--r--  src/jit/emitarm.cpp | 26
-rw-r--r--  src/jit/emitarm64.cpp | 25
-rw-r--r--  src/jit/emitfmtsarm.h | 3
-rw-r--r--  src/jit/emitfmtsarm64.h | 3
-rw-r--r--  src/jit/emitfmtsxarch.h | 2
-rw-r--r--  src/jit/emitjmps.h | 4
-rw-r--r--  src/jit/emitpub.h | 2
-rw-r--r--  src/jit/emitxarch.cpp | 88
-rw-r--r--  src/jit/emitxarch.h | 2
-rw-r--r--  src/jit/flowgraph.cpp | 114
-rw-r--r--  src/jit/fp.h | 22
-rw-r--r--  src/jit/gcencode.cpp | 12
-rw-r--r--  src/jit/gcinfo.cpp | 2
-rw-r--r--  src/jit/gentree.cpp | 51
-rw-r--r--  src/jit/gentree.h | 66
-rw-r--r--  src/jit/gtlist.h | 2
-rw-r--r--  src/jit/gtstructs.h | 3
-rw-r--r--  src/jit/hashbv.cpp | 2
-rw-r--r--  src/jit/hashbv.h | 3
-rw-r--r--  src/jit/importer.cpp | 137
-rw-r--r--  src/jit/inlinepolicy.cpp | 6
-rw-r--r--  src/jit/instr.cpp | 9
-rw-r--r--  src/jit/instr.h | 2
-rw-r--r--  src/jit/instrsarm.h | 2
-rw-r--r--  src/jit/instrsarm64.h | 4
-rw-r--r--  src/jit/instrsxarch.h | 3
-rw-r--r--  src/jit/jit.h | 16
-rw-r--r--  src/jit/jitconfigvalues.h | 3
-rw-r--r--  src/jit/jiteh.cpp | 190
-rw-r--r--  src/jit/jitgcinfo.h | 6
-rw-r--r--  src/jit/jittelemetry.cpp | 9
-rw-r--r--  src/jit/lclvars.cpp | 71
-rw-r--r--  src/jit/liveness.cpp | 20
-rw-r--r--  src/jit/lower.cpp | 68
-rw-r--r--  src/jit/lowerarm64.cpp | 11
-rw-r--r--  src/jit/lowerxarch.cpp | 20
-rw-r--r--  src/jit/lsra.cpp | 67
-rw-r--r--  src/jit/lsra.h | 9
-rw-r--r--  src/jit/lsra_reftypes.h | 2
-rwxr-xr-x  src/jit/morph.cpp | 157
-rw-r--r--  src/jit/optcse.cpp | 9
-rw-r--r--  src/jit/optimizer.cpp | 38
-rw-r--r--  src/jit/rangecheck.cpp | 4
-rw-r--r--  src/jit/rationalize.cpp | 5
-rw-r--r--  src/jit/rationalize.h | 8
-rw-r--r--  src/jit/regalloc.cpp | 73
-rw-r--r--  src/jit/register.h | 4
-rw-r--r--  src/jit/register_arg_convention.cpp | 8
-rw-r--r--  src/jit/registerarm.h | 4
-rw-r--r--  src/jit/registerarm64.h | 4
-rw-r--r--  src/jit/registerxmm.h | 3
-rw-r--r--  src/jit/regset.cpp | 15
-rw-r--r--  src/jit/regset.h | 1
-rw-r--r--  src/jit/sharedfloat.cpp | 6
-rw-r--r--  src/jit/simd.cpp | 19
-rw-r--r--  src/jit/simdcodegenxarch.cpp | 12
-rw-r--r--  src/jit/simdintrinsiclist.h | 2
-rw-r--r--  src/jit/smcommon.cpp | 7
-rw-r--r--  src/jit/smdata.cpp | 4
-rw-r--r--  src/jit/ssabuilder.cpp | 25
-rw-r--r--  src/jit/ssabuilder.h | 10
-rw-r--r--  src/jit/stackfp.cpp | 2
-rw-r--r--  src/jit/target.h | 30
-rw-r--r--  src/jit/typelist.h | 2
-rw-r--r--  src/jit/unwind.h | 21
-rw-r--r--  src/jit/unwindarm.cpp | 18
-rw-r--r--  src/jit/utils.cpp | 12
-rw-r--r--  src/jit/valuenum.cpp | 46
-rw-r--r--  src/jit/valuenum.h | 28
-rw-r--r--  src/jit/valuenumfuncs.h | 3
-rw-r--r--  src/jit/varset.h | 5
98 files changed, 1888 insertions, 1265 deletions
diff --git a/src/jit/.clang-format b/src/jit/.clang-format
new file mode 100644
index 0000000000..756dbff197
--- /dev/null
+++ b/src/jit/.clang-format
@@ -0,0 +1,80 @@
+---
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ ]
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 400
+PenaltyBreakComment: 50
+PenaltyBreakFirstLessLess: 500
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 100000
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
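For illustration only (this snippet is hypothetical and not part of the patch), code run through clang-format with the settings above comes out with Allman braces, four-space indents, left-bound pointers (PointerAlignment: Left), indented case labels, and consecutive declarations and assignments aligned:

    struct ExampleNode
    {
        ExampleNode* next;   // PointerAlignment: Left binds the '*' to the type
        unsigned     weight; // AlignConsecutiveDeclarations lines up the names
        int          kind;
    };

    int classify(ExampleNode* node)
    {
        int result  = 0;  // AlignConsecutiveAssignments lines up the '='
        int penalty = 10;

        switch (node->kind)
        {
            case 0: // IndentCaseLabels: true
                result = penalty;
                break;
            default:
                result = -penalty;
                break;
        }
        return result;
    }

The 120-character ColumnLimit is what drives most of the comment reflowing in the hunks below, and DerivePointerAlignment: false is why the header files touched by this change are normalized to the left pointer style by hand.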
diff --git a/src/jit/_typeinfo.h b/src/jit/_typeinfo.h
index 2e04343b3c..c560ec7ac2 100755
--- a/src/jit/_typeinfo.h
+++ b/src/jit/_typeinfo.h
@@ -416,7 +416,6 @@ public:
static bool AreEquivalentModuloNativeInt(const typeInfo& verTi, const typeInfo& nodeTi)
{
if (AreEquivalent(verTi, nodeTi)) return true;
- // Otherwise...
#ifdef _TARGET_64BIT_
return (nodeTi.IsType(TI_I_IMPL) && tiCompatibleWith(0, verTi, typeInfo::nativeInt(), true)) ||
(verTi.IsType(TI_I_IMPL) && tiCompatibleWith(0, typeInfo::nativeInt(), nodeTi, true));
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index 1ac1cd285f..055f43c161 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -369,7 +369,8 @@ void Compiler::optAddCopies()
// This block will be the new candidate for the insert point
// for the new assignment
- //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("new bestBlock\n");
@@ -379,9 +380,10 @@ void Compiler::optAddCopies()
bestWeight = block->getBBWeight(this);
}
- /* If there is a use of the variable in this block */
- /* then we insert the assignment at the beginning */
- /* otherwise we insert the statement at the end */
+ // If there is a use of the variable in this block
+ // then we insert the assignment at the beginning
+ // otherwise we insert the statement at the end
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
@@ -2251,9 +2253,9 @@ GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt,
#ifdef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
+#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
-#ifdef RELOC_SUPPORT
if (!opts.compReloc)
#endif
{
@@ -2322,9 +2324,9 @@ GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt,
#ifndef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
+#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
-#ifdef RELOC_SUPPORT
if (!opts.compReloc)
#endif
{
@@ -2475,8 +2477,10 @@ GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, GenTre
if (!optLocalAssertionProp)
{
assert(newTree->OperIsConst()); // We should have a simple Constant node for newTree
- assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid VN representing the constant
- newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2 of the assertion
+ assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid
+ // VN representing the constant
+ newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2
+ // of the assertion
}
#ifdef DEBUG
@@ -2708,8 +2712,8 @@ GenTreePtr Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const
* op1Kind and lclNum, op2Kind and the constant value and is either equal or
* not equal assertion.
*/
-Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind,
- ssize_t cnsVal, ASSERT_VALARG_TP assertions)
+Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(optOp1Kind op1Kind, unsigned lclNum,
+ optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions)
{
noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE));
noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT));
diff --git a/src/jit/bitset.cpp b/src/jit/bitset.cpp
index 30b18cf20b..3b9135c730 100644
--- a/src/jit/bitset.cpp
+++ b/src/jit/bitset.cpp
@@ -12,10 +12,12 @@
#include "bitsetasshortlong.h"
#include "bitsetasuint64inclass.h"
+// clang-format off
unsigned BitSetSupport::BitCountTable[16] = { 0, 1, 1, 2,
1, 2, 2, 3,
1, 2, 2, 3,
2, 3, 3, 4 };
+// clang-format on
#ifdef DEBUG
template<typename BitSetType,
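The BitCountTable hunk above shows the clang-format off pattern from the commit message: a hand-laid-out table is fenced off so its 4x4 layout is preserved when the rest of the file is reformatted. A hypothetical standalone example of the same technique (not from the patch):

    // clang-format off
    static unsigned char parity[16] = { 0, 1, 1, 0,
                                        1, 0, 0, 1,
                                        1, 0, 0, 1,
                                        0, 1, 1, 0 };
    // clang-format on

Without the fence, clang-format would likely re-pack the braced initializer to fit the column limit and the row structure would be lost.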
diff --git a/src/jit/bitset.h b/src/jit/bitset.h
index dd474bb480..d11eb72413 100644
--- a/src/jit/bitset.h
+++ b/src/jit/bitset.h
@@ -137,9 +137,10 @@ unsigned BitSetSupport::CountBitsInIntegral<unsigned>(unsigned c)
// An "adapter" class that provides methods that retrieves things from the Env:
// static IAllocator* GetAllococator(Env): yields an "IAllocator*" that the BitSet implementation can use.
// static unsigned GetSize(Env): the current size (= # of bits) of this bitset type.
-// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold "GetSize".
-// A given BitSet implementation must call this with only one constant value.
-// Thus, and "Env" may compute this result when GetSize changes.
+// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold
+// "GetSize". A given BitSet implementation must call
+// this with only one constant value. Thus, and "Env"
+// may compute this result when GetSize changes.
//
// static unsigned GetEpoch(Env): the current epoch.
//
@@ -149,7 +150,8 @@ unsigned BitSetSupport::CountBitsInIntegral<unsigned>(unsigned c)
// In addition to implementing the method signatures here, an instantiation of BitSetOps must also export a
// BitSetOps::Iter type, which supports the following operations:
// Iter(BitSetValueArgType): a constructor
-// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next yielded member.
+// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next
+// yielded member.
//
// Finally, it should export two further types:
//
@@ -166,12 +168,13 @@ template<typename BitSetType,
typename BitSetTraits>
class BitSetOps
{
+#if 0
// Below are the set of methods that an instantiation of BitSetOps should provide. This is
// #if'd out because it doesn't make any difference; C++ has no mechanism for checking that
// the methods of an instantiation are consistent with these signatures, other than the expectations
// embodied in the program that uses the instantiation(s). But it's useful documentation, and
// we should try to keep it up to date.
-#if 0
+
public:
// The uninitialized value -- not a real bitset (if possible).
diff --git a/src/jit/bitsetasuint64inclass.h b/src/jit/bitsetasuint64inclass.h
index a5574bf5b0..a5df174d3f 100644
--- a/src/jit/bitsetasuint64inclass.h
+++ b/src/jit/bitsetasuint64inclass.h
@@ -63,12 +63,12 @@ private:
;
}
+#ifndef DEBUG
// In debug we also want the default copy constructor to be private, to make inadvertent
// default initializations illegal. Debug builds therefore arrange to use the
// non-default constructor defined below that takes an extra argument where one would
// otherwise use a copy constructor. In non-debug builds, we don't pass the extra dummy
// int argument, and just make copy constructor defined here visible.
-#ifndef DEBUG
public:
#endif
BitSetUint64(const BitSetUint64& bs) : m_bits(bs.m_bits)
diff --git a/src/jit/bitvec.h b/src/jit/bitvec.h
index 60de704a51..dfeddaa779 100644
--- a/src/jit/bitvec.h
+++ b/src/jit/bitvec.h
@@ -37,8 +37,8 @@ typedef BitSetShortLongRep BitVec;
typedef BitVecOps::ValArgType BitVec_ValArg_T;
typedef BitVecOps::RetValType BitVec_ValRet_T;
-// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a set
-// for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// set for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
#define BITVEC_INIT(_traits, _varName, _initVal) _varName(BitVecOps::MakeCopy(_traits, _initVal))
// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
diff --git a/src/jit/block.h b/src/jit/block.h
index 92f9f0103d..bf7f820f66 100644
--- a/src/jit/block.h
+++ b/src/jit/block.h
@@ -142,7 +142,8 @@ enum ThisInitState
struct EntryState
{
- ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use fewer bits here)
+ ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use
+ // fewer bits here)
unsigned esStackDepth : 24; // size of esStack
StackEntry* esStack; // ptr to stack
};
@@ -319,22 +320,25 @@ struct BasicBlock
#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
-#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during non-exceptional flow.
- // Because the ARM calling sequence for calling a finally explicitly sets the return address to
- // the finally target and jumps to the finally, instead of using a call instruction, ARM needs this
- // to generate correct code at the finally target, to allow for proper stack unwind from within a
- // non-exceptional call to a finally.
+#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
+ // non-exceptional flow. Because the ARM calling sequence for calling a
+ // finally explicitly sets the return address to the finally target and jumps
+ // to the finally, instead of using a call instruction, ARM needs this to
+ // generate correct code at the finally target, to allow for proper stack
+ // unwind from within a non-exceptional call to a finally.
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
-#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired BBJ_ALWAYS); see isBBCallAlwaysPair().
+#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+ // BBJ_ALWAYS); see isBBCallAlwaysPair().
#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
#define BBF_COLD 0x10000000 // BB is cold
#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
-#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind as BBJ_ALWAYS.
- // Used for the paired BBJ_ALWAYS block following the BBJ_CALLFINALLY block, as well as, on x86,
- // the final step block out of a finally.
+#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+ // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+ // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+ // finally.
bool isRunRarely() { return ((bbFlags & BBF_RUN_RARELY) != 0); }
bool isLoopHead() { return ((bbFlags & BBF_LOOP_HEAD) != 0); }
@@ -586,8 +590,8 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
// analysis that is tracking the contents of local variables might want to consider *all* successors,
// and would pass the current Compiler object.
//
- // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null, NumSucc/GetSucc
- // yields the first block of the try blocks handler.
+ // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null,
+ // NumSucc/GetSucc yields the first block of the try blocks handler.
//
// Also, the behavior for switches changes depending on the value of "comp". If it is null, then all
// switch successors are returned. If it is non-null, then only unique switch successors are returned;
@@ -621,8 +625,8 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
#define MAX_XCPTN_INDEX (USHRT_MAX - 1)
- // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly, especially
- // Compiler::fgNewBBinRegion() and friends.
+ // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly,
+ // especially Compiler::fgNewBBinRegion() and friends.
// index, into the compHndBBtab table, of innermost 'try' clause containing the BB (used for raising exceptions).
// Stored as index + 1; 0 means "no try index".
@@ -632,13 +636,13 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
// Stored as index + 1; 0 means "no handler index".
unsigned short bbHndIndex;
- // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more deeply
- // nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means "main function"
- // and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will have a numeric
- // index relationship that doesn't indicate nesting, whereas a more deeply nested region must have a lower index
- // than the region it is nested within. Note that if you compare a single block's bbTryIndex and bbHndIndex, there
- // is guaranteed to be a nesting relationship, since that block can't be simultaneously in two sibling EH regions.
- // In that case, "maybe" is actually "definitely".
+ // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more
+ // deeply nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means
+ // "main function" and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will
+ // have a numeric index relationship that doesn't indicate nesting, whereas a more deeply nested region must have
+ // a lower index than the region it is nested within. Note that if you compare a single block's bbTryIndex and
+ // bbHndIndex, there is guaranteed to be a nesting relationship, since that block can't be simultaneously in two
+ // sibling EH regions. In that case, "maybe" is actually "definitely".
static bool ehIndexMaybeMoreNested(unsigned index1, unsigned index2)
{
if (index1 == 0)
@@ -725,8 +729,9 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
#endif
IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block
- IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd) range is not inclusive of the end offset.
- // The count of IL bytes in the block is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
+ IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd)
+ // range is not inclusive of the end offset. The count of IL bytes in the block
+ // is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
#ifdef DEBUG
void dspBlockILRange(); // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for BAD_IL_OFFSET.
@@ -744,8 +749,9 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
unsigned bbHeapDef: 1;
unsigned bbHeapLiveIn: 1;
unsigned bbHeapLiveOut: 1;
- unsigned bbHeapHavoc: 1; // If true, at some point the block does an operation that leaves the heap in an unknown state.
- // (E.g., unanalyzed call, store through unknown pointer...)
+ unsigned bbHeapHavoc: 1; // If true, at some point the block does an operation that leaves the heap
+ // in an unknown state. (E.g., unanalyzed call, store through unknown
+ // pointer...)
// We want to make phi functions for the special implicit var "Heap". But since this is not a real
// lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
@@ -778,10 +784,12 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
void* operator new(size_t sz, class Compiler* comp);
};
- static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list for Heap.
+ static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
+ // for Heap.
HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
- // Otherwise, it is either the special value EmptyHeapPhiDefn, to indicate that Heap needs a phi
- // definition on entry, or else it is the linked list of the phi arguments.
+ // Otherwise, it is either the special value EmptyHeapPhiDefn, to indicate
+ // that Heap needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
@@ -849,6 +857,8 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
/* The following fields used for loop detection */
+ static const unsigned NOT_IN_LOOP = UCHAR_MAX;
+
#ifdef DEBUG
// This is the label a loop gets as part of the second, reachability-based
// loop discovery mechanism. This is apparently only used for debugging.
@@ -856,8 +866,6 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
unsigned char bbLoopNum; // set to 'n' for a loop #n header
#endif // DEBUG
- static const unsigned NOT_IN_LOOP = UCHAR_MAX;
-
unsigned char bbNatLoopNum; // Index, in optLoopTable, of most-nested loop that contains this block,
// or else NOT_IN_LOOP if this block is not in a loop.
@@ -881,7 +889,7 @@ typedef unsigned weight_t; // Type used to hold block and edge weigh
}
// Given an the edge b1 -> b2, calculate the slop fraction by
- // using the higher of the two block weights
+ // using the higher of the two block weights
static weight_t GetSlopFraction(BasicBlock* b1, BasicBlock* b2)
{
return GetSlopFraction(max(b1->bbWeight, b2->bbWeight));
diff --git a/src/jit/blockset.h b/src/jit/blockset.h
index 2b886bb96b..b47dcd6c04 100644
--- a/src/jit/blockset.h
+++ b/src/jit/blockset.h
@@ -60,8 +60,8 @@ typedef BitSetShortLongRep BlockSet;
typedef BlockSetOps::ValArgType BlockSet_ValArg_T;
typedef BlockSetOps::RetValType BlockSet_ValRet_T;
-// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a var set
-// for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// var set for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
#define BLOCKSET_INIT(_comp, _varName, _initVal) _varName(BlockSetOps::MakeCopy(_comp, _initVal))
// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 72eb676752..32937bdc8c 100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -180,6 +180,8 @@ private:
// the GC info. Requires "codeSize" to be the size of the generated code, "prologSize" and "epilogSize"
// to be the sizes of the prolog and epilog, respectively. In DEBUG, makes a check involving the
// "codePtr", assumed to be a pointer to the start of the generated code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef JIT32_GCENCODER
void* genCreateAndStoreGCInfo (unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
void* genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
@@ -468,6 +470,7 @@ protected:
//
// Epilog functions
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index dea03189f4..4b5e40fe0a 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -211,9 +211,9 @@ void CodeGen::genCodeForBBlist()
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
@@ -307,9 +307,9 @@ void CodeGen::genCodeForBBlist()
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
@@ -363,6 +363,7 @@ void CodeGen::genCodeForBBlist()
}
/* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
#if defined(_TARGET_ARM_)
@@ -373,18 +374,17 @@ void CodeGen::genCodeForBBlist()
{
assert(block->bbFlags & BBF_JMP_TARGET);
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
}
#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur,
gcInfo.gcRegGCrefSetCur,
@@ -479,6 +479,7 @@ void CodeGen::genCodeForBBlist()
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
@@ -696,7 +697,8 @@ void CodeGen::genCodeForBBlist()
// The document "X64 and ARM ABIs.docx" has more details. The situations:
// 1. If the call instruction is in a different EH region as the instruction that follows it.
// 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
// We handle case #1 here, and case #2 in the emitter.
if (getEmitter()->emitIsLastInsCall())
{
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index b6dc645112..3e505bbff0 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -173,17 +173,19 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool*
}
//------------------------------------------------------------------------
-// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet prolog.
-// If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
-// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that instruction.
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
+// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
+// instruction.
//
// Arguments:
// reg1 - First register of pair to save.
// reg2 - Second register of pair to save.
// spOffset - The offset from SP to store reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
-// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This allows us to
-// emit the "save_next" unwind code.
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
+// allows us to emit the "save_next" unwind code.
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
@@ -202,7 +204,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1,
assert(spOffset >= 0);
assert(spDelta <= 0);
assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
- assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both FP/SIMD
+ assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
+ // FP/SIMD
bool needToSaveRegs = true;
if (spDelta != 0)
@@ -246,16 +249,18 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1,
}
//------------------------------------------------------------------------
-// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or floating-point/SIMD register
-// in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), then spOffset must be 8. This is because
-// otherwise we would create an alignment hole above the saved register, not below it, which we currently don't support. This restriction
-// could be loosened if the callers change to handle it (and this function changes to support using pre-indexed STR addressing).
-// The caller must ensure that we can use the STR instruction, and that spOffset will be in the legal range for that instruction.
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
+// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
+// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
+// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
+// (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
+// STR instruction, and that spOffset will be in the legal range for that instruction.
//
// Arguments:
// reg1 - Register to save.
// spOffset - The offset from SP to store reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
@@ -294,7 +299,8 @@ void CodeGen::genPrologSaveReg(regNumber reg1,
// reg1 - First register of pair to restore.
// reg2 - Second register of pair to restore.
// spOffset - The offset from SP to load reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
@@ -348,7 +354,8 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1,
// Arguments:
// reg1 - Register to restore.
// spOffset - The offset from SP to restore reg1 (must be positive or zero).
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
// tmpReg - An available temporary register. Needed for the case of large frames.
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
// Otherwise, we don't touch it.
@@ -400,7 +407,8 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1,
// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
// if non-zero spDelta, then this is the offset of the first save *after* that
// SP adjustment.
-// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
//
// Return Value:
// None.
@@ -424,7 +432,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
assert((spDelta % 16) == 0);
assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
- assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in
+ // RBM_CALLEE_SAVED.
regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
@@ -469,8 +478,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
- // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to
- // get the codes to match. Turn this off until that is better understood.
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
// lastSavedWasPair = true;
spOffset += 2 * REGSIZE_BYTES;
@@ -521,8 +530,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
- // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to
- // get the codes to match. Turn this off until that is better understood.
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
// lastSavedWasPair = true;
spOffset += 2 * FPSAVE_REGSIZE_BYTES;
@@ -551,7 +560,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
// Arguments:
// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
-// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
//
// Here's an example restore sequence:
// ldp x27, x28, [sp,#96]
@@ -568,8 +578,8 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
// ldp x21, x22, [sp,#16]
// ldp x19, x20, [sp], #80
//
-// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when generating
-// a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
+// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
+// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
//
// Return Value:
// None.
@@ -717,7 +727,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask,
assert(intRegsToRestoreCount == 0);
}
-
+// clang-format off
/*****************************************************************************
*
* Generates code for an EH funclet prolog.
@@ -900,6 +910,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask,
* | | downward |
* V
*/
+// clang-format on
void CodeGen::genFuncletProlog(BasicBlock* block)
{
@@ -1125,7 +1136,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
return;
assert(isFramePointerUsed());
- assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
@@ -1448,9 +1460,9 @@ void CodeGen::genCodeForBBlist()
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
@@ -1539,9 +1551,9 @@ void CodeGen::genCodeForBBlist()
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
@@ -2175,7 +2187,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size,
getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
- if ((size == EA_8BYTE) && ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them
+ if ((size == EA_8BYTE) && ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them
{
getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them
@@ -2825,7 +2837,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
//
emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
- genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw
+ // ArithmeticException
genDefineTempLabel(sdivLabel);
}
@@ -3454,7 +3467,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_PUTARG_REG:
- assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by fgMorphMultiregStructArg
+ assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by
+ // fgMorphMultiregStructArg
// We have a normal non-Struct targetType
{
GenTree *op1 = treeNode->gtOp.gtOp1;
@@ -3851,7 +3865,8 @@ CodeGen::genLclHeap(GenTreePtr tree)
// Nothing needs to popped off from stack nor relocated.
if (compiler->lvaOutgoingArgSpaceSize > 0)
{
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
}
@@ -4118,9 +4133,9 @@ void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
assert(!initVal->isContained());
assert(!blockSize->isContained());
+#if 0
// TODO-ARM64-CQ: When initblk loop unrolling is implemented
// put this assert back on.
-#if 0
if (blockSize->IsCnsIntOrI())
{
assert(blockSize->gtIntCon.gtIconVal >= INITBLK_UNROLL_LIMIT);
@@ -4379,9 +4394,10 @@ void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
assert(!srcAddr->isContained());
assert(!blockSize->isContained());
- // Enable this when we support cpblk loop unrolling.
#if 0
#ifdef DEBUG
+ // Enable this when we support cpblk loop unrolling.
+
if (blockSize->IsCnsIntOrI())
{
assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
@@ -6452,8 +6468,9 @@ CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
}
- else if (treeNode->gtRegNum != op1->gtRegNum) // If double to double cast or float to float cast. Emit a move instruction.
+ else if (treeNode->gtRegNum != op1->gtRegNum)
{
+ // If double to double cast or float to float cast. Emit a move instruction.
getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
}
@@ -7432,6 +7449,7 @@ void CodeGen::genArm64EmitterUnitTests()
emitter* theEmitter = getEmitter();
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// We use this:
// genDefineTempLabel(genCreateTempLabel());
// to create artificial labels to help separate groups of tests.
@@ -7440,8 +7458,6 @@ void CodeGen::genArm64EmitterUnitTests()
// Loads/Stores basic general register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ldr/str Xt, [reg]
@@ -7499,12 +7515,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Compares
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// cmp reg, reg
@@ -7534,12 +7549,10 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
-
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
@@ -7557,12 +7570,12 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// mov reg, imm(i16,hw)
@@ -7611,12 +7624,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// tst reg, reg
@@ -7649,12 +7661,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// mov reg, imm(i16,hw)
@@ -7671,12 +7682,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
@@ -7782,11 +7792,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I cmp/txt
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// cmp
theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
@@ -7844,12 +7854,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
@@ -7900,12 +7909,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
@@ -7934,12 +7942,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_I
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ADD (extended register)
@@ -8042,12 +8049,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_I -- load/store pair
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
@@ -8091,12 +8097,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_Ext -- load/store shifted/extend
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// LDR (register)
@@ -8219,12 +8224,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_R
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
@@ -8243,11 +8247,10 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// cset reg, cond
theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
@@ -8282,11 +8285,10 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// cinc reg, reg, cond
// cinv reg, reg, cond
// cneg reg, reg, cond
@@ -8307,11 +8309,10 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_R_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// csel reg, reg, reg, cond
// csinc reg, reg, reg, cond
// csinv reg, reg, reg, cond
@@ -8333,11 +8334,10 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
// R_R_FLAGS_COND
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// ccmp reg1, reg2, nzcv, cond
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
@@ -8420,12 +8420,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Branch to register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
@@ -8435,12 +8434,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// Misc
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
@@ -8464,6 +8462,7 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
////////////////////////////////////////////////////////////////////////////////
//
// SIMD and Floating point
@@ -8474,8 +8473,6 @@ void CodeGen::genArm64EmitterUnitTests()
// Load/Stores vector register
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// ldr/str Vt, [reg]
@@ -8641,12 +8638,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R mov and aliases for mov
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// mov vector to vector
theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
@@ -8724,12 +8720,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I movi and mvni
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// movi imm8 (vector)
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
@@ -8796,12 +8791,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_I orr/bic vector immediate
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
@@ -8834,12 +8828,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_F cmp/fmov immediate
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fmov imm8 (scalar)
theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
@@ -8876,12 +8869,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R fmov/fcmp/fcvt
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fmov to vector to vector
theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
@@ -8918,12 +8910,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point conversions
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fcvtas scalar
theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
@@ -9116,12 +9107,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point operations, one dest, one source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// fabs scalar
theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
@@ -9231,12 +9221,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R floating point round to int, one dest, one source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
// frinta scalar
theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
@@ -9302,12 +9291,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R floating point operations, one dest, two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
@@ -9378,12 +9366,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_I vector operations, one dest, one source reg, one immed
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// 'sshr' scalar
@@ -9623,12 +9610,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R vector operations, one dest, two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
// Specifying an Arrangement is optional
@@ -9727,12 +9713,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R vector multiply
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
@@ -9789,12 +9774,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R floating point operations, one source/dest, and two source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
genDefineTempLabel(genCreateTempLabel());
theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
@@ -9819,12 +9803,11 @@ void CodeGen::genArm64EmitterUnitTests()
#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R_R_R floating point operations, one dest, and three source
//
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
-
theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h
index 24c98d6f8d..46a70aade2 100644
--- a/src/jit/codegenclassic.h
+++ b/src/jit/codegenclassic.h
@@ -68,12 +68,12 @@ protected:
void genPInvokeMethodEpilog();
- regNumber genPInvokeCallProlog(LclVarDsc * varDsc,
+ regNumber genPInvokeCallProlog(LclVarDsc* varDsc,
int argSize,
CORINFO_METHOD_HANDLE methodToken,
- BasicBlock * returnLabel);
+ BasicBlock* returnLabel);
- void genPInvokeCallEpilog(LclVarDsc * varDsc,
+ void genPInvokeCallEpilog(LclVarDsc* varDsc,
regMaskTP retVal);
regNumber genLclHeap (GenTreePtr size);
@@ -147,7 +147,7 @@ protected:
bool forLea,
regMaskTP regMask,
RegSet::KeepReg keepReg,
- regMaskTP * useMaskPtr,
+ regMaskTP* useMaskPtr,
bool deferOp = false);
regMaskTP genMakeRvalueAddressable(GenTreePtr tree,
@@ -195,7 +195,7 @@ protected:
RegSet::KeepReg keptReg);
GenTreePtr genMakeAddrOrFPstk (GenTreePtr tree,
- regMaskTP * regMaskPtr,
+ regMaskTP* regMaskPtr,
bool roundResult);
void genEmitGSCookieCheck(bool pushReg);
@@ -204,8 +204,8 @@ protected:
void genCondJump (GenTreePtr cond,
- BasicBlock * destTrue = NULL,
- BasicBlock * destFalse = NULL,
+ BasicBlock* destTrue = NULL,
+ BasicBlock* destFalse = NULL,
bool bStackFPFixup = true);
@@ -213,28 +213,28 @@ protected:
void genJCC (genTreeOps cmp,
- BasicBlock * block,
+ BasicBlock* block,
var_types type);
void genJccLongHi (genTreeOps cmp,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool unsOper = false);
void genJccLongLo (genTreeOps cmp,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
void genCondJumpLng (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bFPTransition = false);
bool genUse_fcomip();
void genTableSwitch (regNumber reg,
unsigned jumpCnt,
- BasicBlock ** jumpTab);
+ BasicBlock** jumpTab);
regMaskTP WriteBarrier (GenTreePtr tgt,
GenTreePtr assignVal,
@@ -324,7 +324,7 @@ protected:
regMaskTP destReg,
regMaskTP bestReg = RBM_NONE);
- regNumber genIntegerCast(GenTree *tree, regMaskTP needReg, regMaskTP bestReg);
+ regNumber genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg);
void genCodeForNumericCast(GenTreePtr tree,
regMaskTP destReg,
@@ -420,8 +420,8 @@ protected:
void genCodeForSwitch (GenTreePtr tree);
regMaskTP genPushRegs (regMaskTP regs,
- regMaskTP * byrefRegs,
- regMaskTP * noRefRegs);
+ regMaskTP* byrefRegs,
+ regMaskTP* noRefRegs);
void genPopRegs (regMaskTP regs,
regMaskTP byrefRegs,
regMaskTP noRefRegs);
@@ -473,7 +473,7 @@ protected:
LclVarDsc* promotedStructLocalVarDesc,
emitAttr fieldSize,
unsigned* pNextPromotedStructFieldVar, // IN/OUT
- unsigned *pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
regNumber* pCurRegNum, // IN/OUT
int argOffset,
int fieldOffsetOfFirstStackSlot,
@@ -505,7 +505,7 @@ protected:
GenTreePtr genGetAddrModeBase (GenTreePtr tree);
GenTreePtr genIsAddrMode (GenTreePtr tree,
- GenTreePtr * indxPtr);
+ GenTreePtr* indxPtr);
private:
bool genIsLocalLastUse (GenTreePtr tree);
@@ -554,25 +554,25 @@ private:
void genCodeForTreeStackFP_Cast (GenTreePtr tree);
void genCodeForTreeStackFP (GenTreePtr tree);
void genCondJumpFltStackFP (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bDoTransition = true);
void genCondJumpFloat (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
void genCondJumpLngStackFP (GenTreePtr cond,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse);
-
- void genFloatConst(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatLeaf(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatSimple(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatMath(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatCheckFinite(GenTree *tree, RegSet::RegisterPreference *pref);
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse);
+
+ void genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref);
void genLoadFloat(GenTreePtr tree, regNumber reg);
- void genFloatAssign(GenTree *tree);
- void genFloatArith(GenTree *tree, RegSet::RegisterPreference *pref);
- void genFloatAsgArith(GenTree *tree);
+ void genFloatAssign(GenTree* tree);
+ void genFloatArith(GenTree* tree, RegSet::RegisterPreference* pref);
+ void genFloatAsgArith(GenTree* tree);
regNumber genAssignArithFloat(genTreeOps oper,
GenTreePtr dst, regNumber dstreg,
@@ -580,11 +580,11 @@ private:
GenTreePtr genMakeAddressableFloat(GenTreePtr tree,
- regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr,
+ regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr,
bool bCollapseConstantDoubles = true);
void genCodeForTreeFloat(GenTreePtr tree,
- RegSet::RegisterPreference *pref = NULL);
+ RegSet::RegisterPreference* pref = NULL);
void genCodeForTreeFloat(GenTreePtr tree,
regMaskTP needReg, regMaskTP bestReg);
@@ -593,10 +593,10 @@ private:
GenTreePtr dst, regNumber dstreg,
GenTreePtr src, regNumber srcreg,
bool bReverse);
- void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference *pref);
- void genKeepAddressableFloat(GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr);
+ void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+ void genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
void genDoneAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
void genComputeAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg, regMaskTP needReg, RegSet::KeepReg keepReg, bool freeOnly = false);
void genRoundFloatExpression(GenTreePtr op, var_types type);
@@ -617,8 +617,8 @@ private:
#endif
- GenTreePtr genMakeAddressableStackFP (GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr, bool bCollapseConstantDoubles = true);
- void genKeepAddressableStackFP (GenTreePtr tree, regMaskTP * regMaskIntPtr, regMaskTP * regMaskFltPtr);
+ GenTreePtr genMakeAddressableStackFP (GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr, bool bCollapseConstantDoubles = true);
+ void genKeepAddressableStackFP (GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
void genDoneAddressableStackFP (GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
@@ -677,12 +677,12 @@ private:
regNumber genArithmStackFP (genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
regNumber genAsgArithmStackFP (genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
void genCondJmpInsStackFP (emitJumpKind jumpKind,
- BasicBlock * jumpTrue,
- BasicBlock * jumpFalse,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
bool bDoTransition = true);
void genTableSwitchStackFP (regNumber reg,
unsigned jumpCnt,
- BasicBlock ** jumpTab);
+ BasicBlock** jumpTab);
void JitDumpFPState ();
#else // !FEATURE_STACK_FP_X87
@@ -708,10 +708,10 @@ private:
#endif // FEATURE_STACK_FP_X87
// Float spill
- void UnspillFloat (RegSet::SpillDsc *spillDsc);
+ void UnspillFloat (RegSet::SpillDsc* spillDsc);
void UnspillFloat (GenTreePtr tree);
- void UnspillFloat (LclVarDsc * varDsc);
- void UnspillFloatMachineDep (RegSet::SpillDsc *spillDsc);
+ void UnspillFloat (LclVarDsc* varDsc);
+ void UnspillFloatMachineDep (RegSet::SpillDsc* spillDsc);
void UnspillFloatMachineDep (RegSet::SpillDsc* spillDsc, bool useSameReg);
void RemoveSpillDsc (RegSet::SpillDsc* spillDsc);
@@ -729,10 +729,10 @@ protected :
{}
};
- void saveLiveness (genLivenessSet * ls);
- void restoreLiveness (genLivenessSet * ls);
- void checkLiveness (genLivenessSet * ls);
- void unspillLiveness (genLivenessSet * ls);
+ void saveLiveness (genLivenessSet* ls);
+ void restoreLiveness (genLivenessSet* ls);
+ void checkLiveness (genLivenessSet* ls);
+ void unspillLiveness (genLivenessSet* ls);
//-------------------------------------------------------------------------
//
@@ -754,3 +754,4 @@ protected :
#endif // LEGACY_BACKEND
#endif // _CODEGENCLASSIC_H_
+
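The codegenclassic.h hunk above is almost entirely pointer-declarator realignment: "Type * name" and "Type *name" become "Type* name", with the asterisk hugging the type. This is clang-format's left pointer alignment style (PointerAlignment: Left); whether that exact option is what drives it is an inference from the hunks, not something stated here. A small compilable sketch of the two spellings, using stand-in types:

    struct BasicBlock;                    // stand-in forward declaration
    typedef unsigned long long regMaskTP; // stand-in for the JIT's register mask type

    // Old spelling: the asterisk floats between the type and the name.
    void genJccLongHiOld(int cmp, BasicBlock * jumpTrue, BasicBlock * jumpFalse, bool unsOper = false);

    // New spelling, as in the prototypes above: the asterisk binds to the type.
    void genJccLongHiNew(int cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool unsOper = false);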
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 562fc08ee3..e3b95a3fae 100755
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -69,10 +69,13 @@ void CodeGenInterface::setFramePointerRequiredEH(bool value)
// if they are fully-interruptible. So if we have a catch
// or finally that will keep frame-vars alive, we need to
// force fully-interruptible.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("Method has EH, marking method as fully interruptible\n");
#endif
+
m_cgInterruptible = true;
}
#endif // JIT32_GCENCODER
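This first codegencommon.cpp hunk introduces CLANG_FORMAT_COMMENT_ANCHOR, which recurs through the rest of the patch. When a block comment is immediately followed by a preprocessor directive, clang-format can attach the comment to the directive and change its indentation; dropping an anchor statement between the comment and the #ifdef keeps the comment aligned with the surrounding code instead. The macro's definition is not shown in this section; the sketch below assumes it expands to a bare semicolon, which is enough to reproduce the effect:

    #include <cstdio>

    // Assumed definition (a bare semicolon); the real macro lives in a JIT
    // header that is not part of this section.
    #define CLANG_FORMAT_COMMENT_ANCHOR ;

    #define HAS_EH_SKETCH 1

    void markFullyInterruptibleSketch(bool verbose, bool* interruptible)
    {
        // A catch or finally that keeps frame variables alive forces
        // fully-interruptible code. Without the anchor below, clang-format
        // would attach this comment to the #ifdef and re-indent it.
        CLANG_FORMAT_COMMENT_ANCHOR;

    #ifdef HAS_EH_SKETCH
        if (verbose)
            std::printf("Method has EH, marking method as fully interruptible\n");
    #endif

        *interruptible = true;
    }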
@@ -741,10 +744,10 @@ void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
LclVarDsc * varDsc = lvaTable + lclNum;
- // Struct fields are not traversed in a consistent order, so ignore them when
- // verifying that we see the var nodes in execution order
#ifdef DEBUG
#if !defined(_TARGET_AMD64_) // no addr nodes on AMD and experimenting with with encountering vars in 'random' order
+ // Struct fields are not traversed in a consistent order, so ignore them when
+ // verifying that we see the var nodes in execution order
if (ForCodeGen)
{
if (tree->gtOper == GT_OBJ)
@@ -1509,8 +1512,6 @@ bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
return true;
}
- // Else jitting.
-
#ifdef _TARGET_AMD64_
// If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
@@ -1544,8 +1545,6 @@ bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
return true;
}
- // Else jitting.
-
#ifdef _TARGET_AMD64_
// By default all direct code addresses go through relocation so that VM will setup
// a jump stub if addr cannot be encoded as pc-relative offset.
@@ -1891,11 +1890,11 @@ bool CodeGen::genCreateAddrMode(GenTreePtr addr,
#endif
AGAIN:
-
/* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
here if we find a scaled index.
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if SCALED_ADDR_MODES
assert(mul == 0);
@@ -2327,9 +2326,8 @@ FOUND_AM:
/* Get hold of the index value */
ssize_t ixv = index->AsIntConCommon()->IconValue();
- /* Scale the index if necessary */
-
#if SCALED_ADDR_MODES
+ /* Scale the index if necessary */
if (tmpMul) ixv *= tmpMul;
#endif
@@ -2339,9 +2337,8 @@ FOUND_AM:
cns += ixv;
- /* There is no scaled operand any more */
-
#if SCALED_ADDR_MODES
+ /* There is no scaled operand any more */
mul = 0;
#endif
rv2 = 0;
@@ -2926,8 +2923,8 @@ void CodeGen::genGenerateCode(void * * codePtr,
#ifndef LEGACY_BACKEND
#ifdef DEBUG
- // After code generation, dump the frame layout again. It should be the same as before code generation, if code generation
- // hasn't touched it (it shouldn't!).
+ // After code generation, dump the frame layout again. It should be the same as before code generation, if code
+ // generation hasn't touched it (it shouldn't!).
if (verbose)
{
compiler->lvaTableDump();
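Most of the remaining churn in this file is reflowing comments that overrun the column limit so the formatter will not rewrap them differently later. Judging by where the new breaks fall, the limit is around 120 columns (an inference, not stated in the hunks). Block comments are split at a word boundary and continued with the same "//" indentation, and trailing comments on declarations are continued on the next line aligned under the first comment column, as in this compilable sketch:

    // Sketch of the reflow rule used in the hunks above and below.
    static int genTotalFrameSizeSketch()
    {
        // After code generation the frame layout is dumped again; it should match the layout computed before code
        // generation, because code generation is not supposed to change it.
        int totalFrameSize = 0; // Stand-in value. In the prolog code below, the long explanatory comments to the
                                // right of declarations are wrapped exactly like this.
        return totalFrameSize;
    }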
@@ -3006,8 +3003,8 @@ void CodeGen::genGenerateCode(void * * codePtr,
trackedStackPtrsContig = !compiler->opts.compDbgEnC;
#endif
- /* We're done generating code for this function */
#ifdef DEBUG
+ /* We're done generating code for this function */
compiler->compCodeGenDone = true;
#endif
@@ -3055,14 +3052,14 @@ void CodeGen::genGenerateCode(void * * codePtr,
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
-// printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
+ // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
// Make sure that the x86 alignment and cache prefetch optimization rules
// were obeyed.
// Don't start a method in the last 7 bytes of a 16-byte alignment area
// unless we are generating SMALL_CODE
- //noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
+ // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
/* Now that the code is issued, we can finalize and emit the unwind data */
@@ -3109,12 +3106,12 @@ void CodeGen::genGenerateCode(void * * codePtr,
genReportEH();
- // Create and store the GC info for this method.
#ifdef JIT32_GCENCODER
#ifdef DEBUG
void* infoPtr =
#endif // DEBUG
#endif
+ // Create and store the GC info for this method.
genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
#ifdef DEBUG
@@ -3869,14 +3866,12 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
bool processed; // true after we've processed the argument (and it is in its final location)
bool circular; // true if this register participates in a circular dependency loop.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
// For UNIX AMD64 struct passing, the type of the register argument slot can differ from
// the type of the lclVar in ways that are not ascertainable from lvType.
// So, for that case we retain the type of the register in the regArgTab.
- // In other cases, we simply use the type of the lclVar to determine the type of the register.
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This is the UNIX_AMD64 implementation
var_types getRegType(Compiler* compiler)
{
return type; // UNIX_AMD64 implementation
@@ -3884,7 +3879,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This is the implementation for all other targets
+ // In other cases, we simply use the type of the lclVar to determine the type of the register.
var_types getRegType(Compiler* compiler)
{
LclVarDsc varDsc = compiler->lvaTable[varNum];
@@ -4060,11 +4055,11 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
noway_assert(regArgNum < argMax);
- // we better not have added it already (there better not be multiple vars representing this argument register)
+ // We better not have added it already (there better not be multiple vars representing this argument register)
noway_assert(regArgTab[regArgNum].slot == 0);
- // Set the register type.
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Set the register type.
regArgTab[regArgNum].type = regType;
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
@@ -4098,7 +4093,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
{
noway_assert((regArgNum + i) < argMax);
- // we better not have added it already (there better not be multiple vars representing this argument register)
+ // We better not have added it already (there better not be multiple vars representing this argument register)
noway_assert(regArgTab[regArgNum + i].slot == 0);
regArgTab[regArgNum + i].varNum = varNum;
@@ -4134,11 +4129,12 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
regType = regArgTab[regArgNum + i].getRegType(compiler);
regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// lvArgReg could be INT or FLOAT reg. So the following assertion doesn't hold.
// The type of the register depends on the classification of the first eightbyte
// of the struct. For information on classification refer to the System V x86_64 ABI at:
// http://www.x86-64.org/documentation/abi.pdf
-#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
assert((i > 0) || (regNum == varDsc->lvArgReg));
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Is the arg dead on entry to the method ?
@@ -4330,6 +4326,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
/* At this point, everything that has the "circular" flag
* set to "true" forms a circular dependency */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (regArgMaskLive)
@@ -4343,6 +4340,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
// LSRA allocates registers to incoming parameters in order and will not overwrite
// a register still holding a live parameter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented.");
#endif // LEGACY_BACKEND
@@ -4356,8 +4355,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
{
emitAttr size;
- // If this is the wrong register file, just continue.
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If this is the wrong register file, just continue.
if (regArgTab[argNum].type == TYP_UNDEF)
{
// This could happen if the reg in regArgTab[argNum] is of the other register file -
@@ -4381,9 +4380,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
varNum = regArgTab[argNum].varNum; noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
- // If not a stack arg go to the next one
-
#ifndef _TARGET_64BIT_
+ // If not a stack arg go to the next one
if (varDsc->lvType == TYP_LONG)
{
if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
@@ -4398,6 +4396,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
else
#endif // !_TARGET_64BIT_
{
+ // If not a stack arg go to the next one
if (!regArgTab[argNum].stackArg)
{
continue;
@@ -4810,10 +4809,11 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
#ifndef _TARGET_64BIT_
- //Right now we think that incoming arguments are not pointer sized. When we eventually
- //understand the calling convention... this still won't be true. But maybe we'll have a better
- //idea of how to ignore it.
#ifndef _TARGET_ARM_
+ // Right now we think that incoming arguments are not pointer sized. When we eventually
+ // understand the calling convention, this still won't be true. But maybe we'll have a better
+ // idea of how to ignore it.
+
// On Arm, a long can be passed in register
noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void *));
#endif
@@ -5229,7 +5229,9 @@ void CodeGen::genCheckUseBlockInit()
/* With compInitMem, all untracked vars will have to be init'ed */
/* VSW 102460 - Do not force initialization of compiler generated temps,
- unless they are untracked GC type or structs that contain GC pointers */
+ unless they are untracked GC type or structs that contain GC pointers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if FEATURE_SIMD
// TODO-1stClassStructs
// This is here to duplicate previous behavior, where TYP_SIMD8 locals
@@ -5325,9 +5327,9 @@ void CodeGen::genCheckUseBlockInit()
// we will mess up already computed offsets on the stack (for ESP frames)
regSet.rsSetRegsModified(RBM_EDI);
+#ifdef UNIX_AMD64_ABI
// For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
// In such case use R12 and R13 registers.
-#ifdef UNIX_AMD64_ABI
if (maskCalleeRegArgMask & RBM_RCX)
{
regSet.rsSetRegsModified(RBM_R12);
@@ -5383,8 +5385,9 @@ void CodeGen::genPushCalleeSavedRegisters()
assert(compiler->compGeneratingProlog);
#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
- // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack here.
- // Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not here.
+ // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack
+ // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not
+ // here.
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
#else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
@@ -5409,21 +5412,21 @@ void CodeGen::genPushCalleeSavedRegisters()
rsPushRegs |= RBM_FPBASE;
//
- // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
+ // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
// changes in GC suspension architecture.
//
- // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we generate
- // partially interruptible code for both the method that contains the tight loop with the call and the leaf method
- // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
- // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
+ // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
+ // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
+ // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
+ // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
// be saved on the stack and the GC suspension would time out.
//
- // So if we wanted to skip pushing pushing/popping lr for leaf frames, we would also need to do one of
+    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
// the following to make GC suspension work in the above scenario:
// - Make return address hijacking work even when lr is not saved on the stack.
// - Generate fully interruptible code for loops that contains calls
// - Generate fully interruptible code for leaf methods
- //
+ //
// Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
// is not worth it.
//
@@ -5460,19 +5463,24 @@ void CodeGen::genPushCalleeSavedRegisters()
compiler->unwindPushMaskFloat(maskPushRegsFloat);
}
#elif defined(_TARGET_ARM64_)
- // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and options. Case numbers
- // in comments here refer to this document.
+ // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
+ // options. Case numbers in comments here refer to this document.
//
// For most frames, generate, e.g.:
- // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair ensures stack stays aligned.
- // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area at top of frame (highest addresses).
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
+ // // ensures stack stays aligned.
+ // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
+ // // at top of frame (highest addresses).
// stp r21, r22, [sp, 0x70]
//
// Notes:
- // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers at the top of the frame.
+ // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
+ // at the top of the frame.
// 2. If we save FP, then the first store is FP, LR.
- // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only preserve their lower 8 bytes, by calling convention.
- // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are consecutive.
+ // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
+ // preserve their lower 8 bytes, by calling convention.
+ // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
+ // consecutive.
// 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
int totalFrameSize = genTotalFrameSize();
@@ -5487,7 +5495,8 @@ void CodeGen::genPushCalleeSavedRegisters()
assert(maskSaveRegsFloat == RBM_NONE);
}
- int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we generate based on various sizes.
+ int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
int calleeSaveSPDelta = 0;
int calleeSaveSPDeltaUnaligned = 0;
@@ -5528,7 +5537,8 @@ void CodeGen::genPushCalleeSavedRegisters()
// sub sp,sp,#framesz
// stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
//
- // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with signed offset encoding.
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
+ // signed offset encoding.
//
// After saving callee-saved registers, we establish the frame pointer with:
// add fp,sp,#outsz
@@ -5551,21 +5561,25 @@ void CodeGen::genPushCalleeSavedRegisters()
{
// Case 5 or 6.
//
- // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index to subtract from SP
- // as the first instruction. It must also leave space for varargs registers to be stored. For example:
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
+ // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
+ // stored. For example:
// stp r19,r20,[sp,#-96]!
// stp d8,d9,[sp,#16]
// ... save varargs incoming integer registers ...
- // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be lower on the stack than
- // the callee-saved registers (see lvaAlignFrame() for how we calculate alignment). So, if there is an odd number of
- // callee-saved registers, we use (for example, with just one saved register):
+ // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
+ // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
+ // register):
// sub sp,sp,#16
// str r19,[sp,#8]
- // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be possible to have two 8-byte alignment
- // padding words, one below the callee-saved registers, and one above them. If that is preferable, we could implement it.
+ // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
+ // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
+ // above them. If that is preferable, we could implement it.
// Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
//
- // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment padding from above).
+ // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
+ // padding from above).
// Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
//
// Generate:
@@ -5578,9 +5592,10 @@ void CodeGen::genPushCalleeSavedRegisters()
// stp fp,lr,[sp,#outsz]
// add fp,sp,#outsz
//
- // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can handle. And, once again,
- // we might need to deal with #outsz that is not aligned to 16-bytes (i.e., STACK_ALIGN). So, in the case of large #outsz we will
- // have an additional SP adjustment, using one of the following sequences:
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
+ // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
+ // following sequences:
//
// Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
//
@@ -5591,7 +5606,8 @@ void CodeGen::genPushCalleeSavedRegisters()
//
// Or:
//
- // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is always guaranteed to be 8 byte aligned).
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
// stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
// add fp,sp,#8
// sub sp,sp,#outsz - #8
@@ -5770,6 +5786,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize,
else if (frameSize < compiler->getVeryLargeFrameSize())
{
// Frame size is (0x1000..0x3000)
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
@@ -5816,6 +5833,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize,
// complete since the tickles could cause a stack overflow, and we
// need to be able to crawl the stack afterward (which means the
// stack pointer needs to be known).
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_XARCH_
bool pushedStubParam = false;
@@ -6395,7 +6413,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilo
else
{
// Generate:
- // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more careful
+ // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
+ // ; careful
int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
assert(spAdjustment3 > 0);
assert((spAdjustment3 % 16) == 0);
@@ -6419,7 +6438,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilo
// Generate:
// ldp fp,lr,[sp,#outsz]
- // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if it's large
+ // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
+ // ; it's large
genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0, nullptr);
}
@@ -6491,8 +6511,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
}
- // For System V AMD64 calling convention ESI and EDI are volatile registers.
#ifndef UNIX_AMD64_ABI
+ // For System V AMD64 calling convention ESI and EDI are volatile registers.
if (regSet.rsRegsModified(RBM_ESI))
{
popCount++;
@@ -6799,8 +6819,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi,
noway_assert(regSet.rsRegsModified(RBM_EDI));
- // For register arguments we may have to save ECX (and RDI on Amd64 System V OSes.)
#ifdef UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX and RDI on Amd64 System V OSes
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
{
noway_assert(regSet.rsRegsModified(RBM_R12));
@@ -6815,6 +6835,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi,
regTracker.rsTrackRegTrash(REG_R13);
}
#else // !UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
{
noway_assert(regSet.rsRegsModified(RBM_ESI));
@@ -6836,8 +6857,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi,
instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
instGen (INS_r_stosd);
- // Move back the argument registers
#ifdef UNIX_AMD64_ABI
+ // Move back the argument registers
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
{
inst_RV_RV(INS_mov, REG_RCX, REG_R12);
@@ -6848,6 +6869,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi,
inst_RV_RV(INS_mov, REG_RDI, REG_R13);
}
#else // !UNIX_AMD64_ABI
+ // Move back the argument registers
if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
{
inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
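The two UNIX_AMD64_ABI hunks above show another recurring move: a comment that used to sit above an #ifdef/#else pair and described both branches is pushed down and duplicated into each branch, so that after formatting each copy stays indented with the code it annotates. A compilable sketch of the resulting shape (the UNIX_AMD64_ABI_SKETCH macro and the printf bodies are stand-ins):

    #include <cstdio>

    #define UNIX_AMD64_ABI_SKETCH 1 // stand-in guard; either branch compiles

    void moveBackArgumentRegistersSketch()
    {
    #ifdef UNIX_AMD64_ABI_SKETCH
        // Move back the argument registers (RCX and RDI on System V).
        std::printf("mov rcx, r12\nmov rdi, r13\n");
    #else  // !UNIX_AMD64_ABI_SKETCH
        // Move back the argument registers (ECX elsewhere).
        std::printf("mov ecx, esi\n");
    #endif // !UNIX_AMD64_ABI_SKETCH
    }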
@@ -6960,7 +6982,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi,
if (!varTypeIsGC(tempThis->tdTempType()))
continue;
-// printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
+ // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
}
@@ -7022,13 +7044,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg,
{
if (isFramePointerUsed())
{
+#if defined(_TARGET_ARM_)
// lvStkOffs is always valid for incoming stack-arguments, even if the argument
// will become enregistered.
- //
// On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
-#if defined(_TARGET_ARM_)
noway_assert((2*REGSIZE_BYTES <= varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize+2*REGSIZE_BYTES));
#else
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
#endif
}
@@ -7102,14 +7125,13 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg,
*pInitRegZeroed = false;
- // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
- // mov dword ptr [frame.GSSecurityCookie], reg
-
#if CPU_LOAD_STORE_ARCH
instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
regTracker.rsTrackRegTrash(reg);
#else
+ // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
+ // mov dword ptr [frame.GSSecurityCookie], reg
getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC,
reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
regTracker.rsTrackRegTrash(reg);
@@ -7253,13 +7275,11 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg,
unsigned saveStackLvl2 = genStackLevel;
+#if defined(_TARGET_X86_)
// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
// for x86 stack unwinding
- //
// Push the profilerHandle
- //
-#if defined(_TARGET_X86_)
if (compiler->compProfilerMethHndIndirected)
{
getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
@@ -7423,10 +7443,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORIN
#else // LEGACY_BACKEND
+#if defined(_TARGET_X86_)
//
// Push the profilerHandle
//
-#if defined(_TARGET_X86_)
+
if (compiler->compProfilerMethHndIndirected)
{
getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
@@ -7449,7 +7470,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORIN
compiler->fgPtrArgCntMax = 1;
}
#elif defined(_TARGET_ARM_)
- // We could optimize register usage based on return value is int/long/void. But to keep it simple we will lock RBM_PROFILER_RET_USED always.
+ //
+ // Push the profilerHandle
+ //
+
+    // We could optimize register usage based on whether the return value is int/long/void. But to keep it simple we will lock RBM_PROFILER_RET_USED always.
regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
regSet.rsLockReg(RBM_PROFILER_RET_USED);
@@ -7471,8 +7496,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORIN
}
else
{
- // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing profiler handle.
- // Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
+ // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+ // profiler handle. Therefore, r0 is moved to REG_PROFILER_RETURN_SCRATCH as per contract.
if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
{
attr = EA_GCREF;
@@ -7808,8 +7833,10 @@ void CodeGen::genFinalizeFrame()
genCheckUseBlockInit();
// Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
+
if (compiler->compTailCallUsed)
{
// If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
@@ -7839,6 +7866,7 @@ void CodeGen::genFinalizeFrame()
}
#endif // DEBUG
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
if (compiler->opts.compDbgEnC)
{
// We always save FP.
@@ -8248,13 +8276,16 @@ void CodeGen::genFnProlog()
signed int loOffs = tempThis->tdTempOffs();
signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
- // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the previous
- // frame pointer. Thus, stkOffs can't be zero.
- // However, on amd64 there is no requirement to chain frame pointers.
+ // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
+ // previous frame pointer. Thus, stkOffs can't be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if !defined(_TARGET_AMD64_)
+ // However, on amd64 there is no requirement to chain frame pointers.
+
noway_assert(!isFramePointerUsed() || loOffs != 0);
#endif // !defined(_TARGET_AMD64_)
-// printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
+ // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
hasUntrLcl = true;
@@ -8379,12 +8410,12 @@ void CodeGen::genFnProlog()
#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
/*-------------------------------------------------------------------------
*
* Now start emitting the part of the prolog which sets up the frame
*/
-#ifdef _TARGET_ARM_
if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
{
inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
@@ -8450,6 +8481,7 @@ void CodeGen::genFnProlog()
// Subtract the local frame size from SP.
//
//-------------------------------------------------------------------------
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_ARM64_
regMaskTP maskStackAlloc = RBM_NONE;
@@ -8644,13 +8676,13 @@ void CodeGen::genFnProlog()
* Take care of register arguments first
*/
+ RegState *regState;
+
#ifndef LEGACY_BACKEND
// Update the arg initial register locations.
compiler->lvaUpdateArgsWithInitialReg();
#endif // !LEGACY_BACKEND
- RegState *regState;
-
FOREACH_REGISTER_FILE(regState)
{
if (regState->rsCalleeRegArgMaskLiveIn)
@@ -8776,8 +8808,8 @@ void CodeGen::genFnProlog()
printf("\n");
#endif
- // On non-x86 the VARARG cookie does not need any special treatment.
#ifdef _TARGET_X86_
+ // On non-x86 the VARARG cookie does not need any special treatment.
// Load up the VARARG argument pointer register so it doesn't get clobbered.
// only do this if we actually access any statically declared args
@@ -9205,9 +9237,8 @@ void CodeGen::genFnEpilog(BasicBlock* block)
if (compiler->compLclFrameSize)
{
- /* Add 'compiler->compLclFrameSize' to ESP */
-
#ifdef _TARGET_X86_
+ /* Add 'compiler->compLclFrameSize' to ESP */
/* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
if ( (compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed )
@@ -9218,6 +9249,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
else
#endif // _TARGET_X86
{
+ /* Add 'compiler->compLclFrameSize' to ESP */
/* Generate "add esp, <stack-size>" */
inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
}
@@ -9471,16 +9503,18 @@ void CodeGen::genFnEpilog(BasicBlock* block)
* The ARM funclet prolog sequence is:
*
* push {regs,lr} ; We push the callee-saved regs and 'lr'.
- * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we actually use
- * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for the
- * ; entire function.
+ * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
+ * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
+ * ; calculated for the entire function.
* sub sp, XXX ; Establish the rest of the frame.
* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
* ; up to preserve stack alignment. If we push an odd number of registers, we also
* ; generate this, to keep the stack aligned.
*
- * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
*
* if (this is a filter funclet)
* {
@@ -9495,15 +9529,17 @@ void CodeGen::genFnEpilog(BasicBlock* block)
* // } catch(Exception) {
* // throw new Exception(); // The exception thrown here ...
* // }
- * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is
+ * // // still on the stack
* // } filter-handler {
* // }
* //
- * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
- * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
- * // create a main function PSP for any function with a filter.
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
+ * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
+ * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
*
- * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
+ * ; the dynamically containing funclet or function)
* str r1, [sp + PSP_slot_SP_offset] ; store the PSP
* sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
* }
@@ -9533,7 +9569,8 @@ void CodeGen::genFnEpilog(BasicBlock* block)
* +=======================+ <---- Caller's SP
* |Callee saved registers |
* |-----------------------|
- * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset in function and funclet
+ * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
+ * | | // in function and funclet
* |-----------------------|
* | PSP slot |
* |-----------------------|
@@ -9778,20 +9815,25 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
* push ebp
* push callee-saved regs
* ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
- * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for the
- * ; entire function.
+ * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
+ * ; the entire function.
* sub sp, XXX ; Establish the rest of the frame.
* ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
* ; up to preserve stack alignment. If we push an odd number of registers, we also
* ; generate this, to keep the stack aligned.
*
- * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
* ; Also, re-establish the frame pointer from the PSP.
*
- * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the PSP of the dynamically containing funclet or function)
+ * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
+ * ; PSP of the dynamically containing funclet or function)
* mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
- * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If Function_InitialSP_to_FP_delta==0, we don't need this instruction.
+ * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
+ * ; Function_InitialSP_to_FP_delta==0, we don't need this
+ * ; instruction.
*
* The epilog sequence is then:
*
@@ -10231,6 +10273,7 @@ void CodeGen::genGeneratePrologsAndEpilogs()
genFnProlog();
// Generate all the prologs and epilogs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index 0b93c6d243..9f242b6d24 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -1584,7 +1584,8 @@ bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
// Maybe I should just set "fold" true in the call to genMakeAddressable above.
if (scaledIndex != NULL)
{
- int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK -- multiple of 2^6.
+ int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
+ // multiple of 2^6.
if (mul == 0)
{
mul = scale;
@@ -1925,8 +1926,9 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
lenOffset = arrLenExact->ArrLenOffset();
- // We always load the length into a register on ARM and x64.
#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // We always load the length into a register on ARM and x64.
+
// 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
// lengths, but the index expression *can* be native int (64-bits)
arrRef = arrLenExact->ArrRef();
@@ -2233,9 +2235,9 @@ regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem,
genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+#if CPU_LOAD_STORE_ARCH
/* Subtract the lower bound, and do the range check */
-#if CPU_LOAD_STORE_ARCH
regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
getEmitter()->emitIns_R_AR(
INS_ldr, EA_4BYTE,
@@ -2259,6 +2261,7 @@ regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem,
index->gtRegNum,
valueReg);
#else
+ /* Subtract the lower bound, and do the range check */
getEmitter()->emitIns_R_AR(
INS_sub, EA_4BYTE,
index->gtRegNum,
@@ -2984,7 +2987,10 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
if (compiler->gsGlobalSecurityCookieAddr == NULL)
{
// JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
+
regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE,
reg,
@@ -3105,7 +3111,9 @@ AGAIN:
((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
{
/* Compare against any register to do null-check */
- #if defined(_TARGET_XARCH_)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
#elif CPU_LOAD_STORE_ARCH
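Besides inserting the anchor, the hunk above also moves "#if defined(_TARGET_XARCH_)" from an indented position to column 0. By default clang-format does not indent preprocessor directives, so pre-moving them keeps the later automated pass from producing extra churn. A small sketch combining both edits (the macro names are stand-ins, and the directives sit flush left in the real file; the snippet is indented here only to set it off from the text):

    #define CLANG_FORMAT_COMMENT_ANCHOR ; // assumed definition, as in the earlier sketch
    #define TARGET_XARCH_SKETCH 1

    int nullCheckSketch(int* p)
    {
        /* Compare against any register to do the null check */
        CLANG_FORMAT_COMMENT_ANCHOR;

    #if defined(TARGET_XARCH_SKETCH)
        // Directive written without leading spaces, matching what clang-format emits.
        return *p;
    #else
        return (p != nullptr) ? *p : 0;
    #endif
    }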
@@ -3421,10 +3429,11 @@ regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt,
gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
#ifdef _TARGET_ARM_
+#if NOGC_WRITE_BARRIERS
// Finally, we may be required to spill whatever is in the further argument registers
// trashed by the call. The write barrier trashes some further registers --
// either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
-#if NOGC_WRITE_BARRIERS
+
regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
#else
regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
@@ -3983,8 +3992,10 @@ void CodeGen::genCondJumpLng(GenTreePtr cond,
genJccLongLo(cmp, jumpTrue, jumpFalse);
/* Free up anything that was tied up by either operand */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
+
// Fix 388442 ARM JitStress WP7
regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
genReleaseRegPair(op2);
@@ -5165,11 +5176,11 @@ void CodeGen::genCodeForTreeLeaf(GenTreePtr tree,
break;
case GT_NO_OP:
+ // The VM does certain things with actual NOP instructions
+ // so generate something small that has no effect, but isn't
+ // a typical NOP
if (tree->gtFlags & GTF_NO_OP_NO)
{
- // The VM does certain things with actual NOP instructions
- // so generate something small that has no effect, but isn't
- // a typical NOP
#ifdef _TARGET_XARCH_
// The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
instGen(INS_nop);
@@ -5374,6 +5385,7 @@ void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
/* Argument was passed on the stack, but ended up in a register
* Store it back to the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (varDsc->TypeGet() == TYP_LONG)
@@ -5428,6 +5440,7 @@ void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
noway_assert(!varDsc->lvRegister);
/* Reload it from the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (varDsc->TypeGet() == TYP_LONG)
@@ -5736,6 +5749,8 @@ void CodeGen::genCodeForQmark(GenTreePtr tree,
#if FEATURE_STACK_FP_X87
/* Spill any register that hold partial values so that the exit liveness
from sides is the same */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
@@ -6088,26 +6103,27 @@ void CodeGen::genCodeForQmark(GenTreePtr tree,
/* Generate jmp lab_done */
lab_done = genCreateTempLabel();
- // We would like to know here if the else node is really going to generate
- // code, as if it isn't, we're generating here a jump to the next instruction.
- // What you would really like is to be able to go back and remove the jump, but
- // we have no way of doing that right now.
-
#ifdef DEBUG
// We will use this to assert we don't emit instructions if we decide not to
// do the jmp
unsigned emittedInstructions = getEmitter()->emitInsCount;
bool bSkippedJump = false;
#endif
+ // We would like to know here if the else node is really going to generate
+ // code, as if it isn't, we're generating here a jump to the next instruction.
+ // What you would really like is to be able to go back and remove the jump, but
+ // we have no way of doing that right now.
+
if (
#if FEATURE_STACK_FP_X87
!bHasFPUState && // If there is no FPU state, we won't need an x87 transition
#endif
genIsEnregisteredIntVariable(thenNode) == reg)
{
+#ifdef DEBUG
// For the moment, fix this easy case (enregistered else node), which
// is the one that happens all the time.
-#ifdef DEBUG
+
bSkippedJump = true;
#endif
}
@@ -6660,9 +6676,8 @@ void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree,
reg = regSet.rsPickReg(needReg, bestReg);
- /* Compute the value into the target: reg=op1*op2_icon */
-
#if LEA_AVAILABLE
+ /* Compute the value into the target: reg=op1*op2_icon */
if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
{
regNumber regSrc;
@@ -6680,6 +6695,7 @@ void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree,
else
#endif // LEA_AVAILABLE
{
+ /* Compute the value into the target: reg=op1*op2_icon */
inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
}
@@ -6969,8 +6985,8 @@ DONE_LEA_ADD:
inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
}
- /* Update the live set of register variables */
#ifdef DEBUG
+ /* Update the live set of register variables */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
@@ -7471,14 +7487,14 @@ INCDEC_REG:
/* Make the target addressable for load/store */
addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
- #if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
- #else
+ #if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
- #endif // CPU_LOAD_STORE_ARCH
+ #endif // !CPU_LOAD_STORE_ARCH
{
// Load op1 into a reg
@@ -7580,8 +7596,10 @@ INCDEC_REG:
addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
/* Compute the new value into the target register */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_HAS_BYTE_REGS
+
// Fix 383833 X86 ILGEN
regNumber reg2;
if ((op2->gtFlags & GTF_REG_VAL) != 0)
@@ -7740,14 +7758,13 @@ INCDEC_REG:
addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
regSet.rsLockUsedReg(addrReg);
-#if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
-#else
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // CPU_LOAD_STORE_ARCH
+#endif // !CPU_LOAD_STORE_ARCH
{
reg = regSet.rsPickReg();
regSet.rsLockReg(genRegMask(reg));
@@ -7816,14 +7833,14 @@ INCDEC_REG:
addrReg = genKeepAddressable(op1, addrReg);
regSet.rsLockUsedReg(addrReg);
-#if CPU_LOAD_STORE_ARCH
- // We always load from memory then store to memory
-#else
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
/* For small types with overflow check, we need to
sign/zero extend the result, so we need it in a reg */
if (ovfl && genTypeSize(treeType) < sizeof(int))
-#endif // CPU_LOAD_STORE_ARCH
+#endif // !CPU_LOAD_STORE_ARCH
{
reg = regSet.rsPickReg();
@@ -8549,10 +8566,11 @@ void CodeGen::genCodeForAsgShift(GenTreePtr tree,
/* Make sure the address registers are still here */
addrReg = genKeepAddressable(op1, addrReg, op2Regs);
- /* Perform the shift */
#ifdef _TARGET_XARCH_
+ /* Perform the shift */
inst_TT_CL(ins, op1);
#else
+ /* Perform the shift */
noway_assert(op2->gtFlags & GTF_REG_VAL);
op2Regs = genRegMask(op2->gtRegNum);
@@ -8624,6 +8642,9 @@ void CodeGen::genCodeForShift(GenTreePtr tree,
// On ARM, until proven otherwise by performance numbers, just do the shift.
// It's no bigger than add (16 bits for low registers, 32 bits for high registers).
// It's smaller than two "add reg, reg".
+
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef _TARGET_ARM_
if (oper == GT_LSH)
{
@@ -8714,10 +8735,11 @@ DO_SHIFT_BY_CNS:
noway_assert(op1->gtFlags & GTF_REG_VAL);
reg = op1->gtRegNum;
- /* Perform the shift */
#ifdef _TARGET_ARM_
+ /* Perform the shift */
getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
#else
+ /* Perform the shift */
inst_RV_CL(ins, reg);
#endif
genReleaseReg(op2);
@@ -8736,7 +8758,8 @@ DO_SHIFT_BY_CNS:
/*****************************************************************************
*
- * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree). Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
*/
void CodeGen::genCodeForRelop(GenTreePtr tree,
@@ -8943,9 +8966,9 @@ void CodeGen::genCodeForCopyObj(GenTreePtr tree,
emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+#if CPU_USES_BLOCK_MOVE
// Materialize the trees in the order desired
-#if CPU_USES_BLOCK_MOVE
genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
@@ -9006,6 +9029,7 @@ void CodeGen::genCodeForCopyObj(GenTreePtr tree,
#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
+ // Materialize the trees in the order desired
bool helperUsed;
regNumber regDst;
regNumber regSrc;
@@ -9441,6 +9465,7 @@ void CodeGen::genCodeForBlkOp(GenTreePtr tree,
}
/* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (length > 4)
@@ -9532,7 +9557,7 @@ void CodeGen::genCodeForBlkOp(GenTreePtr tree,
#else // !CPU_USES_BLOCK_MOVE
#ifndef _TARGET_ARM_
- // Currently only the ARM implementation is provided
+// Currently only the ARM implementation is provided
#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
//
@@ -10028,15 +10053,14 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
regTracker.rsTrackRegTrash(reg);
- /* Update the live set of register variables */
-
#ifdef DEBUG
+ /* Update the live set of register variables */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
/* Now we can update the register pointer information */
-// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
gcInfo.gcMarkRegPtrVal(reg, treeType);
genCodeForTree_DONE_LIFE(tree, reg);
@@ -10184,9 +10208,10 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
}
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
@@ -10537,9 +10562,10 @@ LockBinOpCommon:
if (ins == INS_add)
{
- genUpdateLife(tree); //If the operator was add, then we were called from the GT_LOCKADD
- //case. In that case we don't use the result, so we don't need to
- //update anything.
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
+ genUpdateLife(tree);
}
else
{
@@ -11266,7 +11292,7 @@ void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree,
noway_assert(!(op1->gtFlags & GTF_REG_VAL));
genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
-// gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
@@ -11388,6 +11414,8 @@ void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
regMaskTP retMask = genCodeForCall(op2, true);
// Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
assert((mask & (mask - 1)) == 0);
@@ -11522,13 +11550,13 @@ REG_VAR2:
op1Reg = op1->gtRegVar.gtRegNum;
+#ifdef DEBUG
/* Compute the RHS (hopefully) into the variable's register.
For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
as variables are kept alive everywhere. So we have to be
careful if we want to compute the value directly into
the variable's register. */
-#ifdef DEBUG
bool needToUpdateRegSetCheckLevel;
needToUpdateRegSetCheckLevel = false;
#endif
@@ -11715,10 +11743,9 @@ REG_VAR2:
regGC = WriteBarrier(op1, op2, addrReg);
+ // Was the assignment done by the WriteBarrier?
if (regGC == RBM_NONE)
{
- // No, assignment was not done by the WriteBarrier
-
#ifdef _TARGET_ARM_
if (volat)
{
@@ -12012,9 +12039,8 @@ NOT_SMALL:
inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
}
- /* Update the current liveness info */
-
#ifdef DEBUG
+ /* Update the current liveness info */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
@@ -12130,9 +12156,8 @@ NOT_SMALL:
genReleaseReg(op2);
- /* Update the current liveness info */
-
#ifdef DEBUG
+ /* Update the current liveness info */
if (compiler->opts.varNames) genUpdateLife(tree);
#endif
@@ -12283,19 +12308,17 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
{
#if defined(_TARGET_XARCH_)
// cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
- //
+
// Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
// right now, just get the trees evaluated.
// As a friendly reminder, IL args are evaluated left to right.
- //
+
GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
regMaskTP addrReg;
-
- // This little piggy (on the left) went to market.
bool isAddr = genMakeIndAddrMode(location,
tree,
false, /* not for LEA */
@@ -12311,21 +12334,18 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
regSet.rsMarkRegUsed(location);
}
- // This little piggy (in the middle) went home.
// We must have a reg for the Value, but it doesn't really matter which register.
// Try to avoid EAX and the address register if possible.
genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
- // This little piggy (on the right) had roast beef
+#ifdef DEBUG
// cmpxchg uses EAX as an implicit operand to hold the comparand
// We're going to destroy EAX in this operation, so we better not be keeping
// anything important in it.
-
-#ifdef DEBUG
if (RBM_EAX & regSet.rsMaskVars)
{
- //We have a variable enregistered in EAX. Make sure it goes dead in this tree.
+ // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
{
const LclVarDsc & varDesc = compiler->lvaTable[varNum];
@@ -12337,11 +12357,10 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
continue;
if (varDesc.lvRegNum != REG_EAX)
continue;
- //I suppose I should technically check lvOtherReg.
+ // We may need to check lvOtherReg.
- //OK, finally. Let's see if this local goes dead.
- //If the variable isn't going dead during this tree, we've just trashed a local with
- //cmpxchg.
+ // If the variable isn't going dead during this tree, we've just trashed a local with
+ // cmpxchg.
noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
break;
@@ -12350,10 +12369,6 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
#endif
genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
- //Oh, no more piggies.
- //* Author's note. I believe in bounty and chose to omit the piggy who got none.
-
-
//By this point we've evaluated everything. However the odds are that we've spilled something by
//now. Let's recover all the registers and force them to stay.
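For context on the hunks above: cmpxchg uses EAX as its implicit comparand and overwrites it on failure, which is why the DEBUG block insists that no live variable remain enregistered in EAX. A minimal C++ model of the instruction's semantics (illustration only, not JIT code):

    #include <cstdint>

    // lock cmpxchg [location], value with EAX as the implicit comparand:
    // on a match the memory is updated; either way EAX ends up holding the
    // value that was observed in memory, and that observed value is the result.
    int32_t cmpxchgModel(int32_t* location, int32_t value, int32_t& eax)
    {
        int32_t observed = *location;
        if (observed == eax)
        {
            *location = value; // success: store the new value
        }
        eax = observed;        // EAX always reflects what memory contained
        return observed;
    }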
@@ -12400,7 +12415,6 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree,
reg = REG_EAX;
- //Until I try to optimize a cmp after a cmpxchg, just trash the flags for safety's sake.
genFlagsEqualToNone();
break;
#else // not defined(_TARGET_XARCH_)
@@ -12615,9 +12629,9 @@ void CodeGen::genCodeForBBlist()
regSet.rsSpillBeg();
- /* Initialize the line# tracking logic */
-
#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
if (compiler->opts.compScopeInfo)
{
siInit();
@@ -12878,6 +12892,7 @@ void CodeGen::genCodeForBBlist()
}
/* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
#if defined(_TARGET_ARM_)
@@ -12888,18 +12903,17 @@ void CodeGen::genCodeForBBlist()
{
assert(block->bbFlags & BBF_JMP_TARGET);
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
#ifdef DEBUG
if (compiler->verbose)
{
printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
}
#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
block->bbUnwindNopEmitCookie = getEmitter()->emitAddLabel(
gcInfo.gcVarPtrSetCur,
@@ -13009,6 +13023,7 @@ void CodeGen::genCodeForBBlist()
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
@@ -13501,8 +13516,9 @@ REG_VAR_LONG:
regPair = regSet.rsPickRegPair(needReg);
/* Load the value into the registers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
-#if ! CPU_HAS_FP_SUPPORT
+#if !CPU_HAS_FP_SUPPORT
if (oper == GT_CNS_DBL)
{
noway_assert(sizeof(__int64) == sizeof(double));
@@ -13594,7 +13610,6 @@ REG_VAR_LONG:
loadIns = ins_Load(TYP_INT); // INS_ldr
regLo = genRegPairLo(regPair);
regHi = genRegPairHi(regPair);
- // assert(regLo != regHi); // regpair property
#if CPU_LOAD_STORE_ARCH
{
@@ -14748,10 +14763,9 @@ SIMPLE_OR_LONG:
regTracker.rsTrackRegTrash(regLo);
regTracker.rsTrackRegTrash(regHi);
+ /* Unary "neg": negate the value in the register pair */
if (oper == GT_NEG)
{
- /* Unary "neg": negate the value in the register pair */
-
#ifdef _TARGET_ARM_
// ARM doesn't have an opcode that sets the carry bit like
@@ -14905,7 +14919,7 @@ SIMPLE_OR_LONG:
}
else
{
-// printf("Overlap: needReg = %08X\n", needReg);
+ // printf("Overlap: needReg = %08X\n", needReg);
// Reg-prediction won't allow this
noway_assert((regSet.rsMaskVars & addrReg) == 0);
@@ -15108,12 +15122,12 @@ USE_SAR_FOR_CAST:
regPair = gen2regs2pair(regLo, regHi);
- /* Copy the lo32 bits from regLo to regHi and sign-extend it */
-
#ifdef _TARGET_ARM_
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
// Use one instruction instead of two
getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
#else
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
#endif
@@ -15253,12 +15267,10 @@ USE_SAR_FOR_CAST:
NYI("64-bit return");
#endif
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
- /* XXX Thu 7/5/2007
- * Oh look. More cloned code from the regular processing of GT_RETURN.
- */
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
@@ -15655,12 +15667,10 @@ void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
genPInvokeMethodEpilog();
}
+#ifdef PROFILING_SUPPORTED
//The profiling hook does not trash registers, so it's safe to call after we emit the code for
//the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
- /* XXX Thu 7/5/2007
- * Oh look. More cloned code from the regular processing of GT_RETURN.
- */
+
if (compiler->compCurBB == compiler->genReturnBB)
{
genProfilingLeaveCallback();
@@ -15912,8 +15922,8 @@ void CodeGen::genEmitHelperCall(unsigned helper,
void * addr = NULL, **pAddr = NULL;
- // Don't ask VM if it hasn't requested ELT hooks
#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
if (!compiler->compProfilerHookNeeded &&
compiler->opts.compJitELTHookEnabled &&
(helper == CORINFO_HELP_PROF_FCN_ENTER ||
@@ -16017,7 +16027,7 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP * byrefRegs,
*byrefRegs = RBM_NONE;
*noRefRegs = RBM_NONE;
-// noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
if (regs == RBM_NONE)
return RBM_NONE;
@@ -16095,7 +16105,7 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, reg
noway_assert((regs & byrefRegs) == byrefRegs);
noway_assert((regs & noRefRegs) == noRefRegs);
-// noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
noway_assert((regs & (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur)) == RBM_NONE);
noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
@@ -17160,10 +17170,10 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
/* This is passed as a pointer-sized integer argument */
genCodeForTree(curr, 0);
+
+ // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
if (curr->gtFlags & GTF_LATE_ARG)
{
- // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
-
#ifdef _TARGET_ARM_
argSize = 0; // nothing is passed on the stack
#endif
@@ -17171,7 +17181,7 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
else
{
// The arg is passed in the outgoing argument area of the stack frame
- //
+
assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
@@ -17295,7 +17305,8 @@ DEFERRED:
Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
if (varDsc->lvPromoted &&
- promotionType==Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
{
assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
promotedStructLocalVarDesc = varDsc;
@@ -17869,8 +17880,8 @@ bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
{
// The current slot should contain more than one field.
// We'll construct a word in memory for the slot, then load it into a register.
- // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current slot,
- // in which case we'll just skip this loop altogether.)
+ // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
+ // slot, in which case we'll just skip this loop altogether.)
while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
{
// If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
@@ -17952,8 +17963,8 @@ bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
}
}
- // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to write to
- // an argument register, do so.
+ // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
+ // write to an argument register, do so.
if (curRegNum != MAX_REG_ARG)
{
noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
@@ -18184,10 +18195,10 @@ void CodeGen::SetupLateArgs(GenTreePtr call)
// home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
// table entry for the promoted struct local. As we fill slots with the contents of a
// promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
- // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're working
- // on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're done),
- // and "nextPromotedStructFieldVar" will be the local
- // variable number of the next field variable to be copied.
+ // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
+ // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
+ // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
+ // to be copied.
LclVarDsc* promotedStructLocalVarDesc = NULL;
unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
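The comment above walks through the slot accounting for a promoted struct passed by slots, using a 12-byte struct as its example. A minimal sketch of just that bookkeeping, with assumed sizes (the real code interleaves this with register and stack placement):

    // For a 12-byte struct and 4-byte slots there are 3 slots; while filling
    // slot i, bytesOfNextSlotOfCurPromotedStruct is the offset where the next
    // slot begins (4, 8, then 12). Fields whose lvFldOffset is below that
    // boundary belong to the current or an earlier slot.
    void slotAccountingSketch()
    {
        const unsigned slotSize   = 4;   // assumption: 4-byte slots (ARM)
        const unsigned structSize = 12;
        const unsigned slots      = structSize / slotSize; // 3
        for (unsigned slot = 0; slot < slots; slot++)
        {
            unsigned bytesOfNextSlotOfCurPromotedStruct = (slot + 1) * slotSize;
            (void)bytesOfNextSlotOfCurPromotedStruct;
        }
    }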
@@ -18308,8 +18319,8 @@ void CodeGen::SetupLateArgs(GenTreePtr call)
{
nextPromotedStructFieldVar++;
}
- // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the first stack slot is after
- // the last slot.
+ // If we reach the limit, it means there is no field that goes even partly on the stack, which can only
+ // happen if the first stack slot is after the last slot.
assert(nextPromotedStructFieldVar < fieldVarLim|| firstStackSlot >= slots);
}
@@ -18474,7 +18485,8 @@ void CodeGen::SetupLateArgs(GenTreePtr call)
regSet.rsMarkRegFree(genRegMask(regSrc));
}
- if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs used.
+ if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
+ // used.
{
arg->gtFlags |= GTF_REG_VAL;
for (unsigned i = 1; i < firstStackSlot; i++)
@@ -19054,14 +19066,14 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize)
: argSize;
+#ifdef PROFILING_SUPPORTED
+
/*-------------------------------------------------------------------------
* Generate the profiling hooks for the call
*/
/* Treat special cases first */
-#ifdef PROFILING_SUPPORTED
-
/* fire the event at the call site */
/* alas, right now I can only handle calls via a method handle */
if (compiler->compIsProfilerHookNeeded() &&
@@ -19073,6 +19085,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
//
// Push the profilerHandle
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
regMaskTP byrefPushedRegs;
regMaskTP norefPushedRegs;
@@ -19107,8 +19121,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
// To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
// Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
//
- // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want to disturb them
- // and hence argument registers are locked here.
+ // By the time we reach this point argument registers are set up (by genPushArgList()), therefore we don't want
+ // to disturb them and hence argument registers are locked here.
regMaskTP usedMask = RBM_NONE;
regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
@@ -19485,7 +19499,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
noway_assert(callType == CT_USER_FUNC);
- vptrReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ vptrReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
vptrMask = genRegMask(vptrReg);
/* The register no longer holds a live pointer value */
@@ -19905,6 +19919,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
// a single indirection.
//
// For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
{
regNumber indReg = REG_TAILCALL_ADDR;
@@ -19925,6 +19941,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
// a double indirection.
//
// For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if CPU_LOAD_STORE_ARCH
{
regNumber indReg = REG_TAILCALL_ADDR;
@@ -19959,6 +19977,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
//
// The vast majority of calls end up here.... Wouldn't
// it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
if (!arm_Valid_Imm_For_BL((ssize_t)addr))
{
@@ -20005,8 +20024,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
// Non-virtual direct calls to addresses accessed by
// a single indirection.
//
-#if CPU_LOAD_STORE_ARCH
+
// Load the address into a register, load indirect and call through a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if CPU_LOAD_STORE_ARCH
indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
@@ -20212,10 +20233,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
gcInfo.gcRegByrefSetCur &= ~(curArgMask);
}
+#if !FEATURE_STACK_FP_X87
//-------------------------------------------------------------------------
// free up the FP args
-#if !FEATURE_STACK_FP_X87
for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
{
regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
@@ -20296,8 +20317,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
genStackLevel = saveStackLvl;
/* No trashed registers may possibly hold a pointer at this point */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
+
regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) & ~regSet.rsMaskVars & ~vptrMask;
if (ptrRegs)
{
@@ -20428,6 +20451,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
}
/* Are we supposed to pop the arguments? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
if (call->gtFlags & GTF_CALL_UNMANAGED)
@@ -20605,8 +20629,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call,
UnspillFloat(call);
}
- // Mark as free
#if FEATURE_STACK_FP_X87
+ // Mark as free
regSet.SetUsedRegFloat(call, false);
#endif
}
@@ -20844,8 +20868,8 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
var_types type = genActualType(size->gtType);
emitAttr easz = emitTypeSize(type);
+#ifdef DEBUG
// Verify ESP
- #ifdef DEBUG
if (compiler->opts.compStackCheckOnRet)
{
noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
@@ -20857,7 +20881,7 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
getEmitter()->emitIns(INS_BREAKPOINT);
genDefineTempLabel(esp_check);
}
- #endif
+#endif
noway_assert(isFramePointerUsed());
noway_assert(genStackLevel == 0); // Can't have anything on the stack
@@ -20987,9 +21011,9 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
if (compiler->info.compInitMem)
{
+#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
// regCnt will be the number of pointer-sized words to locAlloc
// If the shift right won't do the 'and' do it here
-#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
#endif
// --- shr regCnt, 2 ---
@@ -21012,6 +21036,7 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
/* Since we have to zero out the allocated memory AND ensure that
ESP is always valid by tickling the pages, we will just push 0's
on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
@@ -21080,7 +21105,10 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
mov ESP, REG
end:
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
+
inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
inst_JMP(EJ_hs, loop);
#else
@@ -21104,6 +21132,7 @@ regNumber CodeGen::genLclHeap(GenTreePtr size)
// note that it has to be done BEFORE the update of ESP since
// ESP might already be on the guard page. It is OK to leave
// the final value of ESP on the guard page
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if CPU_LOAD_STORE_ARCH
getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
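The genLclHeap comments above stress that each page must be touched before ESP moves past it, and that the final ESP may land on the guard page. A rough sketch of that probing order, assuming 4K pages (illustration only, not the emitted code):

    #include <cstddef>
    #include <cstdint>

    // Probe the page SP currently points into before stepping SP down by a
    // page, so the OS guard page is always faulted in order.
    void probeSketch(volatile uint8_t*& sp, size_t allocSize)
    {
        const size_t pageSize = 0x1000;            // assumption: 4K pages
        volatile uint8_t* target = sp - allocSize;
        while (sp - pageSize > target)
        {
            (void)*sp;                             // touch before updating SP
            sp -= pageSize;
        }
        sp = target;                               // may legitimately sit on the guard page
    }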
@@ -21195,9 +21224,9 @@ void CodeGen::genSetScopeInfo (unsigned which,
unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+#ifdef _TARGET_X86_
// Non-x86 platforms are allowed to access all arguments directly
// so we don't need this code.
-#ifdef _TARGET_X86_
// Is this a varargs function?
@@ -21750,13 +21779,14 @@ regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
regTCB = REG_PINVOKE_TCB;
}
- /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
-
- // TODO-ARM-CQ: should we inline TlsGetValue here?
#if !defined(_TARGET_ARM_)
#define WIN_NT_TLS_OFFSET (0xE10)
#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+ /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+ // TODO-ARM-CQ: should we inline TlsGetValue here?
+
if (threadTlsIndex < 64)
{
// mov reg, FS:[0xE10+threadTlsIndex*4]
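The defines in the hunk above reflect the classic 32-bit Windows TEB layout: the first 64 TLS slots live at FS:[0xE10], and higher indices go through the expansion-slot pointer at FS:[0xF94]. A rough sketch of the load sequence the commented assembly describes (readFsPtr is a hypothetical helper, not a JIT function):

    #include <cstddef>

    // Hypothetical helper: reads a pointer-sized value at FS:[fsOffset].
    extern void* readFsPtr(size_t fsOffset);

    void* sketchGetTlsValue(unsigned threadTlsIndex)
    {
        if (threadTlsIndex < 64)
        {
            // mov reg, FS:[0xE10 + threadTlsIndex*4]   (WIN_NT_TLS_OFFSET)
            return readFsPtr(0xE10 + threadTlsIndex * sizeof(void*));
        }
        // mov reg, FS:[0xF94]                          (WIN_NT5_TLS_HIGHOFFSET)
        // mov reg, [reg + (threadTlsIndex - 64)*4]
        void** expansion = (void**)readFsPtr(0xF94);
        return expansion[threadTlsIndex - 64];
    }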
@@ -22182,8 +22212,6 @@ regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameList
pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
#endif // _TARGET_X86_
- /* mov dword ptr [frame.callSiteReturnAddress], label */
-
#if CPU_LOAD_STORE_ARCH
regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
getEmitter()->emitIns_J_R (INS_adr,
@@ -22197,6 +22225,8 @@ regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameList
compiler->lvaInlinedPInvokeFrameVar,
pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
#else // !CPU_LOAD_STORE_ARCH
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+
getEmitter()->emitIns_J_S (ins_Store(TYP_I_IMPL),
EA_PTRSIZE,
returnLabel,
@@ -22277,6 +22307,8 @@ void CodeGen::genPInvokeCallEpilog(LclVarDsc * frameListRoot,
else
{
/* mov reg2, dword ptr [tcb address] */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
reg2 = REG_R2;
#else
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 6f0782dc66..78a8db72ab 100755
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -364,9 +364,9 @@ void CodeGen::genCodeForBBlist()
regSet.rsSpillBeg();
+#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo)
{
siInit();
@@ -455,9 +455,9 @@ void CodeGen::genCodeForBBlist()
genUpdateLife(block->bbLiveIn);
// Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
- // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
- // That would require handling the changes in recordVarLocationsAtStartOfBB().
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
regMaskTP newLiveRegSet = RBM_NONE;
regMaskTP newRegGCrefSet = RBM_NONE;
@@ -617,6 +617,7 @@ void CodeGen::genCodeForBBlist()
* Generate code for each statement-tree in the block
*
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (block->bbFlags & BBF_FUNCLET_BEG)
@@ -3221,7 +3222,8 @@ CodeGen::genLclHeap(GenTreePtr tree)
// Nothing to pop off from the stack.
if (compiler->lvaOutgoingArgSpaceSize > 0)
{
- assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
}
@@ -3273,10 +3275,11 @@ CodeGen::genLclHeap(GenTreePtr tree)
}
if (doNoInitLessThanOnePageAlloc)
- {
+ {
// Since the size is less than a page, simply adjust ESP.
// ESP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
// For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
@@ -5304,7 +5307,8 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, regNumber
// Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
if (op1->gtRegNum != dstReg)
{
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area for tail calls) in RDI.
+ // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
+ // for tail calls) in RDI.
// Destination is always local (on the stack) - use EA_PTRSIZE.
getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
}
@@ -6519,10 +6523,11 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
// Move the values into the right registers.
//
- // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
- // Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another basic block may not be expecting it.
- // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
- // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
+ // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
+ // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
+ // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
+ // genCodeForBBList().
if (type0 != TYP_UNKNOWN)
{
getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
@@ -6583,9 +6588,10 @@ void CodeGen::genJmpMethod(GenTreePtr jmp)
}
#if FEATURE_VARARG && defined(_TARGET_AMD64_)
- // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
- // This is due to the AMD64 ABI which requires floating point values passed to varargs functions to be passed in
- // both integer and floating point registers. It doesn't apply to x86, which passes floating point values on the stack.
+ // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg
+ // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to
+ // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
+ // values on the stack.
if (compiler->info.compIsVarArgs)
{
regNumber intArgReg;
@@ -7221,8 +7227,8 @@ void CodeGen::genJTrueLong(GenTreePtr treeNode)
//
// Opcode Amd64 equivalent Comment
// ------ ----------------- --------
-// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above table.
-// jb
+// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
+// jb table
//
// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
// ja
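The reflowed table above relies on how ucomis[s|d] sets the flags. A small model that matches the table (names and structure are assumptions for illustration):

    struct UComisFlags { bool ZF, PF, CF; };

    // ucomis[s|d] a, b:
    //   unordered (either NaN) -> ZF=PF=CF=1
    //   a > b                  -> all flags clear
    //   a < b                  -> CF=1 only
    //   a == b                 -> ZF=1 only
    UComisFlags ucomisModel(double a, double b)
    {
        if (a != a || b != b) return { true,  true,  true  };
        if (a > b)            return { false, false, false };
        if (a < b)            return { false, false, true  };
        return { true, false, false };
    }

    // BLT.UN(a,b): ucomis a, b ; jb  -- taken when CF=1, i.e. a<b or unordered.
    // BLT(a,b)   : ucomis b, a ; ja  -- taken when CF=0 && ZF=0, i.e. b>a, hence a<b ordered.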
@@ -7494,10 +7500,9 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
}
else
{
+#ifdef DEBUG
// jmpKind[1] != EJ_NONE implies BEQ and BEN.UN of floating point values.
// These are represented by two conditions.
-
-#ifdef DEBUG
if (tree->gtOper == GT_EQ)
{
// This must be an ordered comparison.
@@ -8836,8 +8841,9 @@ CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
{
// We have a GC (byref or ref) pointer
// TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter only.)
- // See emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emission of gc info.
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsq); and emission of gc info.
var_types memType;
if (gcPtrs[i] == TYPE_GC_REF)
@@ -9275,6 +9281,7 @@ void CodeGen::genAmd64EmitterUnitTests()
//
// Loads
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
#ifdef FEATURE_AVX_SUPPORT
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 15620f43fc..1f8a319784 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -554,7 +554,7 @@ var_types Compiler::getPrimitiveTypeForStruct( unsigned structSize,
}
break;
-#ifndef _TARGET_XARCH_
+#ifndef _TARGET_XARCH_
case 5:
case 6:
case 7:
@@ -823,6 +823,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
{
// We have a (large) struct that can't be replaced with a "primitive" type
// and can't be passed in multiple registers
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
@@ -1518,9 +1519,9 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
#if FEATURE_STACK_FP_X87
fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl ), sizeof(gtDummy->gtFPlvl ));
#endif // FEATURE_STACK_FP_X87
-// TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree class (see https://github.com/dotnet/coreclr/pull/493)
-// fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ), sizeof(gtDummy->_gtCostEx ));
-// fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ), sizeof(gtDummy->_gtCostSz ));
+ // TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree class (see https://github.com/dotnet/coreclr/pull/493)
+ // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ), sizeof(gtDummy->_gtCostEx ));
+ // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ), sizeof(gtDummy->_gtCostSz ));
fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags ), sizeof(gtDummy->gtFlags ));
fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair ), sizeof(gtDummy->gtVNPair ));
fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs ), sizeof(gtDummy->gtRsvdRegs ));
@@ -2132,6 +2133,7 @@ const char * Compiler::compRegNameForSize(regNumber reg, size_t size)
if (size == 0 || size >= 4)
return compRegVarName(reg, true);
+ // clang-format off
static
const char * sizeNames[][2] =
{
@@ -2154,6 +2156,7 @@ const char * Compiler::compRegNameForSize(regNumber reg, size_t size)
{ "r15b", "r15w" },
#endif // _TARGET_AMD64_
};
+ // clang-format on
assert(isByteReg (reg));
assert(genRegMask(reg) & RBM_BYTE_REGS);
@@ -2245,6 +2248,8 @@ void Compiler::compSetProcessor()
//
// Processor specific optimizations
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
opts.compUseFCOMI = false;
opts.compUseCMOV = true;
@@ -2419,8 +2424,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
- // We never want to have debugging enabled when regenerating GC encoding patterns
#if REGEN_SHORTCUTS || REGEN_CALLPAT
+ // We never want to have debugging enabled when regenerating GC encoding patterns
opts.compDbgCode = false;
opts.compDbgInfo = false;
opts.compDbgEnC = false;
@@ -3136,8 +3141,9 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
compProfilerMethHndIndirected = false;
}
- // Right now this ELT hook option is enabled only for arm and amd64
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // Right now this ELT hook option is enabled only for arm and amd64
+
// Honour complus_JitELTHookEnabled only if VM has not asked us to generate profiler
// hooks in the first place. That is, Override VM only if it hasn't asked for a
// profiler callback for this method.
@@ -4196,6 +4202,7 @@ void Compiler::compCompile(void * * methodCodePtr,
// IMPORTANT, after this point, every place where tree topology changes must redo evaluation
// order (gtSetStmtInfo) and relink nodes (fgSetStmtSeq) if required.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
// Now we have determined the order of evaluation and the gtCosts for every node.
@@ -4370,6 +4377,7 @@ void Compiler::compCompile(void * * methodCodePtr,
#endif // _TARGET_ARMARCH_
/* Assign registers to variables, etc. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef LEGACY_BACKEND
///////////////////////////////////////////////////////////////////////////////
@@ -4640,7 +4648,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
}
#endif // FUNC_INFO_LOGGING
-// if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
+ // if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
info.compCompHnd = compHnd;
info.compMethodHnd = methodHnd;
@@ -4650,9 +4658,9 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
// with an ARM-targeting "altjit").
info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
+#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
// ToDo: This code is to allow us to run UNIX codegen on Windows for now. Remove when appropriate.
// Make sure that the generated UNIX altjit code is skipped on Windows. The static jit codegen is used to run.
-#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
info.compMatchedVM = false;
#endif // UNIX_AMD64_ABI
@@ -4948,11 +4956,13 @@ void Compiler::compCompileFinish()
static bool headerPrinted = false;
if (!headerPrinted)
{
+ // clang-format off
headerPrinted = true;
printf(" | Profiled | Exec- | Method has | calls | Num |LclV |AProp| CSE | Reg |bytes | %3s code size | \n", Target::g_tgtCPUName);
printf(" mdToken | | RGN | Count | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Alloc | IL | HOT | COLD | method name \n");
printf("---------+-----+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-------+-----------\n");
- // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // clang-format on
}
printf("%08X | ", currentMethodToken);
@@ -6018,7 +6028,7 @@ START:
pParam->pComp->prevCompiler = JitTls::GetCompiler();
JitTls::SetCompiler(pParam->pComp);
-///PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
+// PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
#if defined(_PREFAST_) || defined(_PREFIX_)
PREFIX_ASSUME(pParam->pComp != NULL);
#else
@@ -7134,7 +7144,6 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
// Completes the timing of the current method, and adds it to "sum".
void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
{
- // Otherwise...
#ifdef DEBUG
unsigned __int64 totCycles2 = 0;
for (int i = 0; i < PHASE_NUMBER_OF; i++)
@@ -7417,8 +7426,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
* cReach, dReach : Display all block reachability (call fgDispReach()).
* cDoms, dDoms : Display all block dominators (call fgDispDoms()).
* cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
- * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable indices.
- * These are converted to variable numbers and sorted. (Calls dumpConvertedVarSet()).
+ * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
+ * indices. These are converted to variable numbers and sorted. (Calls
+ * dumpConvertedVarSet()).
*
* cFuncIR, dFuncIR : Display all the basic blocks of a function in linear IR form.
* cLoopIR, dLoopIR : Display a loop in linear IR form.
@@ -8171,6 +8181,7 @@ int cTreeFlagsIR(Compiler *comp, GenTree *tree)
chars += printf("flags=");
// Node flags
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(DEBUG)
#if SMALL_TREE_NODES
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 3eb2fdcb83..fdef4df228 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -422,10 +422,12 @@ public:
private:
- regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a register pair).
- // For LEGACY_BACKEND, this is only set if lvRegister is non-zero. For non-LEGACY_BACKEND, it is set during codegen
- // any time the variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set to non-zero if the
- // variable gets the same register assignment for its entire lifetime).
+ regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a
+ // register pair). For LEGACY_BACKEND, this is only set if lvRegister is
+ // non-zero. For non-LEGACY_BACKEND, it is set during codegen any time the
+ // variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set
+ // to non-zero if the variable gets the same register assignment for its entire
+ // lifetime).
#if !defined(_TARGET_64BIT_)
regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
#endif // !defined(_TARGET_64BIT_)
@@ -1608,9 +1610,10 @@ public:
// For a finally handler, find the region index that the BBJ_CALLFINALLY lives in that calls the handler,
// or NO_ENCLOSING_INDEX if the BBJ_CALLFINALLY lives in the main function body. Normally, the index
// is the same index as the handler (and the BBJ_CALLFINALLY lives in the 'try' region), but for AMD64 the
- // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function body.
- // If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the BBJ_CALLFINALLY
- // lives in the returned index's 'try' region, or 'false' if lives in the handler region. (It never lives in a filter.)
+ // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function
+ // body. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the
+ // BBJ_CALLFINALLY lives in the returned index's 'try' region, or 'false' if it lives in the handler region. (It never
+ // lives in a filter.)
unsigned ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion);
// Find the range of basic blocks in which all BBJ_CALLFINALLY will be found that target the 'finallyIndex' region's
@@ -2017,9 +2020,9 @@ public:
GenTreePtr gtGetThisArg(GenTreePtr call);
// Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
- // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but complicates
- // the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing the given "fldHnd",
- // is such an object pointer.
+ // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but
+ // complicates the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing
+ // the given "fldHnd", is such an object pointer.
bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
// Return true if call is a recursive call; return false otherwise.
@@ -2030,12 +2033,13 @@ public:
GenTreePtr gtFoldExpr (GenTreePtr tree);
GenTreePtr
#ifdef __clang__
- // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is refactored in a simpler code.
- // This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in release build the optimizer is mistyping
- // (or just wrongly decides to use 32 bit operation for a corner case of MIN_LONG) the args of the (ltemp / lval2)
- // to int (it does a 32 bit div operation instead of 64 bit) - see the implementation of the method in gentree.cpp.
- // For the case of lval1 and lval2 equal to MIN_LONG (0x8000000000000000) this results in raising a SIGFPE.
- // The method implementation is rather complex. Disable optimizations for now.
+ // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is
+ // refactored in a simpler code. This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in
+ // release build the optimizer is mistyping (or just wrongly decides to use 32 bit operation for a corner case
+ // of MIN_LONG) the args of the (ltemp / lval2) to int (it does a 32 bit div operation instead of 64 bit) - see
+ // the implementation of the method in gentree.cpp. For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE. The method implementation is rather complex. Disable
+ // optimizations for now.
__attribute__((optnone))
#endif // __clang__
gtFoldExprConst(GenTreePtr tree);
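One plausible way to see why the MIN_LONG corner case called out above is fatal when the divide is narrowed to 32 bits (an illustration, not a claim about the exact clang-3.5 codegen): on a two's-complement target the low 32 bits of 0x8000000000000000 are zero, so the narrowed operation degenerates into a divide by zero, which raises SIGFPE.

    #include <cstdint>

    void minLongNarrowingSketch()
    {
        int64_t lval     = INT64_MIN;     // 0x8000000000000000
        int32_t narrowed = (int32_t)lval; // == 0: the low 32 bits are all zero
        // int32_t q = narrowed / narrowed; // would trap with SIGFPE: divide by zero
        (void)lval;
        (void)narrowed;
    }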
@@ -2295,8 +2299,8 @@ public :
// On architectures whose ABIs allow structs to be passed in registers, struct promotion will sometimes
// require us to "rematerialize" a struct from it's separate constituent field variables. Packing several sub-word
// field variables into an argument register is a hard problem. It's easier to reserve a word of memory into which
- // such field can be copied, after which the assembled memory word can be read into the register. We will allocate this
- // variable to be this scratch word whenever struct promotion occurs.
+ // such field can be copied, after which the assembled memory word can be read into the register. We will allocate
+ // this variable to be this scratch word whenever struct promotion occurs.
unsigned lvaPromotedStructAssemblyScratchVar;
#endif // _TARGET_ARM_
@@ -3254,11 +3258,12 @@ public :
unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information
BasicBlock** fgBBInvPostOrder; // The flow graph stored in an array sorted in topological order, needed to compute dominance. Indexed by block number. Size: fgBBNumMax + 1.
- // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute dominance queries in O(1).
- // fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and postorder number, respectively.
- // The arrays are indexed by basic block number. (Note that blocks are numbered starting from one. Thus, we always waste
- // element zero. This makes debugging easier and makes the code less likely to suffer from bugs stemming from forgetting
- // to add or subtract one from the block number to form an array index). The arrays are of size fgBBNumMax + 1.
+ // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute
+ // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and
+ // postorder number, respectively. The arrays are indexed by basic block number. (Note that blocks are numbered
+ // starting from one. Thus, we always waste element zero. This makes debugging easier and makes the code less likely
+ // to suffer from bugs stemming from forgetting to add or subtract one from the block number to form an array
+ // index). The arrays are of size fgBBNumMax + 1.
unsigned * fgDomTreePreOrder;
unsigned * fgDomTreePostOrder;
@@ -3603,9 +3608,9 @@ public :
void fgInterBlockLocalVarLiveness();
// The presence of "x op= y" operations presents some difficulties for SSA: this is both a use of some SSA name of
- // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose whether
- // to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us to record/recover
- // the "def" SSA number, given the lcl var node for "x" in such a tree.
+ // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose
+ // whether to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us
+ // to record/recover the "def" SSA number, given the lcl var node for "x" in such a tree.
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, unsigned, JitSimplerHashBehavior> NodeToUnsignedMap;
NodeToUnsignedMap* m_opAsgnVarDefSsaNums;
NodeToUnsignedMap* GetOpAsgnVarDefSsaNums()
@@ -3933,8 +3938,9 @@ protected:
// (performed by fgComputeDoms), this procedure builds the dominance tree represented
// adjacency lists.
- // In order to speed up the queries of the form 'Does A dominates B', we can perform a DFS preorder and postorder traversal of the dominance tree and the
- // dominance query will become A dominates B iif preOrder(A) <= preOrder(B) && postOrder(A) >= postOrder(B) making the computation O(1).
+ // In order to speed up the queries of the form 'Does A dominate B', we can perform a DFS preorder and postorder
+ // traversal of the dominance tree and the dominance query will become A dominates B iff preOrder(A) <= preOrder(B)
+ // && postOrder(A) >= postOrder(B) making the computation O(1).
void fgTraverseDomTree (unsigned bbNum,
BasicBlockList** domTree,
unsigned* preNum,
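The reflowed comment above describes the O(1) dominance query enabled by caching DFS preorder and postorder numbers. A minimal sketch of that check, with assumed array names:

    // A dominates B iff A's DFS interval encloses B's interval.
    // preOrder/postOrder are the cached per-block DFS numbers (assumed names).
    bool sketchDominates(const unsigned* preOrder, const unsigned* postOrder, unsigned a, unsigned b)
    {
        return (preOrder[a] <= preOrder[b]) && (postOrder[a] >= postOrder[b]);
    }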
@@ -4562,8 +4568,8 @@ private:
// all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
// small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
// argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
- // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes representing
- // method call argument lists.)
+ // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
+ // representing method call argument lists.)
enum MorphAddrContextKind {
MACK_Ind,
MACK_Addr,
@@ -4882,8 +4888,8 @@ protected:
// outside of that loop. Exempt expressions whose value number is in "hoistedInParents"; add VN's of hoisted
// expressions to "hoistInLoop".
// Returns "true" iff "tree" is loop-invariant (wrt "lnum").
- // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before any
- // possible globally visible side effects. Assume is called in evaluation order, and updates this.
+ // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before
+ // any possible globally visible side effects. Assume is called in evaluation order, and updates this.
bool optHoistLoopExprsForTree(GenTreePtr tree,
unsigned lnum,
LoopHoistContext* hoistCtxt,
@@ -4914,8 +4920,8 @@ protected:
private:
// Requires "lnum" to be the index of an outermost loop in the loop table. Traverses the body of that loop,
- // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and static)
- // written to, and SZ-array element type equivalence classes updated.
+ // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and
+ // static) written to, and SZ-array element type equivalence classes updated.
void optComputeLoopNestSideEffects(unsigned lnum);
// Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part.
@@ -4969,13 +4975,14 @@ protected :
public:
- // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in bbNext order)
- // sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered" in bbNext order;
- // we use comparisons on the bbNum to decide order.)
+ // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in
+ // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered"
+ // in bbNext order; we use comparisons on the bbNum to decide order.)
// The blocks that define the body are
// first <= top <= entry <= bottom .
- // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a single 'head' block.
- // The meanings of these blocks are given in the definitions below. Also see the picture at Compiler::optFindNaturalLoops().
+ // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a
+ // single 'head' block. The meanings of these blocks are given in the definitions below. Also see the picture at
+ // Compiler::optFindNaturalLoops().
struct LoopDsc
{
BasicBlock * lpHead; // HEAD of the loop (not part of the looping of the loop) -- has ENTRY as a successor.
@@ -5021,7 +5028,8 @@ public:
#define LPFLG_DONT_UNROLL 0x2000 // do not unroll this loop
#define LPFLG_ASGVARS_YES 0x4000 // "lpAsgVars" has been computed
-#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet tyep are assigned to.
+#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
+ // type are assigned to.
bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
@@ -5224,8 +5232,8 @@ protected :
// loop nested in "loopInd" that shares the same head as "loopInd".
void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
- // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in "redirectMap",
- // change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
+ // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in
+ // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
// Marks the containsCall information to "lnum" and any parent loops.
@@ -6562,6 +6570,8 @@ public :
// ICorStaticInfo wrapper functions
+ bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
+
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#ifdef DEBUG
static void dumpSystemVClassificationType(SystemVClassificationType ct);
@@ -6571,7 +6581,6 @@ public :
/*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
template<typename ParamType>
bool eeRunWithErrorTrap(void (*function)(ParamType*), ParamType* param)
@@ -6583,12 +6592,13 @@ public :
// Utility functions
+ const char * eeGetFieldName (CORINFO_FIELD_HANDLE fieldHnd,
+ const char ** classNamePtr = NULL);
+
#if defined(DEBUG)
const wchar_t * eeGetCPString (size_t stringHandle);
#endif
- const char * eeGetFieldName (CORINFO_FIELD_HANDLE fieldHnd,
- const char ** classNamePtr = NULL);
const char* eeGetClassName (CORINFO_CLASS_HANDLE clsHnd);
static CORINFO_METHOD_HANDLE eeFindHelper (unsigned helper);
@@ -6788,11 +6798,11 @@ public :
regMaskTP compNoGCHelperCallKillSet (CorInfoHelpFunc helper);
#ifdef _TARGET_ARM_
- // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at "firstArgRegNum",
- // which is assumed to have already been aligned to the register alignment restriction of the struct type.
- // Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" -- i.e., internal
- // "holes" caused by internal alignment constraints. For example, if the struct contained an int and a double, and we
- // at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
+ // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at
+ // "firstArgRegNum", which is assumed to have already been aligned to the register alignment restriction of the
+ // struct type. Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" --
+ // i.e., internal "holes" caused by internal alignment constraints. For example, if the struct contained an int and
+ // a double, and we are at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
void fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
unsigned firstArgRegNum,
regMaskTP* pArgSkippedRegMask);
@@ -7288,8 +7298,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
return emitTypeSize(TYP_SIMD8);
}
- // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
#ifdef FEATURE_AVX_SUPPORT
+ // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
static const unsigned maxPossibleSIMDStructBytes = 32;
#else // !FEATURE_AVX_SUPPORT
static const unsigned maxPossibleSIMDStructBytes = 16;
@@ -7456,10 +7466,10 @@ public :
bool compBlkOpUsed; // Does the method do a COPYBLK or INITBLK
#endif
+#ifdef DEBUG
// State information - which phases have completed?
// These are kept together for easy discoverability
-#ifdef DEBUG
bool bRangeAllowStress;
bool compCodeGenDone;
int64_t compNumStatementLinksTraversed; // # of links traversed while doing debug checks
@@ -7487,8 +7497,9 @@ public :
bool getNeedsGSSecurityCookie() const { return compNeedsGSSecurityCookie; }
void setNeedsGSSecurityCookie() { compNeedsGSSecurityCookie = true; }
- FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During frame layout calculations,
- // this is the level we are currently computing.
+ FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During
+ // frame layout calculations, this is the level we are currently
+ // computing.
//---------------------------- JITing options -----------------------------
@@ -7634,7 +7645,6 @@ public :
bool compNeedSecurityCheck; // This flag really means where or not a security object needs
// to be allocated on the stack.
-
// It will be set to true in the following cases:
// 1. When the method being compiled has a declarative security
// (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
@@ -7664,9 +7674,9 @@ public :
// This flag is indicating if there is a need to align the frame.
// On AMD64-Windows, if there are calls, 4 slots for the outgoing ars are allocated, except for
// FastTailCall. This slots makes the frame size non-zero, so alignment logic will be called.
- // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with frame size of 0.
- // The frame alignment logic won't kick in. This flags takes care of the AMD64-Unix case by remembering that there
- // are calls and making sure the frame alignment logic is executed.
+ // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with frame size of
+ // 0. The frame alignment logic won't kick in. This flags takes care of the AMD64-Unix case by remembering that
+ // there are calls and making sure the frame alignment logic is executed.
bool compNeedToAlignFrame;
#endif // UNIX_AMD64_ABI
@@ -7776,7 +7786,7 @@ public :
#endif // DEBUG
-
+// clang-format off
#define STRESS_MODES \
\
STRESS_MODE(NONE) \
@@ -7816,6 +7826,7 @@ public :
STRESS_MODES
#undef STRESS_MODE
};
+// clang-format on
#ifdef DEBUG
static
@@ -7948,8 +7959,9 @@ public :
// current number of EH clauses (after additions like synchronized
// methods and funclets, and removals like unreachable code deletion).
- bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler and the VM expects that,
- // or the JIT is a "self-host" compiler (e.g., x86 hosted targeting x86) and the VM expects that.
+ bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler
+ // and the VM expects that, or the JIT is a "self-host" compiler
+ // (e.g., x86 hosted targeting x86) and the VM expects that.
#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
@@ -8010,6 +8022,8 @@ public :
//
// 3. Windows 64-bit native calling convention also requires the address of RetBuff
// to be returned in RAX.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
return (info.compRetBuffArg != BAD_VAR_NUM);
#else // !_TARGET_AMD64_
@@ -8405,6 +8419,8 @@ protected:
CORJIT_FLAGS * compileFlags);
// Data required for generating profiler Enter/Leave/TailCall hooks
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef PROFILING_SUPPORTED
bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
void *compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
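The CLANG_FORMAT_COMMENT_ANCHOR lines added in the two hunks above pin a comment to the surrounding code so that clang-format does not re-indent it against the #ifdef that follows. Its definition is outside this diff; assuming it expands to an empty statement, the idiom looks roughly like this (the flag and function below are invented for the sketch):

#include <cstdio>

// Assumed definition for the sketch; the real one lives elsewhere in the JIT headers.
#define CLANG_FORMAT_COMMENT_ANCHOR ;

static bool needsRetBufInRegister()
{
    // Comment pinned by the anchor so clang-format keeps its indentation
    // relative to the code rather than to the preprocessor block below.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef SKETCH_TARGET_AMD64 // hypothetical stand-in for _TARGET_AMD64_
    return true;
#else
    return false;
#endif
}

int main()
{
    printf("%d\n", needsRetBufInRegister());
    return 0;
}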
@@ -8650,8 +8666,8 @@ public:
// having multiple try native code regions for a single try il region. This is doable and shouldnt be
// a big change in the exception.
//
- // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down optimizations
- // For these 2 cases:
+ // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down
+ // optimizations. For these 2 cases:
//
// - When there is a chance that we will have FP transition blocks, we won't do procedure splitting.
// - When a method has a handler, it won't enregister any FP variables that go thru a conditional long or
@@ -8877,9 +8893,9 @@ public:
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, FieldSeqNode*, JitSimplerHashBehavior> NodeToFieldSeqMap;
- // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since the offset of
- // the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant that is
- // added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
+ // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since
+ // the offset of the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant
+ // that is added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
// attach the field sequence directly to the address node.
NodeToFieldSeqMap* m_zeroOffsetFieldMap;
@@ -8903,8 +8919,8 @@ public:
// record the the field sequence using the ZeroOffsetFieldMap described above.
//
// One exception above is that "op1" is a node of type "TYP_REF" where "op1" is a GT_LCL_VAR.
- // This happens when System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in CoreRT.
- // Such case is handled same as the default case.
+ // This happens when System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in
+ // CoreRT. Such case is handled same as the default case.
void fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq);
@@ -8925,9 +8941,9 @@ public:
NodeToUnsignedMap* m_heapSsaMap;
- // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap states.
- // (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state, all the possible
- // heap states are possible initial states of the corresponding catch block(s).)
+ // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
+ // all the possible heap states are possible initial states of the corresponding catch block(s).)
NodeToUnsignedMap* GetHeapSsaMap()
{
Compiler* compRoot = impInlineRoot();
@@ -9132,9 +9148,10 @@ struct NodeSizeStats
size_t genTreeNodeCnt;
size_t genTreeNodeSize; // The size we allocate
- size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller than the
- // allocated size, but we sometimes use SetOper()/ChangeOper() to change a smaller node
- // to a larger one. TODO-Cleanup: add stats on SetOper()/ChangeOper() usage to quanitfy this.
+ size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller
+ // than the allocated size, but we sometimes use SetOper()/ChangeOper() to change
+ // a smaller node to a larger one. TODO-Cleanup: add stats on
+ // SetOper()/ChangeOper() usage to quantify this.
};
extern NodeSizeStats genNodeSizeStats; // Total node size stats
extern NodeSizeStats genNodeSizeStatsPerFunc; // Per-function node size stats
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index b6127e72ed..c1c0be1ff8 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -2291,8 +2291,8 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased)
if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
{
#ifdef _TARGET_AMD64_
- // On amd64, every param has a stack location, except on Unix-like systems.
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On amd64, every param has a stack location, except on Unix-like systems.
assert(varDsc->lvIsParam);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
@@ -2377,6 +2377,7 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased)
if (!FPbased)
{
// Worst case stack based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_FIXED_OUT_ARGS
int outGoingArgSpaceSize = lvaOutgoingArgSpaceSize;
#else
@@ -2387,6 +2388,8 @@ int Compiler::lvaFrameAddress(int varNum, bool * pFPbased)
else
{
// Worst case FP based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
offset = codeGen->genCallerSPtoInitialSPdelta() - codeGen->genCallerSPtoFPdelta();
#else
@@ -2486,11 +2489,13 @@ BOOL Compiler::lvaIsOriginalThisArg(unsigned varNum)
{
LclVarDsc * varDsc = lvaTable + varNum;
// Should never write to or take the address of the original 'this' arg
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef JIT32_GCENCODER
// With the general encoder/decoder, when the original 'this' arg is needed as a generics context param, we
// copy to a new local, and mark the original as DoNotEnregister, to
- // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it should
- // not be written to, or address-exposed.
+ // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it
+ // should not be written to, or address-exposed.
assert(!varDsc->lvArgWrite && (!varDsc->lvAddrExposed || ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)));
#else
assert(!varDsc->lvArgWrite && !varDsc->lvAddrExposed);
@@ -4198,9 +4203,9 @@ bool Compiler::compIsProfilerHookNeeded()
#ifdef PROFILING_SUPPORTED
return compProfilerHookNeeded
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
// IL stubs are excluded by VM and we need to do the same even running
// under a complus env hook to generate profiler hooks
-#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
|| (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
#endif
;
@@ -4377,9 +4382,11 @@ Compiler::lvaPromotionType Compiler::lvaGetPromotionType (const LclVarDsc *
return PROMOTION_TYPE_DEPENDENT;
}
- // we have a parameter that could be enregistered
+ // We have a parameter that could be enregistered
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
// The struct parameter is a register candidate
return PROMOTION_TYPE_INDEPENDENT;
#else
@@ -4560,7 +4567,8 @@ bool Compiler::fgExcludeFromSsa(unsigned lclNum)
(lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
{
// SSA must exclude struct fields that are not independent
- // - because we don't model the struct assignment properly when multiple fields can be assigned by one struct assignment.
+ // - because we don't model the struct assignment properly when multiple fields can be assigned by one struct
+ // assignment.
// - SSA doesn't allow a single node to contain multiple SSA definitions.
// - and PROMOTION_TYPE_DEPENDEDNT fields are never candidates for a register.
//
diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h
index 1ffd2db0a1..1e18d516f8 100644
--- a/src/jit/compmemkind.h
+++ b/src/jit/compmemkind.h
@@ -10,6 +10,7 @@
// This list of macro invocations should be used to define the CompMemKind enumeration,
// and the corresponding array of string names for these enum members.
+// clang-format off
CompMemKindMacro(AssertionProp)
CompMemKindMacro(ASTNode)
CompMemKindMacro(InstDesc)
@@ -50,5 +51,6 @@ CompMemKindMacro(Codegen)
CompMemKindMacro(LoopOpt)
CompMemKindMacro(LoopHoist)
CompMemKindMacro(Unknown)
+// clang-format on
#undef CompMemKindMacro
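The off/on pair added here (and to the other macro-table headers in this change) tells clang-format to leave hand-aligned tables untouched. A self-contained version of the guard pattern, with invented entries; clang-format documents the markers with a space after the slashes, so the spaced spelling is the safe one:

#include <cstdio>

// clang-format off
static const char* const kMemKindNames[] =
{
    "AssertionProp",   // deliberately hand-aligned comments:
    "ASTNode",         // clang-format would normally re-pack this table,
    "InstDesc",        // but the markers above and below protect it
};
// clang-format on

int main()
{
    for (const char* name : kMemKindNames)
        printf("%s\n", name);
    return 0;
}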
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
index 9291e227cf..999f6cf348 100644
--- a/src/jit/compphases.h
+++ b/src/jit/compphases.h
@@ -18,6 +18,7 @@
// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
+// clang-format off
CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
@@ -84,5 +85,6 @@ CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve",
CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+// clang-format on
#undef CompPhaseNameMacro
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 365a0ff529..4af97efe2e 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -94,7 +94,8 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block)
void DecomposeLongs::DecomposeStmt(GenTreeStmt* stmt)
{
GenTree* savedStmt = m_compiler->compCurStmt; // We'll need to restore this later, in case this call was recursive.
- m_compiler->compCurStmt = stmt; // Publish the current statement globally. One reason: fgInsertEmbeddedFormTemp requires it.
+ m_compiler->compCurStmt = stmt; // Publish the current statement globally. One reason:
+ // fgInsertEmbeddedFormTemp requires it.
m_compiler->fgWalkTreePost(&stmt->gtStmt.gtStmtExpr, &DecomposeLongs::DecompNodeHelper, this, true);
m_compiler->compCurStmt = savedStmt;
}
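The comment reflowed above is about publishing the current statement globally and restoring the previous value afterwards. A toy version of that save/publish/restore idiom (stand-in globals, nothing from the JIT) shows why the restore matters when the walk re-enters:

#include <cstdio>

static const char* g_curStmt = nullptr; // stand-in for compCurStmt

static void process(const char* stmt, int depth)
{
    const char* saved = g_curStmt; // save whatever the caller published
    g_curStmt = stmt;              // publish ours for helpers that need it

    if (depth > 0)
        process("nested", depth - 1); // recursion overwrites, then restores

    printf("finishing '%s', current is '%s'\n", stmt, g_curStmt);
    g_curStmt = saved;             // restore the caller's view on the way out
}

int main()
{
    process("outer", 1);
    return 0;
}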
@@ -805,7 +806,7 @@ void DecomposeLongs::DecomposeStoreInd(GenTree** ppTree, Compiler::fgWalkData* d
SimpleLinkNodeAfter(addrHigh, storeIndHigh);
// External links of storeIndHigh tree
- //dataHigh->gtPrev = nullptr;
+ // dataHigh->gtPrev = nullptr;
if (isEmbeddedStmt)
{
// If storeIndTree is an embedded statement, connect storeIndLow
@@ -1286,4 +1287,4 @@ void DecomposeLongs::SimpleLinkNodeAfter(GenTree* insertionPoint, GenTree* node)
#endif // !_TARGET_64BIT_
-#endif // !LEGACY_BACKEND
\ No newline at end of file
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/disasm.cpp b/src/jit/disasm.cpp
index 7d8383f327..e7030b309e 100644
--- a/src/jit/disasm.cpp
+++ b/src/jit/disasm.cpp
@@ -123,7 +123,7 @@ size_t DisAssembler::disCchAddrMember (const DIS* pdis,
switch (terminationType)
{
-// int disCallSize;
+ // int disCallSize;
case DISX86::trmtaJmpShort:
case DISX86::trmtaJmpCcShort:
@@ -171,7 +171,7 @@ size_t DisAssembler::disCchAddrMember (const DIS* pdis,
/* find the emitter block and the offset of the call fixup */
/* for the fixup offset we have to add the opcode size for the call - in the case of a near call is 1 */
-// disCallSize = 1;
+ // disCallSize = 1;
{
size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
@@ -214,7 +214,7 @@ size_t DisAssembler::disCchAddrMember (const DIS* pdis,
switch (terminationType)
{
-// int disCallSize;
+ // int disCallSize;
case DISARM64::TRMTA::trmtaBra:
case DISARM64::TRMTA::trmtaBraCase:
@@ -257,7 +257,7 @@ size_t DisAssembler::disCchAddrMember (const DIS* pdis,
/* find the emitter block and the offset of the call fixup */
/* for the fixup offset we have to add the opcode size for the call - in the case of a near call is 1 */
-// disCallSize = 1;
+ // disCallSize = 1;
{
size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
@@ -1524,8 +1524,8 @@ void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* col
return;
}
- // Should we make it diffable?
#ifdef DEBUG
+ // Should we make it diffable?
disDiffable = disComp->opts.dspDiffable;
#else // !DEBUG
// NOTE: non-debug builds are always diffable!
diff --git a/src/jit/disasm.h b/src/jit/disasm.h
index 5afaecd2d4..27480615fa 100644
--- a/src/jit/disasm.h
+++ b/src/jit/disasm.h
@@ -148,7 +148,8 @@ private:
/* Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section) */
const BYTE* disGetLinearAddr(size_t offset);
- /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset points to */
+ /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset
+ * points to */
size_t disGetBufferSize(size_t offset);
// Map of instruction addresses to call target method handles for normal calls.
@@ -245,7 +246,6 @@ private:
bool printit = false,
bool dispOffs = false,
bool dispCodeBytes = false);
-
};
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index c726856b9b..d7293d53ba 100755
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -435,6 +435,7 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_
// Everything fits into a single 'slot' size
// to accommodate irregular sized structs, they are passed byref
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
CORINFO_CLASS_HANDLE argClass;
@@ -461,9 +462,11 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_
// make certain the EE passes us back the right thing for refanys
assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2*sizeof(void*));
-#if FEATURE_MULTIREG_ARGS
// For each target that supports passing struct args in multiple registers
// apply the target specific rules for them here:
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_MULTIREG_ARGS
#if defined(_TARGET_ARM64_)
// Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference
if (structSize > MAX_PASS_MULTIREG_BYTES)
@@ -484,12 +487,11 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_
return TARGET_POINTER_SIZE;
}
}
+ // otherwise we will pass this struct by value in multiple registers
}
- // otherwise will we pass this struct by value in multiple registers
- //
#elif defined(_TARGET_ARM_)
// otherwise will we pass this struct by value in multiple registers
-#else //
+#else
NYI("unknown target");
#endif // defined(_TARGET_XXX_)
#endif // FEATURE_MULTIREG_ARGS
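The hunk above only moves comments in the multireg struct-passing logic of eeGetArgSize, but the ARM64 rule those comments describe can be sketched on its own: structs above a size threshold are passed by reference (the slot holds a pointer), smaller ones by value in one or two registers. The threshold value and the neglect of HFAs are assumptions of the sketch, not facts taken from this diff:

#include <cstdio>

static unsigned arm64ArgSlotSize(unsigned structSize)
{
    const unsigned maxPassMultiRegBytes = 16; // assumed stand-in for MAX_PASS_MULTIREG_BYTES
    const unsigned pointerSize          = 8;
    if (structSize > maxPassMultiRegBytes)
        return pointerSize;                   // passed by reference
    return ((structSize + pointerSize - 1) / pointerSize) * pointerSize; // by value, rounded up
}

int main()
{
    printf("%u\n", arm64ArgSlotSize(24)); // 8: by reference
    printf("%u\n", arm64ArgSlotSize(12)); // 16: by value in two registers
    return 0;
}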
@@ -1269,10 +1271,10 @@ LONG TryResolveTokenFilter(struct _EXCEPTION_POINTERS* exceptionPointers, void*
{
assert(exceptionPointers->ExceptionRecord->ExceptionCode != SEH_VERIFICATION_EXCEPTION);
- // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to verification exceptions
- // if we are verifying. Verification exceptions will cause the JIT of the basic block to fail, but the JITing of the whole method
- // is still going to succeed. This is done for backward compatibility only. Ideally, we would always treat bad tokens in the IL
- // stream as fatal errors.
+ // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to
+ // verification exceptions if we are verifying. Verification exceptions will cause the JIT of the basic block to
+ // fail, but the JITing of the whole method is still going to succeed. This is done for backward compatibility only.
+ // Ideally, we would always treat bad tokens in the IL stream as fatal errors.
if (exceptionPointers->ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
{
auto* param = reinterpret_cast<TryResolveTokenFilterParam*>(theParam);
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 0766735578..6111a2b290 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -781,7 +781,7 @@ insGroup* emitter::emitSavIG(bool emitAdd)
emitSizeMethod += sz;
#endif
-// printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
+ // printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
/* Record the live GC register set - if and only if it is not an emitter added block */
@@ -844,8 +844,8 @@ insGroup* emitter::emitSavIG(bool emitAdd)
size_t of = (BYTE*)oj - emitCurIGfreeBase;
instrDescJmp* nj = (instrDescJmp*)(ig->igData + of);
-// printf("Jump moved from %08X to %08X\n", oj, nj);
-// printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
+ // printf("Jump moved from %08X to %08X\n", oj, nj);
+ // printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
assert(nj->idjIG == ig);
assert(nj->idIns() == oj->idIns());
@@ -1089,9 +1089,9 @@ void emitter::emitBegFN(bool hasFramePtr
memset(&emitConsDsc, 0, sizeof(emitConsDsc));
- // for random NOP insertion
-
#ifdef PSEUDORANDOM_NOP_INSERTION
+ // for random NOP insertion
+
emitEnableRandomNops();
emitComp->info.compRNG.Init(emitComp->info.compChecksum);
emitNextNop = emitNextRandomNop();
@@ -1281,11 +1281,11 @@ void * emitter::emitAllocInstr(size_t sz, emitAttr opsz)
}
#endif
+#ifdef PSEUDORANDOM_NOP_INSERTION
// TODO-ARM-Bug?: PSEUDORANDOM_NOP_INSERTION is not defined for _TARGET_ARM_
// ARM - This is currently broken on _TARGET_ARM_
// When nopSize is odd we misalign emitCurIGsize
//
-#ifdef PSEUDORANDOM_NOP_INSERTION
if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT)
&& !emitInInstrumentation
&& !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog
@@ -1365,9 +1365,8 @@ void * emitter::emitAllocInstr(size_t sz, emitAttr opsz)
emitInsCount++;
- /* In debug mode we clear/set some additional fields */
-
#if defined(DEBUG) || defined(LATE_DISASM)
+ /* In debug mode we clear/set some additional fields */
instrDescDebugInfo * info = (instrDescDebugInfo *) emitGetMem(sizeof(*info));
@@ -1571,9 +1570,9 @@ void emitter::emitEndProlog()
if (emitCurIGnonEmpty() || emitCurIG == emitPrologIG)
emitSavIG();
+#if EMIT_TRACK_STACK_DEPTH
/* Reset the stack depth values */
-#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
emitCntStackDepth = sizeof(int);
#endif
@@ -1699,10 +1698,10 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igT
#ifdef DEBUGGING_SUPPORT
+#if FEATURE_EH_FUNCLETS
// Add the appropriate IP mapping debugging record for this placeholder
// group.
-#if FEATURE_EH_FUNCLETS
// genExitCode() adds the mapping for main function epilogs
if (emitComp->opts.compDbgInfo)
{
@@ -1967,9 +1966,9 @@ void emitter::emitEndPrologEpilog()
assert(emitCurIGsize <= MAX_PLACEHOLDER_IG_SIZE);
+#if EMIT_TRACK_STACK_DEPTH
/* Reset the stack depth values */
-#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
emitCntStackDepth = sizeof(int);
#endif
@@ -2576,10 +2575,9 @@ void emitter::emitSplit(emitLocation* startLoc, emitLocation* endL
reportCandidate = false;
}
+ // Report it!
if (reportCandidate)
{
- // Report it!
-
#ifdef DEBUG
if (EMITVERBOSE && (candidateSize >= maxSplitSize))
printf("emitSplit: split at IG%02u is size %d, larger than requested maximum size of %d\n", igLastCandidate->igNum, candidateSize, maxSplitSize);
@@ -3102,7 +3100,7 @@ emitter::instrDesc *emitter::emitNewInstrCallDir(int
{
instrDescCGCA* id = emitAllocInstrCGCA(retSize);
-// printf("Direct call with GC vars / big arg cnt / explicit scope\n");
+ // printf("Direct call with GC vars / big arg cnt / explicit scope\n");
id->idSetIsLargeCall();
@@ -3122,7 +3120,7 @@ emitter::instrDesc *emitter::emitNewInstrCallDir(int
{
instrDesc * id = emitNewInstrCns(retSize, argCnt);
-// printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
+ // printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
/* Make sure we didn't waste space unexpectedly */
assert(!id->idIsLargeCns());
@@ -3442,11 +3440,11 @@ size_t emitter::emitIssue1Instr(insGroup *ig,
/* Issue the next instruction */
-// printf("[S=%02u] " , emitCurStackLvl);
+ // printf("[S=%02u] " , emitCurStackLvl);
is = emitOutputInstr(ig, id, dp);
-// printf("[S=%02u]\n", emitCurStackLvl);
+ // printf("[S=%02u]\n", emitCurStackLvl);
#if EMIT_TRACK_STACK_DEPTH
@@ -3547,7 +3545,8 @@ void emitter::emitRecomputeIGoffsets()
* ARM has a small, medium, and large encoding. The large encoding is a pseudo-op
* to handle greater range than the conditional branch instructions can handle.
* ARM64 has a small and large encoding for both conditional branch and loading label addresses.
- * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently NYI).
+ * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently
+ * NYI).
*/
void emitter::emitJumpDistBind()
@@ -3782,7 +3781,8 @@ AGAIN:
do
{
lstIG = lstIG->igNext; assert(lstIG);
-// printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, lstIG->igOffs - adjIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
lstIG->igOffs -= adjIG;
assert(IsCodeAligned(lstIG->igOffs));
}
@@ -3800,6 +3800,7 @@ AGAIN:
jmp->idjOffs -= adjLJ;
// If this is a jump via register, the instruction size does not change, so we are done.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM64_)
// JIT code and data will be allocated together for arm64 so the relative offset to JIT data is known.
@@ -3857,6 +3858,7 @@ AGAIN:
else
{
/* First time we've seen this label, convert its target */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (EMITVERBOSE)
@@ -3894,9 +3896,9 @@ AGAIN:
// We should not be jumping/branching across funclets/functions
emitCheckFuncletBranch(jmp, jmpIG);
+#ifdef _TARGET_XARCH_
/* Done if this is not a variable-sized jump */
-#ifdef _TARGET_XARCH_
if ( (jmp->idIns() == INS_push) ||
(jmp->idIns() == INS_mov) ||
(jmp->idIns() == INS_call) ||
@@ -4225,7 +4227,8 @@ AGAIN:
lstIG = lstIG->igNext;
if (!lstIG)
break;
-// printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, lstIG->igOffs - adjIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
lstIG->igOffs -= adjIG;
assert(IsCodeAligned(lstIG->igOffs));
}
@@ -4235,7 +4238,7 @@ AGAIN:
#endif
/* Is there a chance of other jumps becoming short? */
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
#if defined(_TARGET_ARM_)
if (EMITVERBOSE) printf("Total shrinkage = %3u, min extra short jump size = %3u, min extra medium jump size = %u\n", adjIG, minShortExtra, minMediumExtra);
@@ -4580,7 +4583,8 @@ unsigned emitter::emitEndCodeGen(Compiler *comp,
#endif
-// if (emitConsDsc.dsdOffs) printf("Cons=%08X\n", consBlock);
+ // if (emitConsDsc.dsdOffs)
+ // printf("Cons=%08X\n", consBlock);
/* Give the block addresses to the caller and other functions here */
@@ -4589,6 +4593,7 @@ unsigned emitter::emitEndCodeGen(Compiler *comp,
*consAddr = emitConsBlock = consBlock;
/* Nothing has been pushed on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if EMIT_TRACK_STACK_DEPTH
emitCurStackLvl = 0;
@@ -5022,6 +5027,7 @@ unsigned emitter::emitEndCodeGen(Compiler *comp,
if (jmp->idjShort)
{
// Patch Forward Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_XARCH_)
*(BYTE *)adr -= (BYTE)adj;
#elif defined(_TARGET_ARM_)
@@ -5038,6 +5044,7 @@ unsigned emitter::emitEndCodeGen(Compiler *comp,
else
{
// Patch Forward non-Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_XARCH_)
*(int *)adr -= adj;
#elif defined(_TARGET_ARMARCH_)
@@ -5098,11 +5105,11 @@ unsigned emitter::emitEndCodeGen(Compiler *comp,
// See specification comment at the declaration.
void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp)
{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// We only emit this GC information on targets where finally's are implemented via funclets,
// and the finally is invoked, during non-exceptional execution, via a branch with a predefined
// link register, rather than a "true call" for which we would already generate GC info. Currently,
// this means precisely ARM.
-#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
if (ig->igFlags & IGF_FINALLY_TARGET)
{
// We don't actually have a call instruction in this case, so we don't have
@@ -5161,9 +5168,8 @@ UNATIVE_OFFSET emitter::emitFindOffset(insGroup *ig, unsigned insNum)
instrDesc * id = (instrDesc *)ig->igData;
UNATIVE_OFFSET of = 0;
- /* Make sure we were passed reasonable arguments */
-
#ifdef DEBUG
+ /* Make sure we were passed reasonable arguments */
assert(ig && ig->igSelf == ig);
assert(ig->igInsCnt >= insNum);
#endif
@@ -5687,7 +5693,7 @@ void emitter::emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE *a
int offs = val & ~OFFSET_MASK;
-// printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
+ // printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
if (VarSetOps::IsMember(emitComp, vars, num))
{
@@ -5948,7 +5954,7 @@ void emitter::emitGCregLiveSet(GCtype gcType,
regPtrDsc * regPtrNext;
assert(!isThis || emitComp->lvaKeepAliveAndReportThis());
-// assert(emitFullyInt || isThis);
+ // assert(emitFullyInt || isThis);
assert(emitFullGCinfo);
assert(((emitThisGCrefRegs|emitThisByrefRegs) & regMask) == 0);
@@ -5980,7 +5986,7 @@ void emitter::emitGCregDeadSet(GCtype gcType,
regPtrDsc * regPtrNext;
-// assert(emitFullyInt);
+ // assert(emitFullyInt);
assert(emitFullGCinfo);
assert(((emitThisGCrefRegs|emitThisByrefRegs) & regMask) != 0);
@@ -6126,7 +6132,7 @@ UNATIVE_OFFSET emitter::emitCodeOffset(void *blockPtr, unsigned codePos)
of = emitGetInsOfsFromCodePos(codePos);
-// printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
+ // printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
/* Make sure the offset estimate is accurate */
@@ -6731,7 +6737,7 @@ void emitter::emitStackPushLargeStk (BYTE * addr,
{
/* Push an entry for this argument on the tracking stack */
-// printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
+ // printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
assert(level.IsOverflow() || u2.emitArgTrackTop == u2.emitArgTrackTab + level.Value());
*u2.emitArgTrackTop++ = (BYTE)gcType;
@@ -6808,7 +6814,7 @@ void emitter::emitStackPopLargeStk(BYTE * addr,
assert(IsValidGCtype(gcType));
-// printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
+ // printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
// This is an "interesting" argument
@@ -6845,7 +6851,7 @@ void emitter::emitStackPopLargeStk(BYTE * addr,
}
#ifdef JIT32_GCENCODER
- // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any args to pop at this call site? */
+ // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any args to pop at this call site? */
if (argRecCnt.Value() == 0)
{
@@ -6853,6 +6859,7 @@ void emitter::emitStackPopLargeStk(BYTE * addr,
Or do we have a partially interruptible EBP-less frame, and any
of EDI,ESI,EBX,EBP are live, or is there an outer/pending call?
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FPO_INTERRUPTIBLE
if (emitFullyInt ||
@@ -6939,7 +6946,7 @@ void emitter::emitStackKillArgs(BYTE *addr, unsigned count, unsigned
if (needsGC(gcType))
{
-// printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
+ // printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
*argTrackTop = GCT_NONE;
gcCnt += 1;
diff --git a/src/jit/emit.h b/src/jit/emit.h
index e3eee557d4..aaf042e4bb 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -306,7 +306,7 @@ struct insGroup
#endif // FEATURE_EH_FUNCLETS
// Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if REGMASK_BITS <= 32
union
@@ -658,10 +658,10 @@ protected:
// On Amd64, this is where the second DWORD begins
// On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that
// stores the GC-ness of the second register.
- // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the instructions.
- // This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more space to add members
- // for keeping GC-ness of the second return registers. It will also bloat the base struct unnecessarily
- // since the GC-ness of the second register is only needed for call instructions.
+ // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the
+ // instructions. This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more
+ // space to add members for keeping GC-ness of the second return registers. It will also bloat the base struct
+ // unnecessarily since the GC-ness of the second register is only needed for call instructions.
// The instrDescCGCA struct's member keeping the GC-ness of the first return register is _idcSecondRetRegGCType.
GCtype _idGCref :2; // GCref operand? (value is a "GCtype")
@@ -679,6 +679,7 @@ protected:
// amd64: 38 bits
// arm: 32 bits
// arm64: 30 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if HAS_TINY_DESC
//
@@ -748,6 +749,7 @@ protected:
// amd64: 46 bits
// arm: 48 bits
// arm64: 48 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef RELOC_SUPPORT
@@ -768,6 +770,7 @@ protected:
// amd64: 48 bits
// arm: 50 bits
// arm64: 50 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
#define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS)
@@ -789,6 +792,7 @@ protected:
////////////////////////////////////////////////////////////////////////
// Space taken up to here (with RELOC_SUPPORT): 64 bits, all architectures, by design.
////////////////////////////////////////////////////////////////////////
+ CLANG_FORMAT_COMMENT_ANCHOR;
#endif // !HAS_TINY_DESC
@@ -819,6 +823,7 @@ protected:
// There should no padding or alignment issues on any platform or
// configuration (including DEBUG which has 1 extra pointer).
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if HAS_TINY_DESC
@@ -1893,9 +1898,10 @@ public:
unsigned emitCurStackLvl; // amount of bytes pushed on stack
- /* Functions for stack tracking */
#if EMIT_TRACK_STACK_DEPTH
+ /* Functions for stack tracking */
+
void emitStackPush (BYTE * addr,
GCtype gcType);
@@ -2193,7 +2199,7 @@ unsigned emitter::emitCurOffset()
assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
-// printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+ // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
return codePos;
}
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index e2daab4a34..952ef75d46 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -527,8 +527,8 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc *id)
case IF_T2_E0: case IF_T2_E1: case IF_T2_E2:
case IF_T2_G0: case IF_T2_G1: case IF_T2_H0: case IF_T2_H1:
case IF_T2_K1: case IF_T2_K4:
- // Some formats with "destination" or "target" registers are actually used for store instructions, for the "source" value
- // written to memory.
+ // Some formats with "destination" or "target" registers are actually used for store instructions, for the
+ // "source" value written to memory.
// Similarly, PUSH has a target register, indicating the start of the set of registers to push. POP
// *does* write to at least one register, so we do not make that a special case.
// Various compare/test instructions do not write (except to the flags). Technically "teq" does not need to be
@@ -638,6 +638,7 @@ const char *emitter::emitFloatRegName(regNumber reg, emitAttr attr, bool varName
emitter::insFormat emitter::emitInsFormat(instruction ins)
{
+ // clang-format off
const static insFormat insFormats[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
@@ -650,6 +651,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
#include "instrs.h"
};
+ // clang-format on
assert(ins < ArrLen(insFormats));
assert((insFormats[ins] != IF_NONE));
@@ -662,6 +664,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define ST 4
#define CMP 8
+// clang-format off
/*static*/ const BYTE CodeGenInterface::instInfo[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
@@ -674,6 +677,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
#include "instrs.h"
};
+// clang-format on
/*****************************************************************************
*
@@ -743,6 +747,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins)
size_t emitter::emitInsCode(instruction ins, insFormat fmt)
{
+ // clang-format off
const static size_t insCodes1[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
@@ -872,6 +877,7 @@ size_t emitter::emitInsCode(instruction ins, insFormat fmt)
const static insFormat formatEncode2E[2] = { IF_T1_E, IF_T2_C6 };
const static insFormat formatEncode2F[2] = { IF_T1_E, IF_T2_C5 };
const static insFormat formatEncode2G[2] = { IF_T1_J3, IF_T2_M1 };
+ // clang-format on
size_t code = BAD_CODE;
insFormat insFmt = emitInsFormat(ins);
@@ -2591,10 +2597,11 @@ void emitter::emitIns_R_R_I(instruction ins,
assert(insOptsNone(opt));
// On ARM, the immediate shift count of LSL and ROR must be between 1 and 31. For LSR and ASR, it is between
- // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in instruction
- // encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to the IL SHR, SHL, or SHL.UN
- // instruction that is greater than or equal to the width of the type will yield an undefined value. We choose that
- // undefined value in this case to match x86 behavior, by only using the lower 5 bits of the constant shift count.
+ // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in
+ // instruction encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to
+ // the IL SHR, SHL, or SHL.UN instruction that is greater than or equal to the width of the type will yield
+ // an undefined value. We choose that undefined value in this case to match x86 behavior, by only using the
+ // lower 5 bits of the constant shift count.
imm &= 0x1f;
if (imm == 0)
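The reflowed comment above explains why the shift immediate is masked to five bits: to reproduce x86's behavior for IL shift counts at or above the operand width. A tiny standalone restatement of that rule:

#include <cstdio>

static unsigned shlLikeX86(unsigned value, unsigned count)
{
    count &= 0x1f;         // x86 only looks at the low 5 bits of a 32-bit shift count
    return value << count; // count is now guaranteed to be 0..31
}

int main()
{
    printf("0x%x\n", shlLikeX86(1, 33)); // prints 0x2, the same as shifting by 1
    return 0;
}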
@@ -6573,8 +6580,8 @@ DONE_CALL:
}
}
- // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC ref or
- // overwritten one.
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
if (emitInsWritesToLclVarStackLoc(id))
{
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
@@ -7047,7 +7054,8 @@ void emitter::emitDispInsHelp(instrDesc * id,
{
if (EMITVERBOSE)
{
- unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints
+ unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio
+ // conditional breakpoints
printf("IN%04x: ", idNum);
}
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index 317ca87935..9bc8b14ac3 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -1063,6 +1063,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc *id)
/*****************************************************************************/
#ifdef DEBUG
+// clang-format off
static const char * const xRegNames[] =
{
#define REGDEF(name, rnum, mask, xname, wname) xname,
@@ -1117,6 +1118,7 @@ static const char * const bRegNames[] =
"b25", "b26", "b27", "b28", "b29",
"b30", "b31"
};
+// clang-format on
/*****************************************************************************
*
@@ -1182,6 +1184,7 @@ const char * emitter::emitVectorRegName(regNumber reg)
emitter::insFormat emitter::emitInsFormat(instruction ins)
{
+ // clang-format off
const static insFormat insFormats[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
@@ -1193,6 +1196,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
#include "instrs.h"
};
+ // clang-format on
assert(ins < ArrLen(insFormats));
assert((insFormats[ins] != IF_NONE));
@@ -1205,6 +1209,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define ST 4
#define CMP 8
+// clang-format off
/*static*/ const BYTE CodeGenInterface::instInfo[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
@@ -1216,7 +1221,7 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
#include "instrs.h"
};
-
+// clang-format on
/*****************************************************************************
*
@@ -1285,6 +1290,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins)
emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
{
+ // clang-format off
const static code_t insCodes1[] =
{
#define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
@@ -1384,6 +1390,8 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
#define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
#include "instrs.h"
};
+ // clang-format on
+
const static insFormat formatEncode9[9] = { IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F };
const static insFormat formatEncode6A[6] = { IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A,
@@ -2132,7 +2140,8 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
// Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
UINT64 maxVal = 1ULL << width;
UINT64 lowBitsMask = maxVal - 1;
- UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
assert((value < maxVal) ||
((value & signBitsMask) == signBitsMask));
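The comment split above annotates a signedness-aware range check: a value "fits" in width bits either as an unsigned value below 2^width or as a negative value whose high bits, sign bit included, are all ones. Restated as a small self-contained function with the same math:

#include <cstdio>
#include <cstdint>
#include <cassert>

static bool fitsInWidth(uint64_t value, unsigned width)
{
    assert(width >= 1 && width <= 63);
    uint64_t maxVal       = 1ULL << width;
    uint64_t lowBitsMask  = maxVal - 1;
    uint64_t signBitsMask = ~lowBitsMask | (1ULL << (width - 1));
    return (value < maxVal) || ((value & signBitsMask) == signBitsMask);
}

int main()
{
    printf("%d\n", fitsInWidth(0xffffffffffffffffULL, 8)); // -1 fits in 8 bits: prints 1
    printf("%d\n", fitsInWidth(0x1234ULL, 8));             // too large: prints 0
    return 0;
}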
@@ -2309,7 +2318,8 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
INT32 maxVal = 1 << immWidth;
INT32 lowBitsMask = maxVal - 1;
INT32 hiBitsMask = ~lowBitsMask;
- INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
assert((imm < maxVal) ||
((imm & signBitsMask) == signBitsMask));
@@ -7249,7 +7259,8 @@ void emitter::emitIns_Call(EmitCallType callType,
/*****************************************************************************
*
- * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the architecture manual).
+ * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
+ * architecture manual).
*/
/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
@@ -8086,9 +8097,9 @@ BYTE* emitter::emitOutputLJ(insGroup *ig, BYTE *dst, instrDesc *i
if (dstOffs <= srcOffs)
{
+#if DEBUG_EMIT
/* This is a backward jump - distance is known at this point */
-#if DEBUG_EMIT
if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
{
size_t blkOffs = id->idjIG->igOffs;
@@ -9526,8 +9537,8 @@ size_t emitter::emitOutputInstr(insGroup *ig,
}
}
- // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC ref or
- // overwritten one.
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
if (emitInsWritesToLclVarStackLoc(id))
{
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
diff --git a/src/jit/emitfmtsarm.h b/src/jit/emitfmtsarm.h
index 1e6db34427..bc7492003a 100644
--- a/src/jit/emitfmtsarm.h
+++ b/src/jit/emitfmtsarm.h
@@ -3,7 +3,7 @@
// See the LICENSE file in the project root for more information.
//////////////////////////////////////////////////////////////////////////////
-
+// clang-format off
#if !defined(_TARGET_ARM_)
#error Unexpected target type
#endif
@@ -150,3 +150,4 @@ IF_DEF(INVALID, IS_NONE, NONE) //
#endif // !DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsarm64.h b/src/jit/emitfmtsarm64.h
index 294bb38701..3ea1168915 100644
--- a/src/jit/emitfmtsarm64.h
+++ b/src/jit/emitfmtsarm64.h
@@ -3,7 +3,7 @@
// See the LICENSE file in the project root for more information.
//////////////////////////////////////////////////////////////////////////////
-
+// clang-format off
#if !defined(_TARGET_ARM64_)
#error Unexpected target type
#endif
@@ -206,3 +206,4 @@ IF_DEF(INVALID, IS_NONE, NONE) //
#endif // !DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h
index f050d09d61..49afcb5c8b 100644
--- a/src/jit/emitfmtsxarch.h
+++ b/src/jit/emitfmtsxarch.h
@@ -7,6 +7,7 @@
// This file was previously known as emitfmts.h
//
+// clang-format off
#if !defined(_TARGET_XARCH_)
#error Unexpected target type
#endif
@@ -236,3 +237,4 @@ IF_DEF(AWR_TRD, IS_FP_STK|IS_AM_WR, AMD ) // write [adr], read S
#endif // DEFINE_IS_OPS
#endif // DEFINE_ID_OPS
//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitjmps.h b/src/jit/emitjmps.h
index 534e33edf4..60815d13ea 100644
--- a/src/jit/emitjmps.h
+++ b/src/jit/emitjmps.h
@@ -2,7 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
+// clang-format off
#ifndef JMP_SMALL
#error Must define JMP_SMALL macro before including this file
#endif
@@ -54,3 +54,5 @@ JMP_SMALL(le , gt , ble ) // LE
/*****************************************************************************/
#undef JMP_SMALL
/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/emitpub.h b/src/jit/emitpub.h
index 996234408c..e76ed2540a 100644
--- a/src/jit/emitpub.h
+++ b/src/jit/emitpub.h
@@ -2,7 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
/************************************************************************/
/* Overall emitter control (including startup and shutdown) */
/************************************************************************/
@@ -171,3 +170,4 @@
unsigned emitGetInstructionSize(emitLocation* emitLoc);
#endif // defined(_TARGET_ARM_)
+
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 53f9fc341d..2687b4984e 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -784,6 +784,7 @@ ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
#define INST_DEF_FL 0x20 // does the instruction set flags?
#define INST_USE_FL 0x40 // does the instruction use flags?
+// clang-format off
const BYTE CodeGenInterface::instInfo[] =
{
#define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
@@ -800,12 +801,14 @@ const BYTE CodeGenInterface::instInfo[] =
#undef INST4
#undef INST5
};
+// clang-format on
/*****************************************************************************
*
* Initialize the table used by emitInsModeFormat().
*/
+// clang-format off
const BYTE emitter::emitInsModeFmtTab[] =
{
#define INST0(id, nm, fp, um, rf, wf, mr ) um,
@@ -822,6 +825,7 @@ const BYTE emitter::emitInsModeFmtTab[] =
#undef INST4
#undef INST5
};
+// clang-format on
#ifdef DEBUG
unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab)/
@@ -929,6 +933,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
inline
size_t insCode(instruction ins)
{
+ // clang-format off
const static
size_t insCodes[] =
{
@@ -946,6 +951,7 @@ size_t insCode(instruction ins)
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodes)/sizeof(insCodes[0]));
assert((insCodes[ins] != BAD_CODE));
@@ -961,6 +967,7 @@ size_t insCode(instruction ins)
inline
size_t insCodeMI(instruction ins)
{
+ // clang-format off
const static
size_t insCodesMI[] =
{
@@ -978,6 +985,7 @@ size_t insCodeMI(instruction ins)
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesMI)/sizeof(insCodesMI[0]));
assert((insCodesMI[ins] != BAD_CODE));
@@ -993,6 +1001,7 @@ size_t insCodeMI(instruction ins)
inline
size_t insCodeRM(instruction ins)
{
+ // clang-format off
const static
size_t insCodesRM[] =
{
@@ -1010,6 +1019,7 @@ size_t insCodeRM(instruction ins)
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesRM)/sizeof(insCodesRM[0]));
assert((insCodesRM[ins] != BAD_CODE));
@@ -1025,6 +1035,7 @@ size_t insCodeRM(instruction ins)
inline
size_t insCodeACC(instruction ins)
{
+ // clang-format off
const static
size_t insCodesACC[] =
{
@@ -1042,6 +1053,7 @@ size_t insCodeACC(instruction ins)
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesACC)/sizeof(insCodesACC[0]));
assert((insCodesACC[ins] != BAD_CODE));
@@ -1057,6 +1069,7 @@ size_t insCodeACC(instruction ins)
inline
size_t insCodeRR(instruction ins)
{
+ // clang-format off
const static
size_t insCodesRR[] =
{
@@ -1074,6 +1087,7 @@ size_t insCodeRR(instruction ins)
#undef INST4
#undef INST5
};
+ // clang-format on
assert((unsigned)ins < sizeof(insCodesRR)/sizeof(insCodesRR[0]));
assert((insCodesRR[ins] != BAD_CODE));
@@ -1081,6 +1095,7 @@ size_t insCodeRR(instruction ins)
return insCodesRR[ins];
}
+// clang-format off
const static
size_t insCodesMR[] =
{
@@ -1098,6 +1113,7 @@ size_t insCodesMR[] =
#undef INST4
#undef INST5
};
+// clang-format on
// Returns true iff the give CPU instruction has an MR encoding.
inline
@@ -1695,6 +1711,8 @@ UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp)
#endif
{
// Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef UNIX_AMD64_ABI
LclVarDsc* varDsc = emitComp->lvaTable + var;
bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
@@ -1923,8 +1941,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code)
{
size = 2;
- // Most 16-bit operands will require a size prefix .
+ // Most 16-bit operands will require a size prefix.
// This refers to 66h size prefix override.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_STACK_FP_X87
if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
@@ -2542,10 +2561,12 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
//
// Post-conditions:
// For base address of int constant:
-// -- the caller must have added the int constant base to the instrDesc when creating it via emitNewInstrAmdCns().
+// -- the caller must have added the int constant base to the instrDesc when creating it via
+// emitNewInstrAmdCns().
// For simple address modes (base + scale * index + offset):
// -- the base register, index register, and scale factor are set.
-// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via emitNewInstrAmdCns().
+// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
+// emitNewInstrAmdCns().
//
// The instruction format is set.
//
@@ -3572,9 +3593,9 @@ void emitter::emitIns_R_I(instruction ins,
// Vex prefix size
sz += emitGetVexPrefixSize(ins, attr);
- // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a 64-bit sized
- // operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target register. So we also
- // need to check if that built-in register is an extended register.
+ // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
+ // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
+ // register. So we also need to check if that built-in register is an extended register.
if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
{
sz += emitGetRexPrefixSize(ins);
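The comment reflowed above spells out when a REX prefix byte is added to the size estimate on AMD64: any extended register (REX.R/REX.B) or a 64-bit operand size (REX.W), with IMUL's implicit target register as an extra wrinkle. A simplified standalone sketch of that rule, ignoring the IMUL case and using an invented register enum:

#include <cstdio>

enum Reg { RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15 };

static unsigned rexPrefixBytes(Reg reg, unsigned operandBits)
{
    bool needsRex = (reg >= R8) || (operandBits == 64); // extended reg or REX.W
    return needsRex ? 1 : 0;
}

int main()
{
    printf("%u\n", rexPrefixBytes(RCX, 32)); // 0: no prefix needed
    printf("%u\n", rexPrefixBytes(R9, 32));  // 1: extended register
    printf("%u\n", rexPrefixBytes(RAX, 64)); // 1: 64-bit operand size
    return 0;
}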
@@ -3824,6 +3845,8 @@ void emitter::emitIns_R_R (instruction ins,
emitAttr size = EA_SIZE(attr);
/* We don't want to generate any useless mov instructions! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_AMD64_
// Same-reg 4-byte mov can be useful because it performs a
// zero-extension to 8 bytes.
@@ -4191,13 +4214,13 @@ void emitter::emitIns_J_S (instruction ins,
#endif
#if RELOC_SUPPORT
+#ifndef _TARGET_AMD64_
// Storing the address of a basicBlock will need a reloc
// as the instruction uses the absolute address,
// not a relative address.
//
// On Amd64, Absolute code addresses should always go through a reloc to
// to be encoded as RIP rel32 offset.
-#ifndef _TARGET_AMD64_
if (emitComp->opts.compReloc)
#endif
{
@@ -6686,9 +6709,9 @@ void emitter::emitDispIns(instrDesc* id,
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
case IF_MRD_OFF:
@@ -6768,13 +6791,13 @@ void emitter::emitDispIns(instrDesc* id,
}
}
-// printf("[F=%s] " , emitIfName(id->idInsFmt()));
-// printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
-// printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
-// printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
-// printf("[A=%08X] " , emitSimpleStkMask);
-// printf("[A=%08X] " , emitSimpleByrefStkMask);
-// printf("[L=%02u] " , id->idCodeSize());
+ // printf("[F=%s] " , emitIfName(id->idInsFmt()));
+ // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
+ // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
+ // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
+ // printf("[A=%08X] " , emitSimpleStkMask);
+ // printf("[A=%08X] " , emitSimpleByrefStkMask);
+ // printf("[L=%02u] " , id->idCodeSize());
if (!emitComp->opts.dspEmit && !isNew && !asmfm)
doffs = true;
@@ -6904,9 +6927,9 @@ PRINT_CONSTANT:
case IF_TWR_ARD:
case IF_TRW_ARD:
-// case IF_ARD_TRD:
+ // case IF_ARD_TRD:
case IF_AWR_TRD:
-// case IF_ARW_TRD:
+ // case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87
if (ins == INS_call && id->idIsCallRegPtr())
@@ -7023,9 +7046,9 @@ PRINT_CONSTANT:
case IF_TWR_SRD:
case IF_TRW_SRD:
-// case IF_SRD_TRD:
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
case IF_SWR_TRD:
-// case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87
@@ -7327,9 +7350,9 @@ PRINT_CONSTANT:
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
@@ -9532,6 +9555,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
// register pointer mask.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
GCtype gc1, gc2;
@@ -10120,8 +10145,8 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
/*****************************************************************************
*
* Output a local jump instruction.
- * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that needs
- * to get bound to an actual address and processed by branch shortening.
+ * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
+ * needs to get bound to an actual address and processed by branch shortening.
*/
BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
@@ -10191,6 +10216,8 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
if (dstOffs <= srcOffs)
{
// This is a backward jump - distance is known at this point
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DEBUG_EMIT
if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
{
@@ -10920,9 +10947,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
case IF_TWR_ARD:
case IF_TRW_ARD:
-// case IF_ARD_TRD:
+ // case IF_ARD_TRD:
+ // case IF_ARW_TRD:
case IF_AWR_TRD:
-// case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87
@@ -11015,9 +11042,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
case IF_TWR_SRD:
case IF_TRW_SRD:
-// case IF_SRD_TRD:
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
case IF_SWR_TRD:
-// case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87
@@ -11025,6 +11052,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
if (ins == INS_pop)
{
// The offset in "pop [ESP+xxx]" is relative to the new ESP value
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FEATURE_FIXED_OUT_ARGS
emitCurStackLvl -= sizeof(int);
@@ -11126,9 +11154,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE**
case IF_TWR_MRD:
case IF_TRW_MRD:
-// case IF_MRD_TRD:
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
case IF_MWR_TRD:
-// case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87
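The CLANG_FORMAT_COMMENT_ANCHOR lines added throughout these emitter hunks give a comment an ordinary statement to attach to when a preprocessor conditional follows immediately; without one, clang-format re-indents the comment to line up with the directive rather than with the surrounding code. A minimal sketch of the pattern, assuming the macro expands to a bare semicolon in a shared JIT header (both the definition and the example function are illustrative, not quoted from this change):

// Assumed definition: expanding to a lone semicolon gives clang-format a statement to
// anchor the preceding comment to, so the comment keeps the block's indentation instead
// of being pulled out to the column of the #ifdef below it.
#define CLANG_FORMAT_COMMENT_ANCHOR ;

void emitterExample(bool gcMismatch)
{
    if (gcMismatch)
    {
        // If we got here, the GC-ness of the registers doesn't match, so we have to
        // "swap" them in the GC register pointer mask.
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef LEGACY_BACKEND
        // RyuJIT-only handling would go here.
#endif
    }
}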
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 7c841176cc..7d0e648c26 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -193,7 +193,7 @@ private:
insFormat FPld,
insFormat FPst);
- bool emitVerifyEncodable(instruction ins,
+ bool emitVerifyEncodable(instruction ins,
emitAttr size,
regNumber reg1,
regNumber reg2 = REG_NA);
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 8d59d22ff4..baf3575529 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -3463,8 +3463,8 @@ void Compiler::SwitchUniqueSuccSet::UpdateTarget(IAllocator* alloc, BasicBlock*
}
else if (!fromStillPresent && !toAlreadyPresent)
{
- // write "to" where "from" was
#ifdef DEBUG
+ // write "to" where "from" was
bool foundFrom = false;
#endif // DEBUG
for (unsigned i = 0; i < numDistinctSuccs; i++)
@@ -3483,8 +3483,8 @@ void Compiler::SwitchUniqueSuccSet::UpdateTarget(IAllocator* alloc, BasicBlock*
else
{
assert(!fromStillPresent && toAlreadyPresent);
- // remove "from".
#ifdef DEBUG
+ // remove "from".
bool foundFrom = false;
#endif // DEBUG
for (unsigned i = 0; i < numDistinctSuccs; i++)
@@ -3824,6 +3824,7 @@ void Compiler::fgCreateGCPolls()
// the test. This depends on the value of opts.compGCPollType.
// If we're doing GCPOLL_CALL, just insert a GT_CALL node before the last node in the block.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
switch (block->bbJumpKind)
@@ -3912,10 +3913,10 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* bl
addrTrap = info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+#ifdef ENABLE_FAST_GCPOLL_HELPER
// I never want to split blocks if we've got two indirections here.
// This is a size trade-off assuming the VM has ENABLE_FAST_GCPOLL_HELPER.
// So don't do it when that is off
-#ifdef ENABLE_FAST_GCPOLL_HELPER
if (pAddrOfCaptureThreadGlobal != NULL)
{
pollType = GCPOLL_CALL;
@@ -4025,12 +4026,15 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* bl
}
fgRemoveStmt(top, stmt);
fgInsertStmtAtEnd(bottom, stmt);
+
}
+
// for BBJ_ALWAYS blocks, bottom is an empty block.
// 4) Create a GT_EQ node that checks against g_TrapReturningThreads. True jumps to Bottom,
// false falls through to poll. Add this to the end of Top. Top is now BBJ_COND. Bottom is
// now a jump target
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef ENABLE_FAST_GCPOLL_HELPER
// Prefer the fast gc poll helper over the double indirection
@@ -4749,15 +4753,16 @@ DECODE_OPCODE:
__fallthrough;
- // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the call target
- // is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these stubs, so we'll have to
- // JIT an IL stub for a trivial func. It's almost certainly a better choice to leave out the inline candidate
- // so we can generate an inlined call frame. It might be nice to call getCallInfo to figure out what kind of
- // call we have here.
case CEE_CALLI:
case CEE_LOCALLOC:
case CEE_MKREFANY:
case CEE_RETHROW:
+ // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the call target
+ // is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these stubs, so we'll have to
+ // JIT an IL stub for a trivial func. It's almost certainly a better choice to leave out the inline
+ // candidate so we can generate an inlined call frame. It might be nice to call getCallInfo to figure out
+ // what kind of call we have here.
+
//Consider making this only for not force inline.
if (makeInlineObservations)
{
@@ -5334,14 +5339,15 @@ DECODE_OPCODE:
#ifndef LEGACY_BACKEND
if (opts.compProcedureSplitting)
{
- // TODO-CQ: We might need to create a switch table; we won't know for sure until much later. However, switch tables
- // don't work with hot/cold splitting, currently. The switch table data needs a relocation such that if the base
- // (the first block after the prolog) and target of the switch branch are put in different sections, the difference
- // stored in the table is updated. However, our relocation implementation doesn't support three different pointers
- // (relocation address, base, and target). So, we need to change our switch table implementation to be more like
- // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself (maybe
- // immediately after the switch jump), and make the "base" address be also in that section, probably the
- // address after the switch jump.
+ // TODO-CQ: We might need to create a switch table; we won't know for sure until much later.
+ // However, switch tables don't work with hot/cold splitting, currently. The switch table data needs
+ // a relocation such that if the base (the first block after the prolog) and target of the switch
+ // branch are put in different sections, the difference stored in the table is updated. However, our
+ // relocation implementation doesn't support three different pointers (relocation address, base, and
+ // target). So, we need to change our switch table implementation to be more like
+ // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself
+ // (maybe immediately after the switch jump), and make the "base" address be also in that section,
+ // probably the address after the switch jump.
opts.compProcedureSplitting = false;
JITDUMP("Turning off procedure splitting for this method, as it might need switch tables; implementation limitation.\n");
}
@@ -5406,8 +5412,9 @@ DECODE_OPCODE:
bool isCallPopAndRet = false;
// impIsTailCallILPattern uses isRecursive flag to determine whether ret in a fallthrough block is
- // allowed. We don't know at this point whether the call is recursive so we conservatively pass false.
- // This will only affect explicit tail calls when IL verification is not needed for the method.
+ // allowed. We don't know at this point whether the call is recursive so we conservatively pass
+ // false. This will only affect explicit tail calls when IL verification is not needed for the
+ // method.
bool isRecursive = false;
if (!impIsTailCallILPattern(tailCall, opcode, codeAddr+sz, codeEndp, isRecursive, &isCallPopAndRet))
{
@@ -6144,9 +6151,9 @@ void Compiler::fgFindBasicBlocks()
verCheckNestingLevel(initRoot);
}
+#ifndef DEBUG
// fgNormalizeEH assumes that this test has been passed. And Ssa assumes that fgNormalizeEHTable
// has been run. So do this unless we're in minOpts mode (and always in debug).
-#ifndef DEBUG
if (tiVerificationNeeded || !opts.MinOpts())
#endif
{
@@ -9268,7 +9275,7 @@ void Compiler::fgRemoveLinearOrderDependencies(GenTreePtr tree)
// So don't fix their prev next links.
if (stmt->gtStmtIsEmbedded() && stack.Height() == 2)
{
- //
+ // clang-format off
// Two cases:
// Case 1 (Initial case -- we are discovering the first embedded stmt):
// Before:
@@ -9283,7 +9290,7 @@ void Compiler::fgRemoveLinearOrderDependencies(GenTreePtr tree)
// Currently, "node" is emb3List and "lastNestEmbedNode" is emb2Expr.
// After:
// ... -> emb2List -> emb2Expr -> -> emb3List -> emb3Expr -> stmtNode -> ... -> stmtExpr
- //
+ // clang-format on
// Drop stmtNodes that occur between emb2Expr and emb3List.
if (lastNestEmbedNode)
@@ -10442,9 +10449,9 @@ void Compiler::fgRemoveBlock(BasicBlock* block,
}
#endif // DEBUG
+#ifdef DEBUG
/* Some extra checks for the empty case */
-#ifdef DEBUG
switch (block->bbJumpKind)
{
case BBJ_NONE:
@@ -11327,6 +11334,7 @@ bool Compiler::fgEhAllowsMoveBlock(BasicBlock* bBefore,
void Compiler::fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk)
{
/* We have decided to insert the block(s) after 'insertAfterBlk' */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
@@ -11371,6 +11379,8 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE
{
INDEBUG(const char* reason = "None";)
+ // Figure out the range of blocks we're going to move
+
unsigned XTnum;
EHblkDsc* HBtab;
BasicBlock* bStart = NULL;
@@ -11378,8 +11388,6 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE
BasicBlock* bLast = NULL;
BasicBlock* bPrev = NULL;
- // Figure out the range of blocks we're going to move
-
#if FEATURE_EH_FUNCLETS
// We don't support moving try regions... yet?
noway_assert(relocateType == FG_RELOCATE_HANDLER);
@@ -11434,6 +11442,7 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE
// 1. Verify that all the blocks in the range are either all rarely run or not rarely run.
// When creating funclets, we ignore the run rarely flag, as we need to be able to move any blocks
// in the range.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !FEATURE_EH_FUNCLETS
bool isRare;
@@ -12205,6 +12214,8 @@ bool Compiler::fgRelocateEHRegions()
// Currently it is not good to move the rarely run handler regions to the end of the method
// because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if 0
// Now try to move the entire handler region if it can be moved.
// Don't try to move a finally handler unless we already moved the try region.
@@ -13540,6 +13551,8 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
// replace it with a COMMA node. In such a case we will end up with GT_JTRUE node pointing to
// a COMMA node which results in noway asserts in fgMorphSmpOp(), optAssertionGen() and rpPredictTreeRegUse().
// For the same reason fgMorphSmpOp() marks GT_JTRUE nodes with RELOP children as GTF_DONT_CSE.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
{
@@ -13555,9 +13568,11 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
zeroConstNode);
switchTree->gtOp.gtOp1 = condNode;
switchTree->gtOp.gtOp1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
// Re-link the nodes for this statement.
// We know that this is safe for the Lowered form, because we will have eliminated any embedded trees
// when we cloned the switch condition (it is also asserted above).
+
fgSetStmtSeq(switchStmt);
block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
block->bbJumpKind = BBJ_COND;
@@ -16749,8 +16764,8 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex,
for (blk = startBlk; blk != endBlk; blk = blk->bbNext)
{
// The only way (blk == nullptr) could be true is if the caller passed an endBlk that preceded startBlk in the
- // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar well-formedness
- // assert earlier in this function.
+ // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar
+ // well-formedness assert earlier in this function.
noway_assert(blk != nullptr);
if (blk == nearBlk)
@@ -16923,12 +16938,12 @@ DONE:;
// If 'putInFilter' is true, then the block is inserted in the filter region given by 'hndIndex'. In this case, tryIndex
// must be a less nested EH region (that is, tryIndex > hndIndex).
//
-// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner region.
-// In other words, if the try region indicated by tryIndex is nested in the handler region indicated by hndIndex,
+// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner
+// region. In other words, if the try region indicated by tryIndex is nested in the handler region indicated by hndIndex,
// then the new BB will be created in the try region. Vice versa.
//
-// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is, "0" is
-// "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is,
+// "0" is "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
//
// To be more specific, this function will create a new BB in one of the following 5 regions (if putInFilter is false):
// 1. When tryIndex = 0 and hndIndex = 0:
@@ -16957,13 +16972,16 @@ DONE:;
//
// Arguments:
// jumpKind - the jump kind of the new block to create.
-// tryIndex - the try region to insert the new block in, described above. This must be a number in the range [0..compHndBBtabCount].
-// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range [0..compHndBBtabCount].
-// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere in the requested region.
+// tryIndex - the try region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere
+// in the requested region.
// putInFilter - put the new block in the filter region given by hndIndex, as described above.
// runRarely - 'true' if the new block is run rarely.
-// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only implemented when
-// inserting into the main function (not into any EH region).
+// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
+// implemented when inserting into the main function (not into any EH region).
//
// Return Value:
// The new block.
@@ -17495,8 +17513,8 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk,
break;
#endif // COR_JIT_EE_VERSION
-// case SCK_PAUSE_EXEC:
-// noway_assert(!"add code to pause exec");
+ // case SCK_PAUSE_EXEC:
+ // noway_assert(!"add code to pause exec");
default:
noway_assert(!"unexpected code addition kind");
@@ -17930,12 +17948,12 @@ void Compiler::fgSetBlockOrder()
}
#endif // DEBUG
- /* Walk the basic blocks to assign sequence numbers */
-
#ifdef DEBUG
BasicBlock::s_nMaxTrees = 0;
#endif
+ /* Walk the basic blocks to assign sequence numbers */
+
/* If we don't compute the doms, then we never mark blocks as loops. */
if (fgDomsComputed)
{
@@ -18129,9 +18147,9 @@ BAD_LIST:;
noway_assert(list.gtNext->gtPrev == &list);
list.gtNext->gtPrev = NULL;
+#ifdef DEBUG
/* Keep track of the highest # of tree nodes */
-#ifdef DEBUG
if (BasicBlock::s_nMaxTrees < fgTreeSeqNum)
{
BasicBlock::s_nMaxTrees = fgTreeSeqNum;
@@ -18693,6 +18711,7 @@ struct escapeMapping_t
const char* sub;
};
+// clang-format off
static escapeMapping_t s_EscapeFileMapping[] =
{
{':', "="},
@@ -18714,6 +18733,7 @@ static escapeMapping_t s_EscapeMapping[] =
{'"', "&quot;"},
{0, 0}
};
+// clang-format on
const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
{
@@ -19096,18 +19116,21 @@ ONE_FILE_PER_METHOD:;
// - The command "C:\Program Files (x86)\Graphviz2.38\bin\dot.exe" -Tsvg -oFoo.svg -Kdot Foo.dot
// will produce a Foo.svg file that can be opened with any svg-capable browser (e.g. IE).
// - http://rise4fun.com/Agl/
-// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play button.
+// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play
+// button.
// - It will show a rotating '/' and then render the graph in the browser.
// MSAGL has also been open-sourced to https://github.com/Microsoft/automatic-graph-layout.git.
//
// Here are the config values that control it:
-// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs for.
+// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs
+// for.
// COMPlus_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped.
// COMPlus_JitDumpFgFile The filename to use. The default is "default.[xml|dot]".
// Note that the new graphs will be appended to this file if it already exists.
// COMPlus_JitDumpFgPhase Phase(s) after which to dump the flowgraph.
// Set to the short name of a phase to see the flowgraph after that phase.
-// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all phases.
+// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all
+// phases.
// COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.)
bool Compiler::fgDumpFlowGraph(Phases phase)
@@ -19769,8 +19792,9 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
padWidth = 8;
#endif // _TARGET_AMD64_
- // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as wide as necessary to accommodate
- // all the various IBC weights. It's at least 4 characters wide, to accommodate the "IBC" title and leading space.
+ // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as
+ // wide as necessary to accommodate all the various IBC weights. It's at least 4 characters wide, to accommodate
+ // the "IBC" title and leading space.
int ibcColWidth = 0;
for (block = firstBlock; block != nullptr; block = block->bbNext)
{
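The flowgraph hunks also lean on // clang-format off and // clang-format on pairs, for example around the embedded-statement diagram in fgRemoveLinearOrderDependencies, to keep hand-aligned comment art and tables out of the formatter's reach. A small sketch of how the markers behave; the arrays and values are made up:

// clang-format skips everything between the markers, which is what preserves
// deliberately aligned diagrams and tables.
// clang-format off
static const int kAligned[]   = {   1,   10,  100,
                                    2,   20,  200 };   // alignment survives formatting
// clang-format on
static const int kUnaligned[] = {1, 10, 100, 2, 20, 200}; // this line is fair game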
diff --git a/src/jit/fp.h b/src/jit/fp.h
index 44bf684362..eb8a79613d 100644
--- a/src/jit/fp.h
+++ b/src/jit/fp.h
@@ -2,7 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-
#ifndef _JIT_FP
#define _JIT_FP
@@ -12,13 +11,12 @@
enum dummyFPenum
{
- #define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
- #include "registerfp.h"
+#define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
+#include "registerfp.h"
FP_VIRTUALREGISTERS,
};
-
// FlatFPStateX87 holds the state of the virtual register file. For each
// virtual register we keep track to which physical register we're
// mapping. We also keep track of the physical stack.
@@ -42,10 +40,10 @@ public:
unsigned Pop ();
void Push (unsigned uEntry);
bool IsEmpty ();
-
+
// Debug/test methods
static bool AreEqual (FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
- #ifdef DEBUG
+#ifdef DEBUG
bool IsValidEntry (unsigned uEntry);
bool IsConsistent ();
void UpdateMappingFromStack ();
@@ -60,16 +58,16 @@ public:
{
m_bIgnoreConsistencyChecks = bIgnore;
}
- #else
+#else
inline void IgnoreConsistencyChecks(bool bIgnore)
- {
- }
- #endif
+ {
+ }
+#endif
unsigned m_uVirtualMap[FP_VIRTUALREGISTERS];
unsigned m_uStack[FP_PHYSICREGISTERS];
unsigned m_uStackSize;
-};
-
+};
+
#endif // FEATURE_STACK_FP_X87
#endif
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index d515d78c4b..ec0eba3fe3 100644
--- a/src/jit/gcencode.cpp
+++ b/src/jit/gcencode.cpp
@@ -1086,6 +1086,7 @@ size_t GCInfo::gcInfoBlockHdrSave(BYTE* dest,
#endif
/* Write the method size first (using between 1 and 5 bytes) */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
@@ -3735,6 +3736,8 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// pointers" section of the GC info even if lvTracked==true
// Has this argument been fully enregistered?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
#else // LEGACY_BACKEND
@@ -3771,7 +3774,8 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
}
// If we haven't continued to the next variable, we should report this as an untracked local.
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DOUBLE_ALIGN
// For genDoubleAlign(), locals are addressed relative to ESP and
// arguments are addressed relative to EBP.
@@ -4390,6 +4394,7 @@ void GCInfo::gcMarkFilterVarsPinned()
// (2) a regular one for after the filter
// and then adjust the original lifetime to end before
// the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
@@ -4431,6 +4436,7 @@ void GCInfo::gcMarkFilterVarsPinned()
// somewhere inside it, so we only create 1 new lifetime,
// and then adjust the original lifetime to end before
// the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
@@ -4469,7 +4475,7 @@ void GCInfo::gcMarkFilterVarsPinned()
// lifetime for the part inside the filter and adjust
// the start of the original lifetime to be the end
// of the filter
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
{
@@ -4501,6 +4507,7 @@ void GCInfo::gcMarkFilterVarsPinned()
{
// The variable lifetime is completely within the filter,
// so just add the pinned flag.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (compiler->verbose)
{
@@ -4508,6 +4515,7 @@ void GCInfo::gcMarkFilterVarsPinned()
gcDumpVarPtrDsc(varTmp);
}
#endif // DEBUG
+
varTmp->vpdVarNum |= pinned_OFFSET_FLAG;
#ifdef DEBUG
if (compiler->verbose)
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
index c20eb9e345..e458e86d19 100644
--- a/src/jit/gcinfo.cpp
+++ b/src/jit/gcinfo.cpp
@@ -420,6 +420,8 @@ void GCInfo::gcCountForHeader(UNALIGNED unsigned int * untrackedC
*/
/* Has this argument been fully enregistered? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
#else // LEGACY_BACKEND
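A third pattern, visible in the flowgraph and GC-info hunks above and again in the importer changes further down, is to move a comment that belongs to only one branch of a conditional-compilation block inside that branch, so it stays indented with the code it annotates instead of being pulled out to the directive's column. A before/after sketch with invented names:

// Before: the comment sits above the #ifdef, so the formatter re-indents it to the
// directive's column and it drifts away from the code it describes.
void beforeSketch()
{
    // DEBUG-only bookkeeping follows
#ifdef DEBUG
    bool sawUpdate = false;
    (void)sawUpdate; // silence the unused-variable warning in this sketch
#endif // DEBUG
}

// After: the comment lives inside the conditional block and is indented with it.
void afterSketch()
{
#ifdef DEBUG
    // DEBUG-only bookkeeping follows
    bool sawUpdate = false;
    (void)sawUpdate;
#endif // DEBUG
}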
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 8eafd68700..bf0b0f7785 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -79,10 +79,14 @@ genTreeOps GenTree::OpAsgToOper(genTreeOps op)
// by the caller of the Push() method.
enum IndentChars {ICVertical, ICBottom, ICTop, ICMiddle, ICDash, ICEmbedded, ICTerminal, ICError, IndentCharCount };
+
+// clang-format off
// Sets of strings for different dumping options vert bot top mid dash embedded terminal error
static const char* emptyIndents[IndentCharCount] = { " ", " ", " ", " ", " ", "{", "", "?" };
static const char* asciiIndents[IndentCharCount] = { "|", "\\", "/", "+", "-", "{", "*", "?" };
static const char* unicodeIndents[IndentCharCount] = { "\xe2\x94\x82", "\xe2\x94\x94", "\xe2\x94\x8c", "\xe2\x94\x9c", "\xe2\x94\x80", "{", "\xe2\x96\x8c", "?" };
+// clang-format on
+
typedef ArrayStack<Compiler::IndentInfo> IndentInfoStack;
struct IndentStack
{
@@ -237,11 +241,12 @@ void GenTree::InitNodeSize()
}
// Now set all of the appropriate entries to 'large'
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// On ARM32, ARM64 and System V for struct returning
// there is code that does GT_ASG-tree.CopyObj call.
// CopyObj is a large node and the GT_ASG is small, which triggers an exception.
-#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTree::s_gtNodeSizes[GT_ASG ] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_RETURN ] = TREE_NODE_SZ_LARGE;
#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
@@ -3525,9 +3530,9 @@ COMMON_CNS:
case GT_ADDR:
+#if FEATURE_STACK_FP_X87
/* If the operand was floating point, pop the value from the stack */
-#if FEATURE_STACK_FP_X87
if (varTypeIsFloating(op1->TypeGet()))
{
codeGen->genDecrementFPstkLevel();
@@ -3794,6 +3799,8 @@ COMMON_CNS:
// [base + idx * mul + cns] // mul can be 0, 2, 4, or 8
// Note that mul == 0 is semantically equivalent to mul == 1.
// Note that cns can be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if SCALED_ADDR_MODES
assert((base != NULL) || (idx != NULL && mul >= 2));
#else
@@ -4154,13 +4161,13 @@ COMMON_CNS:
costSz += (op1->gtCostSz + op2->gtCostSz);
DONE_OP1_AFTER_COST:
+#if FEATURE_STACK_FP_X87
/*
Binary FP operators pop 2 operands and produce 1 result;
FP comparisons pop 2 operands and produces 0 results.
assignments consume 1 value and don't produce anything.
*/
-#if FEATURE_STACK_FP_X87
if (isflt && !tree->IsPhiDefn())
{
assert(oper != GT_COMMA);
@@ -4613,12 +4620,12 @@ COMMON_CNS:
#endif
#endif
+#if GTF_CALL_REG_SAVE
// Normally function calls don't preserve caller save registers
// and thus are much more expensive.
// However a few function calls do preserve these registers
// such as the GC WriteBarrier helper calls.
-#if GTF_CALL_REG_SAVE
if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
#endif
{
@@ -4722,7 +4729,7 @@ COMMON_CNS:
DONE:
#if FEATURE_STACK_FP_X87
-// printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
+ // printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
@@ -5372,8 +5379,9 @@ GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
#define GTSTRUCT_4(nm, tag, tag2, tag3, tag4) /*handle explicitly*/
#define GTSTRUCT_N(nm, ...) /*handle explicitly*/
#include "gtstructs.h"
- // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
+
#if !FEATURE_EH_FUNCLETS
+ // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
case GT_END_LFIN:
case GT_JMP:
{ GenTreeVal gt(GT_JMP, TYP_INT, 0); res = *reinterpret_cast<VtablePtr*>(&gt); break; }
@@ -5822,7 +5830,7 @@ GenTreePtr Compiler::gtNewLclLNode(unsigned lnum,
#if SMALL_TREE_NODES
/* This local variable node may later get transformed into a large node */
-// assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+ // assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
GenTreePtr node = new(this, GT_CALL) GenTreeLclVar(type, lnum, ILoffs
DEBUGARG(/*largeNode*/true));
@@ -6673,6 +6681,7 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree,
if (kind & GTK_SMPOP)
{
/* If necessary, make sure we allocate a "fat" tree node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if SMALL_TREE_NODES
switch (oper)
@@ -7490,9 +7499,9 @@ bool GenTree::gtRequestSetFlags()
{
bool result = false;
+#if FEATURE_SET_FLAGS
// This method is a Nop unless FEATURE_SET_FLAGS is defined
-#if FEATURE_SET_FLAGS
// In order to set GTF_SET_FLAGS
// we must have a GTK_SMPOP
// and we have a integer or machine size type (not floating point or TYP_LONG on 32-bit)
@@ -9854,8 +9863,8 @@ GenTreePtr Compiler::gtDispLinearTree(GenTreeStmt* curStmt,
// get child msg
if (tree->IsCall())
{
- // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in multiple registers)
- // print the nodes of the nested list and continue to the next argument.
+ // If this is a call and the arg (listElem) is a GT_LIST (Unix LCL_FLD for passing a var in
+ // multiple registers) print the nodes of the nested list and continue to the next argument.
if (listElem->gtOper == GT_LIST)
{
int listCount = 0;
@@ -10795,6 +10804,7 @@ CHK_OVF:
// Cross-compilation is an issue here; if that becomes an important scenario, we should
// capture the target-specific values of overflow casts to the various integral types as
// constants in a target-specific function.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_XARCH_
// Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper
@@ -11229,6 +11239,8 @@ LNG_OVF:
// expect long constants to show up as an operand of overflow cast operation.
//
// TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
if (!fgGlobalMorph)
{
@@ -11245,6 +11257,7 @@ LNG_OVF:
goto OVF;
INT_OVF:
+#ifndef LEGACY_BACKEND
// Don't fold overflow operations if not global morph phase.
// The reason for this is that this optimization is replacing a gentree node
// with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
@@ -11260,7 +11273,7 @@ INT_OVF:
// expect long constants to show up as an operand of overflow cast operation.
//
// TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
-#ifndef LEGACY_BACKEND
+
if (!fgGlobalMorph)
{
assert(tree->gtOverflow());
@@ -11441,10 +11454,11 @@ LNG_ADD_CHKOVF:
// TODO-Amd64-Unix: Remove the code that disables optimizations for this method when the clang
// optimizer is fixed and/or the method implementation is refactored in a simpler code.
- // There is a bug in the clang-3.5 optimizer. The issue is that in release build the optimizer is mistyping
- // (or just wrongly decides to use 32 bit operation for a corner case of MIN_LONG) the args of the (ltemp / lval2)
- // to int (it does a 32 bit div operation instead of 64 bit.)
- // For the case of lval1 and lval2 equal to MIN_LONG (0x8000000000000000) this results in raising a SIGFPE.
+ // There is a bug in the clang-3.5 optimizer. The issue is that in release build the
+ // optimizer is mistyping (or just wrongly decides to use 32 bit operation for a corner
+ // case of MIN_LONG) the args of the (ltemp / lval2) to int (it does a 32 bit div
+ // operation instead of 64 bit.). For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE.
// Optimizations disabled for now. See compiler.h.
if ((ltemp/lval2) != lval1) goto LNG_OVF;
}
@@ -11597,7 +11611,8 @@ CNS_LONG:
//
// Example:
// float a = float.MaxValue;
- // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double precision.
+ // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double
+ // precision.
// float c = b/b; This will produce NaN in single precision and 1 in double precision.
case GT_ADD:
if (op1->TypeGet() == TYP_FLOAT)
@@ -12475,8 +12490,8 @@ bool Compiler::gtHasCatchArg(GenTreePtr tree)
//------------------------------------------------------------------------
void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* parentStack)
{
- // We only need to Quirk for _TARGET_64BIT_
#ifdef _TARGET_64BIT_
+ // We only need to Quirk for _TARGET_64BIT_
// Do we have a parent node that is a Call?
if (!Compiler::gtHasCallOnStack(parentStack))
@@ -13109,6 +13124,8 @@ bool GenTree::DefinesLocalAddr(Compiler* comp, unsigned width, GenTreeLclVarComm
// that we don't miss 'use' of any local. The below logic is making the assumption
// that in case of LEA(base, index, offset) - only base can be a GT_LCL_VAR_ADDR
// and index is not.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
GenTreePtr index = gtOp.gtOp2;
if (index != nullptr)
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index ef98214b54..4cfd1e6d13 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -28,10 +28,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "nodeinfo.h"
#include "simd.h"
-// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type it's got.
-// This is enabled by default in DEBUG. To enable it in RET builds (temporarily!), you need to change the build to define DEBUGGABLE_GENTREE=1,
-// as well as pass /OPT:NOICF to the linker (or else all the vtables get merged, making the debugging value supplied by them useless).
-// See protojit.nativeproj for a commented example of setting the build flags correctly.
+// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type
+// it's got. This is enabled by default in DEBUG. To enable it in RET builds (temporarily!), you need to change the
+// build to define DEBUGGABLE_GENTREE=1, as well as pass /OPT:NOICF to the linker (or else all the vtables get merged,
+// making the debugging value supplied by them useless). See protojit.nativeproj for a commented example of setting the
+// build flags correctly.
#ifndef DEBUGGABLE_GENTREE
#ifdef DEBUG
#define DEBUGGABLE_GENTREE 1
@@ -72,14 +73,13 @@ DECLARE_TYPED_ENUM(genTreeOps,BYTE)
GT_COUNT,
+#ifdef _TARGET_64BIT_
// GT_CNS_NATIVELONG is the gtOper symbol for GT_CNS_LNG or GT_CNS_INT, depending on the target.
// For the 64-bit targets we will only use GT_CNS_INT as it used to represent all the possible sizes
- // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
- // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
- //
-#ifdef _TARGET_64BIT_
GT_CNS_NATIVELONG = GT_CNS_INT,
#else
+ // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
GT_CNS_NATIVELONG = GT_CNS_LNG,
#endif
}
@@ -435,6 +435,7 @@ private:
//
// Register or register pair number of the node.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
public:
@@ -744,7 +745,7 @@ public:
#define GTF_CALL_UNMANAGED 0x80000000 // GT_CALL -- direct call to unmanaged code
#define GTF_CALL_INLINE_CANDIDATE 0x40000000 // GT_CALL -- this call has been marked as an inline candidate
-//
+
#define GTF_CALL_VIRT_KIND_MASK 0x30000000
#define GTF_CALL_NONVIRT 0x00000000 // GT_CALL -- a non virtual call
#define GTF_CALL_VIRT_STUB 0x10000000 // GT_CALL -- a stub-dispatch virtual call
@@ -765,25 +766,31 @@ public:
#define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT
#define GTF_INX_STRING_LAYOUT 0x40000000 // GT_INDEX -- this uses the special string array layout
- #define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile sematics (this is a nop on X86)
+ #define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile semantics (this is a nop
+ // on X86)
#define GTF_IND_REFARR_LAYOUT 0x20000000 // GT_IND -- the array holds object refs (only effects layout of Arrays)
#define GTF_IND_TGTANYWHERE 0x10000000 // GT_IND -- the target could be anywhere
#define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS
- #define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an assignment; don't evaluate it independently.
- #define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case alignment of 1 byte)
+ #define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an
+ // assignment; don't evaluate it independently.
+ #define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
+ // alignment of 1 byte)
#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
- #define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF contribution to its argument).
+ #define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
+ // contribution to its argument).
#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
#define GTF_IND_FLAGS (GTF_IND_VOLATILE|GTF_IND_REFARR_LAYOUT|GTF_IND_TGTANYWHERE|GTF_IND_NONFAULTING|\
GTF_IND_TLS_REF|GTF_IND_UNALIGNED|GTF_IND_INVARIANT|GTF_IND_ARR_INDEX)
- #define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS of an assignment; don't evaluate it independently.
+ #define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
+ // of an assignment; don't evaluate it independently.
#define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack
- #define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex addressing mode
+ #define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
+ // addressing mode
#define GTF_MUL_64RSLT 0x40000000 // GT_MUL -- produce 64-bit result
@@ -796,12 +803,15 @@ public:
#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
- #define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType is the small type)
- #define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while with explicit "loop test" in the header block.
+ #define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
+ // is the small type)
+ #define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
+ // with explicit "loop test" in the header block.
- #define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for castclass or instanceof?
+ #define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for
+ // castclass or instanceof?
- #define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
+ #define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
#define GTF_ICON_HDL_MASK 0xF0000000 // Bits used by handle types below
@@ -838,7 +848,8 @@ public:
#define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler
#define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was substituted
- #define GTF_STMT_TOP_LEVEL 0x20000000 // GT_STMT -- Top-level statement - true iff gtStmtList->gtPrev == nullptr
+ #define GTF_STMT_TOP_LEVEL 0x20000000 // GT_STMT -- Top-level statement -
+ // true iff gtStmtList->gtPrev == nullptr
// True for all stmts when in FGOrderTree
#define GTF_STMT_SKIP_LOWER 0x10000000 // GT_STMT -- Skip lowering if we already lowered an embedded stmt.
@@ -1507,8 +1518,8 @@ public:
// Requires "this" to be a GT_IND. Requires the outermost caller to set "*pFldSeq" to nullptr.
// Returns true if it is an array index expression, or access to a (sequence of) struct field(s)
- // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq to the sequence
- // of struct field accesses.
+ // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq
+ // to the sequence of struct field accesses.
bool ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
// Requires "this" to be the address of a (possible) array element (or struct field within that).
@@ -1520,8 +1531,8 @@ public:
// returns true and sets "*pFldSeq" to the sequence of fields with which those constants are annotated.
bool ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq);
- // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of an affine expression, to the value of the
- // of the index.
+ // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of
+ // an affine expression, to the value of the index.
void LabelIndex(Compiler* comp, bool isConst = true);
// Assumes that "this" occurs in a context where it is being dereferenced as the LHS of an assignment-like
@@ -2539,11 +2550,11 @@ struct GenTreeCall final : public GenTree
regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
#endif // LEGACY_BACKEND
+#if FEATURE_MULTIREG_RET
// State required to support multi-reg returning call nodes.
// For now it is enabled only for x64 unix.
//
// TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
-#if FEATURE_MULTIREG_RET
ReturnTypeDesc gtReturnTypeDesc;
// gtRegNum would always be the first return reg.
@@ -3476,7 +3487,8 @@ struct GenTreeAddrMode: public GenTreeOp
// So, for example:
// 1. Base + Index is legal with Scale==1
// 2. If Index is null, Scale should be zero (or unintialized / unused)
- // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of "Index*Scale + Offset".
+ // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of
+ // "Index*Scale + Offset".
// First operand is base address/pointer
bool HasBase() const { return gtOp1 != nullptr; }
@@ -3997,10 +4009,10 @@ struct GenTreePutArgStk: public GenTreeUnOp
// Represents GT_COPY or GT_RELOAD node
struct GenTreeCopyOrReload : public GenTreeUnOp
{
+#if FEATURE_MULTIREG_RET
// State required to support copy/reload of a multi-reg call node.
// First register is is always given by gtRegNum.
//
-#if FEATURE_MULTIREG_RET
regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
#endif
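The gentree.h hunks also show the convention for a trailing comment on a flag definition that no longer fits on one line: the remainder is carried onto a continuation line aligned under the start of the comment. The wrapped lines above suggest a limit of roughly 120 columns, though the exact ColumnLimit setting is an assumption. A sketch with an invented flag:

#define GTF_EXAMPLE_LONG_DESC 0x00000001 // GT_EXAMPLE -- a description that no longer fits on one line is
                                         // continued here, aligned under the start of the comment text.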
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index ee33797234..b56952a4b2 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
#ifndef GTNODE
#error Define GTNODE before including this file.
@@ -246,3 +247,4 @@ GTNODE(SWAP , "swap" ,0,GTK_BINOP) // op1 and op2 swap
/*****************************************************************************/
#undef GTNODE
/*****************************************************************************/
+// clang-format on
diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h
index 06c5b9816f..c76e69f417 100644
--- a/src/jit/gtstructs.h
+++ b/src/jit/gtstructs.h
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
@@ -108,3 +109,5 @@ GTSTRUCT_1(SIMD , GT_SIMD)
#undef GTSTRUCT_4
#undef GTSTRUCT_N
/*****************************************************************************/
+
+// clang-format on
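gtlist.h and gtstructs.h are X-macro tables whose value lies in their column layout, so rather than local fences the whole file body is wrapped in clang-format off/on. An illustrative table in the same spirit; the macro and entries are made up:

// clang-format off
#define EXAMPLE_NODE(name, string, kind) // expands to nothing in this sketch; a real
                                         // consumer #defines it before including the table
EXAMPLE_NODE(EX_ADD  , "+"    , 1)
EXAMPLE_NODE(EX_CALL , "call" , 2)
#undef EXAMPLE_NODE
// clang-format on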
diff --git a/src/jit/hashbv.cpp b/src/jit/hashbv.cpp
index d3e2c5ed9c..33822144d2 100644
--- a/src/jit/hashbv.cpp
+++ b/src/jit/hashbv.cpp
@@ -1723,7 +1723,7 @@ void hashBv::InorderTraverseTwo(hashBv *other, dualNodeAction a)
}
delete[] nodesThis;
- delete[] nodesOther;;
+ delete[] nodesOther;
}
diff --git a/src/jit/hashbv.h b/src/jit/hashbv.h
index 1c6803af66..d2d15559c2 100644
--- a/src/jit/hashbv.h
+++ b/src/jit/hashbv.h
@@ -322,6 +322,7 @@ public:
indexType HbvNext(hashBv *bv, Compiler *comp);
+// clang-format off
#define FOREACH_HBV_BIT_SET(index, bv) \
{ \
for (int hashNum=0; hashNum<(bv)->hashtable_size(); hashNum++) {\
@@ -344,7 +345,7 @@ indexType HbvNext(hashBv *bv, Compiler *comp);
}\
}\
} \
-
+// clang-format on
#ifdef DEBUG
void SimpleDumpNode(hashBvNode *n);
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 98f8183b7a..bf7afa7b42 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -80,8 +80,8 @@ void Compiler::impPushOnStack(GenTreePtr tree, typeInfo ti)
BADCODE("stack overflow");
}
- // If we are pushing a struct, make certain we know the precise type!
#ifdef DEBUG
+ // If we are pushing a struct, make certain we know the precise type!
if (tree->TypeGet() == TYP_STRUCT)
{
assert(ti.IsType(TI_STRUCT));
@@ -3699,14 +3699,17 @@ void Compiler::verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool l
// The rationale behind this workaround is to avoid modifying the VM and maintain compatibility between JIT64 and
// RyuJIT for the time being until we completely replace JIT64.
// TODO-ARM64-Cleanup: We probably want to actually modify the VM in the future to avoid the unnecesary two passes.
-#ifdef _TARGET_64BIT_
-#ifdef DEBUG
// In AMD64 we must make sure we're behaving the same way as JIT64, meaning we should only raise the verification
// exception if we are only importing and verifying. The method verNeedsVerification() can also modify the
// tiVerificationNeeded flag in the case it determines it can 'skip verification' during importation and defer it
// to a runtime check. That's why we must assert one or the other (since the flag tiVerificationNeeded can
// be turned off during importation).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+
+#ifdef DEBUG
bool canSkipVerificationResult = info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) != CORINFO_VERIFICATION_CANNOT_SKIP;
assert(tiVerificationNeeded || canSkipVerificationResult);
#endif // DEBUG
@@ -4349,8 +4352,8 @@ DONE_ARGS:
else
{
// We allow direct calls to value type constructors
- // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a constrained
- // callvirt to illegally re-enter a .ctor on a value of reference type.
+ // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a
+ // constrained callvirt to illegally re-enter a .ctor on a value of reference type.
VerifyOrReturn(tiThis.IsByRef() && DereferenceByRef(tiThis).IsValueClass(), "Bad call to a constructor");
}
}
@@ -4907,6 +4910,7 @@ void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken,
// The non-varargs helper is enabled for CoreRT only for now. Enabling this
// unconditionally would require ReadyToRun version bump.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if COR_JIT_EE_VERSION > 460
if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI))
@@ -5783,8 +5787,8 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
if (isRecursive)
#endif
{
- // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the sequence.
- // Make sure we don't go past the end of the IL however.
+ // we can actually handle if the ret is in a fallthrough block, as long as that is the only part of the
+ // sequence. Make sure we don't go past the end of the IL however.
codeEnd = min(codeEnd + 1, info.compCode + info.compILCodeSize);
}
@@ -6486,8 +6490,8 @@ var_types Compiler::impImportCall(OPCODE opcode,
call->gtType = callRetTyp;
}
- /* Check for varargs */
#if !FEATURE_VARARG
+ /* Check for varargs */
if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
(sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
{
@@ -6510,6 +6514,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
tailcall to a function with a different number of arguments, we
are hosed. There are ways around this (caller remembers esp value,
varargs is not caller-pop, etc), but not worth it. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
if (canTailCall)
@@ -6809,8 +6814,8 @@ var_types Compiler::impImportCall(OPCODE opcode,
extraArg = gtNewArgList(instParam);
}
- // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular to inline
- // 'polytypic' operations such as static field accesses, type tests and method calls which
+ // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular
+ // to inline 'polytypic' operations such as static field accesses, type tests and method calls which
// rely on the exact context. The exactContextHnd is passed back to the JitInterface at appropriate points.
// exactContextHnd is not currently required when inlining shared generic code into shared
// generic code, since the inliner aborts whenever shared code polytypic operations are encountered
@@ -6989,9 +6994,9 @@ DONE:
BADCODE("Stack should be empty after tailcall");
#endif //!_TARGET_64BIT_
}
-
-// assert(compCurBB is not a catch, finally or filter block);
-// assert(compCurBB is not a try block protected by a finally block);
+
+ // assert(compCurBB is not a catch, finally or filter block);
+ // assert(compCurBB is not a try block protected by a finally block);
// Check for permission to tailcall
bool explicitTailCall = (tailCall & PREFIX_TAILCALL_EXPLICIT) != 0;
@@ -7825,7 +7830,8 @@ void Compiler::impImportLeave(BasicBlock * block)
0,
step);
assert(step->bbJumpKind == BBJ_ALWAYS);
- step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next finally in the chain)
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
step->bbJumpDest->bbRefs++;
/* The new block will inherit this block's weight */
@@ -8152,19 +8158,21 @@ void Compiler::impImportLeave(BasicBlock * block)
}
else
{
- // Calling the finally block. We already have a step block that is either the call-to-finally from a more nested
- // try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by a 'finally'), or the step
- // block is the return from a catch.
+ // Calling the finally block. We already have a step block that is either the call-to-finally from a
+ // more nested try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by
+ // a 'finally'), or the step block is the return from a catch.
//
- // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block directly. Note that if a
- // 'catch' ends without resetting the ThreadAbortException, the VM will automatically re-raise the exception, using the
- // return address of the catch (that is, the target block of the BBJ_EHCATCHRET) as the re-raise address. If this address
- // is in a finally, the VM will refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On
- // AMD64/ARM64, we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a finally clause
- // to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a BBJ_CALLFINALLY directly. (Note that on ARM32,
- // we don't mark the thunk specially -- it lives directly within the 'try' region protected by the finally, since we generate
- // code in such a way that execution never returns to the call-to-finally call, and the finally-protected 'try' region doesn't
- // appear on stack walks.)
+ // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block
+ // directly. Note that if a 'catch' ends without resetting the ThreadAbortException, the VM will
+ // automatically re-raise the exception, using the return address of the catch (that is, the target
+ // block of the BBJ_EHCATCHRET) as the re-raise address. If this address is in a finally, the VM will
+ // refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On AMD64/ARM64,
+ // we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a
+ // finally clause to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a
+ // BBJ_CALLFINALLY directly. (Note that on ARM32, we don't mark the thunk specially -- it lives directly
+ // within the 'try' region protected by the finally, since we generate code in such a way that execution
+ // never returns to the call-to-finally call, and the finally-protected 'try' region doesn't appear on
+ // stack walks.)
assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
@@ -8201,7 +8209,8 @@ void Compiler::impImportLeave(BasicBlock * block)
#endif // !FEATURE_EH_CALLFINALLY_THUNKS
callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step);
- step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next finally in the chain)
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
step->bbJumpDest->bbRefs++;
#if defined(_TARGET_ARM_)
@@ -8252,17 +8261,18 @@ void Compiler::impImportLeave(BasicBlock * block)
// We are jumping out of a catch-protected try.
//
// If we are returning from a call to a finally, then we must have a step block within a try
- // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the finally
- // raises an exception), the VM will find this step block, notice that it is in a protected region, and invoke
- // the appropriate catch.
+ // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the
+ // finally raises an exception), the VM will find this step block, notice that it is in a protected region,
+ // and invoke the appropriate catch.
//
// We also need to handle a special case with the handling of ThreadAbortException. If a try/catch
// catches a ThreadAbortException (which might be because it catches a parent, e.g. System.Exception),
// and the catch doesn't call System.Threading.Thread::ResetAbort(), then when the catch returns to the VM,
- // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target address
- // of the catch return as the new exception address. That is, the re-raised exception appears to occur at
- // the catch return address. If this exception return address skips an enclosing try/catch that catches
- // ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should. For example:
+ // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target
+ // address of the catch return as the new exception address. That is, the re-raised exception appears to
+ // occur at the catch return address. If this exception return address skips an enclosing try/catch that
+ // catches ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should.
+ // For example:
//
// try {
// try {
@@ -8424,12 +8434,12 @@ void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmp
// } finally { }
// OUTSIDE:
//
- // In the above nested try-finally example, we create a step block (call it Bstep) which in branches to a block where
- // a finally would branch to (and such block is marked as finally target). Block B1 branches to step block. Because
- // of re-import of B0, Bstep is also orphaned. Since Bstep is a finally target it cannot be removed. To work around
- // this we will duplicate B0 (call it B0Dup) before reseting. B0Dup is marked as BBJ_CALLFINALLY and only serves to pair
- // up with B1 (BBJ_ALWAYS) that got orphaned. Now during orphan block deletion B0Dup and B1 will be treated as pair
- // and handled correctly.
+ // In the above nested try-finally example, we create a step block (call it Bstep) which branches to a block
+ // where a finally would branch to (and such block is marked as finally target). Block B1 branches to step block.
+ // Because of re-import of B0, Bstep is also orphaned. Since Bstep is a finally target it cannot be removed. To
+ // work around this we will duplicate B0 (call it B0Dup) before resetting. B0Dup is marked as BBJ_CALLFINALLY and
+ // only serves to pair up with B1 (BBJ_ALWAYS) that got orphaned. Now during orphan block deletion B0Dup and B1
+ // will be treated as pair and handled correctly.
if (block->bbJumpKind == BBJ_CALLFINALLY)
{
BasicBlock *dupBlock = bbNewBasicBlock(block->bbJumpKind);
@@ -8599,6 +8609,7 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr
// <BUGNUM> VSW 318822 </BUGNUM>
//
// So here we decide to make the resulting type to be a native int.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
@@ -9260,8 +9271,8 @@ APPEND:
DONE_APPEND:
- // Remember at which BC offset the tree was finished
#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
impNoteLastILoffs();
#endif
break;
@@ -9527,7 +9538,8 @@ _PopValue:
{
// This is a sequence of (ldloc, dup, stloc). Can simplify
// to (ldloc, stloc). Goto LDVAR to reconstruct the ldloc node.
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (tiVerificationNeeded)
{
@@ -9603,8 +9615,8 @@ _PopValue:
// From SPILL_APPEND
impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
- // From DONE_APPEND
#ifdef DEBUG
+ // From DONE_APPEND
impNoteLastILoffs();
#endif
op1 = NULL;
@@ -10895,8 +10907,8 @@ CMP_2_OPs:
op1 = impPopStack().val;
assertImp(genActualTypeIsIntOrI(op1->TypeGet()));
- // Widen 'op1' on 64-bit targets
#ifdef _TARGET_64BIT_
+ // Widen 'op1' on 64-bit targets
if (op1->TypeGet() != TYP_I_IMPL)
{
if (op1->OperGet() == GT_CNS_INT)
@@ -12599,16 +12611,21 @@ FIELD_DONE:
op1->gtFlags |= GTF_IND_UNALIGNED;
}
- /* V4.0 allows assignment of i4 constant values to i8 type vars when IL verifier is bypassed (full trust apps). The reason this works is
- that JIT stores an i4 constant in Gentree union during importation and reads from the union as if it were a long during code generation.
- Though this can potentially read garbage, one can get lucky to have this working correctly.
+ /* V4.0 allows assignment of i4 constant values to i8 type vars when IL verifier is bypassed (full trust
+ apps). The reason this works is that JIT stores an i4 constant in Gentree union during importation
+ and reads from the union as if it were a long during code generation. Though this can potentially
+ read garbage, one can get lucky to have this working correctly.
- This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with /O2 switch (default when compiling
- retail configs in Dev10) and a customer app has taken a dependency on it. To be backward compatible, we will explicitly add an upward
- cast here so that it works correctly always.
+ This code pattern is generated by Dev10 MC++ compiler while storing to fields when compiled with /O2
+ switch (default when compiling retail configs in Dev10) and a customer app has taken a dependency on
+ it. To be backward compatible, we will explicitly add an upward cast here so that it works correctly
+ always.
- Note that this is limited to x86 alone as thereis no back compat to be addressed for Arm JIT for V4.0.
+ Note that this is limited to x86 alone as there is no back compat to be addressed for the Arm JIT
+ for V4.0.
*/
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
if (op1->TypeGet() != op2->TypeGet() &&
op2->OperIsConst() &&
@@ -13267,9 +13284,9 @@ FIELD_DONE:
//Observation: the following code introduces a boxed value class on the stack, but,
//according to the ECMA spec, one would simply expect: tiRetVal = typeInfo(TI_REF,impGetObjectClass());
- /* Push the result back on the stack, */
- /* even if clsHnd is a value class we want the TI_REF */
- /* we call back to the EE to get find out what hte type we should push (for nullable<T> we push T) */
+ // Push the result back on the stack,
+ // even if clsHnd is a value class we want the TI_REF.
+ // We call back to the EE to find out what type we should push (for nullable<T> we push T).
tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(resolvedToken.hClass));
}
@@ -14260,11 +14277,11 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
(unsigned)CHECK_SPILL_ALL);
}
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
// TODO-ARM64-NYI: HFA
// TODO-AMD64-Unix and TODO-ARM once the ARM64 functionality is implemented the
// next ifdefs could be refactored in a single method with the ifdef inside.
-#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-#if defined(_TARGET_ARM_)
if (IsHfa(retClsHnd))
{
// Same as !IsHfa but just don't bother with impAssignStructPtr.
@@ -14279,6 +14296,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
// This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
assert(retRegCount == MAX_RET_REG_COUNT);
// Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
@@ -14367,7 +14385,10 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
impAppendTree(op2, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
// There are cases where the address of the implicit RetBuf should be returned explicitly (in RAX).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_AMD64_)
+
// x64 (System V and Win64) calling convention requires to
// return the implicit return buffer explicitly (in RAX).
// Change the return type to be BYREF.
@@ -14424,8 +14445,8 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
}
impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
- // Remember at which BC offset the tree was finished
#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
impNoteLastILoffs();
#endif
return true;
@@ -14940,11 +14961,11 @@ SPILLSTACK:
}
#endif // _TARGET_64BIT_
+#if FEATURE_X87_DOUBLES
// X87 stack doesn't differentiate between float/double
// so promoting is no big deal.
// For everybody else keep it as float until we have a collision and then promote
// Just like for x64's TYP_INT<->TYP_I_IMPL
-#if FEATURE_X87_DOUBLES
if (multRef > 1 && tree->gtType == TYP_FLOAT)
{
diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp
index 36e1f1b578..8a8166b13e 100644
--- a/src/jit/inlinepolicy.cpp
+++ b/src/jit/inlinepolicy.cpp
@@ -1102,6 +1102,7 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
// compiler -- compiler instance doing the inlining (root compiler)
// isPrejitRoot -- true if this compiler is prejitting the root method
+// clang-format off
DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
: LegacyPolicy(compiler, isPrejitRoot)
, m_Depth(0)
@@ -1147,6 +1148,7 @@ DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
{
// Empty
}
+// clang-format on
//------------------------------------------------------------------------
// NoteBool: handle an observed boolean value
@@ -1688,6 +1690,7 @@ void DiscretionaryPolicy::EstimateCodeSize()
// R=0.55, MSE=177, MAE=6.59
//
// Suspect it doesn't handle factors properly...
+ // clang-format off
double sizeEstimate =
-13.532 +
0.359 * (int) m_CallsiteFrequency +
@@ -1710,6 +1713,7 @@ void DiscretionaryPolicy::EstimateCodeSize()
-5.357 * m_IsFromPromotableValueClass +
-7.901 * (m_ConstantArgFeedsConstantTest > 0 ? 1 : 0) +
0.065 * m_CalleeNativeSizeEstimate;
+ // clang-format on
// Scaled up and reported as an integer value.
m_ModelCodeSizeEstimate = (int) (SIZE_SCALE * sizeEstimate);
@@ -1729,6 +1733,7 @@ void DiscretionaryPolicy::EstimatePerformanceImpact()
{
// Performance estimate based on GLMNET model.
// R=0.24, RMSE=16.1, MAE=8.9.
+ // clang-format off
double perCallSavingsEstimate =
-7.35
+ (m_CallsiteFrequency == InlineCallsiteFrequency::BORING ? 0.76 : 0)
@@ -1737,6 +1742,7 @@ void DiscretionaryPolicy::EstimatePerformanceImpact()
+ (m_ArgType[3] == CORINFO_TYPE_BOOL ? 20.7 : 0)
+ (m_ArgType[4] == CORINFO_TYPE_CLASS ? 0.38 : 0)
+ (m_ReturnType == CORINFO_TYPE_CLASS ? 2.32 : 0);
+ // clang-format on
// Scaled up and reported as an integer value.
m_PerCallInstructionEstimate = (int) (SIZE_SCALE * perCallSavingsEstimate);
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index 843b4022ba..c1f3527b85 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -32,6 +32,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
const char * CodeGen::genInsName(instruction ins)
{
+// clang-format off
static
const char * const insNames[] =
{
@@ -69,6 +70,8 @@ const char * CodeGen::genInsName(instruction ins)
#error "Unknown _TARGET_"
#endif
};
+// clang-format on
+
assert((unsigned)ins < sizeof(insNames)/sizeof(insNames[0]));
assert(insNames[ins] != NULL);
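
The same off/on fencing applied to the instruction-name table above can be shown in a standalone form; the table below is a made-up example (not the JIT's) of a manually aligned array whose formatting is preserved by the markers.

// clang-format off
static const char* const sizeNames[] =
{
    "byte  ptr",
    "word  ptr",
    "dword ptr",
    "qword ptr",
};
// clang-format on

const char* sizeName(unsigned index)
{
    // Bounds-checked lookup into the hand-aligned table above; "unknown" is a
    // placeholder fallback for this sketch.
    return (index < sizeof(sizeNames) / sizeof(sizeNames[0])) ? sizeNames[index] : "unknown";
}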
@@ -122,6 +125,7 @@ void CodeGen::instInit()
const char * CodeGen::genSizeStr(emitAttr attr)
{
+// clang-format off
static
const char * const sizes[] =
{
@@ -148,6 +152,7 @@ const char * CodeGen::genSizeStr(emitAttr attr)
0, 0, 0, 0, 0, 0, 0,
"ymmword ptr"
};
+// clang-format on
unsigned size = EA_SIZE(attr);
@@ -1274,6 +1279,8 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
else
{
// Force the address into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
genCodeForTree(addr, RBM_NONE);
#endif // LEGACY_BACKEND
@@ -4076,6 +4083,8 @@ void CodeGen::instGen_Store_Imm_Into_Lcl(var_types dstType,
}
#elif defined(_TARGET_ARMARCH_)
// Load imm into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef LEGACY_BACKEND
regNumber immReg = regToUse;
assert(regToUse != REG_NA);
diff --git a/src/jit/instr.h b/src/jit/instr.h
index 0a0e9326ff..26245a0a69 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -11,6 +11,7 @@
/*****************************************************************************/
+// clang-format off
DECLARE_TYPED_ENUM(instruction,unsigned)
{
#if defined(_TARGET_XARCH_)
@@ -293,6 +294,7 @@ enum InstructionSet
#endif
InstructionSet_NONE
};
+// clang-format on
/*****************************************************************************/
#endif//_INSTR_H_
diff --git a/src/jit/instrsarm.h b/src/jit/instrsarm.h
index 492d0409e0..324c281761 100644
--- a/src/jit/instrsarm.h
+++ b/src/jit/instrsarm.h
@@ -64,6 +64,7 @@
// * If the instruction writes to more than one destination register, update the function
// emitInsMayWriteMultipleRegs in emitArm.cpp.
+// clang-format off
INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm Rdn,i8 Rd,Rn,i3 Rd,Rn,+i8<<i4 Rd,Rn,Rm{,sh} SP,i9 Rd,SP,i10 Rd,PC,i10
@@ -541,6 +542,7 @@ INST1(vmov_i2d, "vmov.i2d", 1, 0, IF_T2_VMOVD, 0xEC400B10) // A8.6.332 VMOV
INST1(vmov_d2i, "vmov.d2i", 1, 0, IF_T2_VMOVD, 0xEC500B10) // A8.6.332 VMOV from a double to 2 int regs
INST1(vmov_i2f, "vmov.i2f", 1, 0, IF_T2_VMOVS, 0xEE000A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
INST1(vmov_f2i, "vmov.f2i", 1, 0, IF_T2_VMOVS, 0xEE100A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+// clang-format on
/*****************************************************************************/
#undef INST1
diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h
index 4e40309ff6..3e1d00417c 100644
--- a/src/jit/instrsarm64.h
+++ b/src/jit/instrsarm64.h
@@ -53,7 +53,7 @@
// * If the instruction writes to more than one destination register, update the function
// emitInsMayWriteMultipleRegs in emitArm64.cpp.
-
+// clang-format off
INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP LD/ST DR_2E DR_2G DI_1B DI_1D DV_3C DV_2B DV_2C DV_2E DV_2F
@@ -941,7 +941,7 @@ INST1(uxtl, "uxtl", 0, 0, IF_DV_2O, 0x2F00A400)
INST1(uxtl2, "uxtl2", 0, 0, IF_DV_2O, 0x6F00A400)
// uxtl2 Vd,Vn DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn (shift - vector)
-
+// clang-format on
/*****************************************************************************/
#undef INST1
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 37697d394a..436563babf 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -22,6 +22,7 @@
*
******************************************************************************/
+// clang-format off
#if !defined(_TARGET_XARCH_)
#error Unexpected target type
#endif
@@ -535,3 +536,5 @@ INST0(align , "align" , 0, IUM_RD, 0, 0, BAD_CODE)
#undef INST4
#undef INST5
/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/jit.h b/src/jit/jit.h
index fb5d0144af..cbff78f8a6 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -18,6 +18,10 @@
#endif
#endif
+// Clang-format messes with the indentation of comments if they directly precede an
+// ifdef. This macro allows us to anchor the comments to the regular flow of code.
+#define CLANG_FORMAT_COMMENT_ANCHOR ;
+
#ifdef _MSC_VER
// These don't seem useful, so turning them off is no big deal
#pragma warning(disable:4510) // can't generate default constructor
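
For context on how this anchor is used throughout the rest of the change, here is a minimal, self-contained sketch; the function and the HYPOTHETICAL_64BIT define are invented purely for illustration and are not part of this diff.

// Minimal sketch of the anchor idiom (illustrative only, not from this change).
#define CLANG_FORMAT_COMMENT_ANCHOR ;

int widenIfNeeded(int value)
{
    // This comment describes the whole code flow below, not just the #ifdef branch,
    // so it is anchored to keep clang-format from re-indenting it with the directive.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef HYPOTHETICAL_64BIT
    value *= 2; // placeholder work for the conditional branch
#endif
    return value;
}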
@@ -37,7 +41,8 @@
#endif
#ifdef _MSC_VER
-#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after construct 'member_name'" on interesting structs/classes
+#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
+ // construct 'member_name'" on interesting structs/classes
#else
#define CHECK_STRUCT_PADDING 0 // Never enable it for non-MSFT compilers
#endif
@@ -434,15 +439,18 @@ typedef ptrdiff_t ssize_t;
#define VERIFY_GC_TABLES 0
#define REARRANGE_ADDS 1
-#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name, are dumped.
+#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
+ // are dumped.
/*****************************************************************************/
/*****************************************************************************/
/* Set these to 1 to collect and output various statistics about the JIT */
#define CALL_ARG_STATS 0 // Collect stats about calls and call arguments.
-#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple case of single block methods.
-#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of the number of loop exits, etc.
+#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
+ // case of single block methods.
+#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of
+ // the number of loop exits, etc.
#define COUNT_RANGECHECKS 0 // Count range checks removed (in lexical CSE?).
#define DATAFLOW_ITER 0 // Count iterations in lexical CSE and constant folding dataflow.
#define DISPLAY_SIZES 0 // Display generated code, data, and GC information sizes.
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 0aa0b9ab45..9969a4e430 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -120,7 +120,8 @@ CONFIG_METHODSET(JitImportBreak, W("JitImportBreak"))
CONFIG_METHODSET(JitInclude, W("JitInclude"))
CONFIG_METHODSET(JitLateDisasm, W("JitLateDisasm"))
CONFIG_METHODSET(JitMinOptsName, W("JITMinOptsName")) // Forces MinOpts for a named function
-CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified methods
+CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified
+ // methods
CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // Disallow procedure splitting for specified methods if they contain exception handling
CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
index f8e3cebbef..8098992356 100644
--- a/src/jit/jiteh.cpp
+++ b/src/jit/jiteh.cpp
@@ -774,7 +774,8 @@ unsigned Compiler::ehGetMostNestedRegionIndex(BasicBlock* block, bool* in
}
else
{
- assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region of the same EH region
+ assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region
+ // of the same EH region
mostNestedRegion = block->bbHndIndex;
*inTryRegion = false;
}
@@ -1099,7 +1100,8 @@ void* Compiler::ehEmitCookie(BasicBlock* block)
{
// Use the offset of the beginning of the NOP padding, not the main block.
// This might include loop head padding, too, if this is a loop head.
- assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us anything
+ assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us
+ // anything
cookie = block->bbUnwindNopEmitCookie;
}
else
@@ -1384,6 +1386,8 @@ void Compiler::fgAllocEHTable()
// twice the number of EH clauses in the IL, which should be good in practice.
// In extreme cases, we might need to abandon this and reallocate. See
// fgAddEHTableEntry() for more details.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
#else // DEBUG
@@ -1905,7 +1909,8 @@ void Compiler::fgSortEHTable()
// In Dev11 (Visual Studio 2012), x86 did not sort the EH table (it never had before)
// but ARM did. It turns out not sorting the table can cause the EH table to incorrectly
// set the bbHndIndex value in some nested cases, and that can lead to a security exploit
- // that allows the execution of arbitrary code.
+ // that allows the execution of arbitrary code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
@@ -1914,6 +1919,7 @@ void Compiler::fgSortEHTable()
}
#endif // DEBUG
+
EHblkDsc * xtab1;
EHblkDsc * xtab2;
unsigned xtabnum1, xtabnum2;
@@ -1981,7 +1987,6 @@ void Compiler::fgSortEHTable()
}
}
-
// fgNormalizeEH: Enforce the following invariants:
//
// 1. No block is both the first block of a handler and the first block of a try. In IL (and on entry
@@ -2101,19 +2106,21 @@ void Compiler::fgSortEHTable()
//
// The benefit of this is, once again, to remove the need to consider every EH region when adding new blocks.
//
-// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or handler.
-// There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last" block of the try,
-// or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored, so this case doesn't apply.)
-// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will help.)
+// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or
+// handler. There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last"
+// block of the try, or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored,
+// so this case doesn't apply.)
+// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will
+// help.)
//
-// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to traverse
-// the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a single region (except
-// for mutually-protect "try" regions).
+// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to
+// traverse the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a
+// single region (except for mutually-protect "try" regions).
//
-// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block B2
-// violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a block to
-// enforce this invariant prevents us from having to consider the first block of T2's handler as a possible successor of B1.
-// This is somewhat akin to breaking of "critical edges" in a flowgraph.
+// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block
+// B2 violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a
+// block to enforce this invariant prevents us from having to consider the first block of T2's handler as a possible
+// successor of B1. This is somewhat akin to breaking of "critical edges" in a flowgraph.
void Compiler::fgNormalizeEH()
{
@@ -2148,8 +2155,9 @@ void Compiler::fgNormalizeEH()
}
#if 0
- // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially blocks that are unreachable.
- // There are lots of asserts when such things occur. We will re-evaluate whether we can do this normalization.
+ // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially
+ // blocks that are unreachable. There are lots of asserts when such things occur. We will re-evaluate whether we
+ // can do this normalization.
// Note: there are cases in fgVerifyHandlerTab() that are also disabled to match this.
// Case #3: Prevent any two EH regions from ending with the same block.
@@ -2190,16 +2198,17 @@ bool Compiler::fgNormalizeEHCase1()
//
// Case #1: Is the first block of a handler also the first block of any try?
//
- // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple identical 'try' begin
- // blocks as well as this case, e.g.:
+ // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple
+ // identical 'try' begin blocks and this case, e.g.:
// try {
// } finally { try { try {
// } catch {}
// } catch {}
// }
// where the finally/try/try are all the same block.
- // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin blocks (and
- // only those within the 'try' region's parents), not handler begin blocks, when we are inserting new header blocks.
+ // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin
+ // blocks (and only those within the 'try' region's parents), not handler begin blocks, when we are inserting new
+ // header blocks.
//
for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
@@ -2281,9 +2290,9 @@ bool Compiler::fgNormalizeEHCase2()
BasicBlock* tryStart = eh->ebdTryBeg;
BasicBlock* insertBeforeBlk = tryStart; // If we need to insert new blocks, we insert before this block.
- // We need to keep track of the last "mutually protect" region so we can properly not add additional header blocks
- // to the second and subsequent mutually protect try blocks. We can't just keep track of the EH region
- // pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
+ // We need to keep track of the last "mutually protect" region so we can avoid adding additional header
+ // blocks to the second and subsequent mutually protect try blocks. We can't just keep track of the EH
+ // region pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
// pre-update 'try' begin/last blocks themselves.
BasicBlock* mutualTryBeg = eh->ebdTryBeg;
BasicBlock* mutualTryLast = eh->ebdTryLast;
@@ -2301,6 +2310,7 @@ bool Compiler::fgNormalizeEHCase2()
if (ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
{
+ // clang-format off
// Don't touch mutually-protect regions: their 'try' regions must remain identical!
// We want to continue the looping outwards, in case we have something like this:
//
@@ -2349,6 +2359,7 @@ bool Compiler::fgNormalizeEHCase2()
//
// In this case, all the 'try' start at the same block! Note that there are two sets of mutually-protect regions,
// separated by some nesting.
+ // clang-format on
#ifdef DEBUG
if (verbose)
@@ -2420,7 +2431,8 @@ bool Compiler::fgNormalizeEHCase2()
// | |----------- BB04
// |------------------ BB05
//
- // We'll loop twice, to create two header blocks, one for try2, and the second time for try3 (in that order).
+ // We'll loop twice, to create two header blocks, one for try2, and the second time for try3
+ // (in that order).
// After the first loop, we have:
//
// try3 try2 try1
@@ -2431,8 +2443,8 @@ bool Compiler::fgNormalizeEHCase2()
// | |----------- BB04
// |------------------ BB05
//
- // And all the external edges have been changed to point at try2. On the next loop, we'll create a unique
- // header block for try3, and split the edges between try2 and try3, leaving us with:
+ // And all the external edges have been changed to point at try2. On the next loop, we'll create
+ // a unique header block for try3, and split the edges between try2 and try3, leaving us with:
//
// try3 try2 try1
// |---- BB07
@@ -2443,7 +2455,8 @@ bool Compiler::fgNormalizeEHCase2()
// | |----------- BB04
// |------------------ BB05
- BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep track of the next pred in case it gets deleted.
+ BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep
+ // track of the next pred in case it gets deleted.
for (BasicBlockList* pred = insertBeforeBlk->bbCheapPreds; pred != nullptr; pred = nextPred)
{
nextPred = pred->next;
@@ -2457,8 +2470,9 @@ bool Compiler::fgNormalizeEHCase2()
fgAddCheapPred(newTryStart, predBlock);
fgRemoveCheapPred(insertBeforeBlk, predBlock);
- // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will do nothing.
- // Since cheap preds contains dups (for switch duplicates), we will call this once per dup.
+ // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will
+ // do nothing. Since cheap preds contains dups (for switch duplicates), we will call
+ // this once per dup.
fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
#ifdef DEBUG
@@ -2510,8 +2524,8 @@ bool Compiler::fgNormalizeEHCase2()
// |-------------------------- BB08
//
// (Thus, try1 & try2 start at BB03, and are nested inside try3 & try4, which both start at BB01.)
- // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process it
- // and try4.
+ // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process
+ // it and try4.
break;
}
@@ -2528,9 +2542,9 @@ bool Compiler::fgNormalizeEHCase3()
bool modified = false;
//
- // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try' regions).
- // As above, there has to be EH region nesting for this to occur. However, since we need to consider handlers, there are more
- // cases.
+ // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try'
+ // regions). As above, there has to be EH region nesting for this to occur. However, since we need to consider
+ // handlers, there are more cases.
//
// There are four cases to consider:
// (1) try nested in try
@@ -2542,9 +2556,9 @@ bool Compiler::fgNormalizeEHCase3()
// of an EH region (either 'try' or handler region), since that implies that its corresponding handler precedes it.
// That will never happen in C#, but is legal in IL.
//
- // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and the
- // nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try' must also
- // be within the outer handler, and obviously cannot share the same 'last' block.
+ // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and
+ // the nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try'
+ // must also be within the outer handler, and obviously cannot share the same 'last' block.
//
for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
@@ -2567,8 +2581,9 @@ bool Compiler::fgNormalizeEHCase3()
INDEBUG(const char* outerType = ""; const char* innerType = "";)
// 'insertAfterBlk' is the place we will insert new "normalization" blocks. We don't know yet if we will
- // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we determine
- // the innermost region that is equivalent, we set this, and then update it incrementally as we loop outwards.
+ // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we
+ // determine the innermost region that is equivalent, we set this, and then update it incrementally as we
+ // loop outwards.
BasicBlock* insertAfterBlk = nullptr;
bool foundMatchingLastBlock = false;
@@ -2576,9 +2591,9 @@ bool Compiler::fgNormalizeEHCase3()
// This is set to 'false' for mutual protect regions for which we will not insert a normalization block.
bool insertNormalizationBlock = true;
- // Keep track of what the 'try' index and handler index should be for any new normalization block that we insert.
- // If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll need to update
- // these as we go. For example:
+ // Keep track of what the 'try' index and handler index should be for any new normalization block that we
+ // insert. If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll
+ // need to update these as we go. For example:
// try { // EH#5
// ...
// catch { // EH#4
@@ -2613,14 +2628,16 @@ bool Compiler::fgNormalizeEHCase3()
// BB05 // try=5, hnd=0 (no enclosing hnd)
// }
//
- unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler warnings.
+ unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler
+ // warnings.
unsigned nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
- // We compare the outer region against the inner region's 'try' or handler, determined by the 'outerIsTryRegion'
- // variable. Once we decide that, we know exactly the 'last' pointer that we will use to compare against
- // all enclosing EH regions.
+ // We compare the outer region against the inner region's 'try' or handler, determined by the
+ // 'outerIsTryRegion' variable. Once we decide that, we know exactly the 'last' pointer that we will use to
+ // compare against all enclosing EH regions.
//
- // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each nesting level):
+ // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each
+ // nesting level):
//
// try {
// ...
@@ -2659,6 +2676,7 @@ bool Compiler::fgNormalizeEHCase3()
if (EHblkDsc::ebdIsSameTry(ehOuter, ehInner))
{
// We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
@@ -2701,7 +2719,8 @@ bool Compiler::fgNormalizeEHCase3()
{
nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
- // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting within a filter).
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting
+ // within a filter).
if (ehOuter->ebdHndLast == ehInner->ebdTryLast)
{
// Case (3) try nested in handler.
@@ -2843,6 +2862,8 @@ bool Compiler::fgNormalizeEHCase3()
if (innerIsTryRegion && ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
{
// We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
{
@@ -2853,8 +2874,8 @@ bool Compiler::fgNormalizeEHCase3()
insertNormalizationBlock = false;
- // We still need to update the 'last' pointer, in case someone inserted a normalization block before
- // the start of the mutual protect 'try' region.
+ // We still need to update the 'last' pointer, in case someone inserted a normalization
+ // block before the start of the mutual protect 'try' region.
ehOuter->ebdTryLast = insertAfterBlk;
}
else
@@ -2903,8 +2924,8 @@ bool Compiler::fgNormalizeEHCase3()
}
}
- // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share any
- // 'last' blocks, so we're done. Note that we could have a situation like this:
+ // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share
+ // any 'last' blocks, so we're done. Note that we could have a situation like this:
//
// try4 try3 try2 try1
// |---- | | | BB01
@@ -3307,22 +3328,25 @@ void Compiler::fgVerifyHandlerTab()
if (!EHblkDsc::ebdIsSameTry(HBtab, HBtabOuter))
{
- // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the blocks
- // in the nested EH region. However, if funclets have been created, this is no longer true, since this 'try' might
- // be in a handler that is pulled out to the funclet region, while the outer 'try' remains in the main function
- // region.
+ // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the
+ // blocks in the nested EH region. However, if funclets have been created, this is no longer true, since
+ // this 'try' might be in a handler that is pulled out to the funclet region, while the outer 'try'
+ // remains in the main function region.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if FEATURE_EH_FUNCLETS
if (fgFuncletsCreated)
{
- // If both the 'try' region and the outer 'try' region are in the main function area, then we can do the normal
- // nesting check. Otherwise, it's harder to find a useful assert to make about their relationship.
+ // If both the 'try' region and the outer 'try' region are in the main function area, then we can
+ // do the normal nesting check. Otherwise, it's harder to find a useful assert to make about their
+ // relationship.
if ((bbNumTryLast < bbNumFirstFunclet) &&
(bbNumOuterTryLast < bbNumFirstFunclet))
{
if (multipleBegBlockNormalizationDone)
{
- assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block (by EH normalization).
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same
+ // block (by EH normalization).
}
else
{
@@ -3330,7 +3354,8 @@ void Compiler::fgVerifyHandlerTab()
}
if (multipleLastBlockNormalizationDone)
{
- assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block
+                                                 // (by EH normalization).
}
else
{
@@ -3338,7 +3363,8 @@ void Compiler::fgVerifyHandlerTab()
}
}
- // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing try.
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the
+ // enclosing try.
assert((bbNumHndLast < bbNumOuterTryBeg) || (bbNumOuterTryLast < bbNumHndBeg));
}
else
@@ -3346,13 +3372,15 @@ void Compiler::fgVerifyHandlerTab()
{
if (multipleBegBlockNormalizationDone)
{
- assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block (by EH normalization).
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block
+ // (by EH normalization).
}
else
{
assert(bbNumOuterTryBeg <= bbNumTryBeg);
}
- assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same block as an outer 'try' (by IL rules).
+ assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same
+ // block as an outer 'try' (by IL rules).
if (multipleLastBlockNormalizationDone)
{
// An inner EH region can't share a 'last' block with the outer 'try' (by EH normalization).
@@ -3369,7 +3397,8 @@ void Compiler::fgVerifyHandlerTab()
}
// Check the handler region nesting, using ebdEnclosingHndIndex.
- // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it later.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
@@ -3383,15 +3412,17 @@ void Compiler::fgVerifyHandlerTab()
assert(bbNumOuterHndLast != 0);
assert(bbNumOuterHndBeg <= bbNumOuterHndLast);
- // The outer handler must completely contain all the blocks in the EH region nested within it. However, if funclets have been created,
- // it's harder to make any relationship asserts about the order of nested handlers, which also have been made into funclets.
+ // The outer handler must completely contain all the blocks in the EH region nested within it. However, if
+ // funclets have been created, it's harder to make any relationship asserts about the order of nested
+ // handlers, which also have been made into funclets.
#if FEATURE_EH_FUNCLETS
if (fgFuncletsCreated)
{
if (handlerBegIsTryBegNormalizationDone)
{
- assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an outer handler (by EH normalization).
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
}
else
{
@@ -3399,14 +3430,16 @@ void Compiler::fgVerifyHandlerTab()
}
if (multipleLastBlockNormalizationDone)
{
- assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an outer handler (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an
+ // outer handler (by EH normalization).
}
else
{
assert(bbNumTryLast <= bbNumOuterHndLast);
}
- // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing handler.
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing
+ // handler.
assert((bbNumHndLast < bbNumOuterHndBeg) || (bbNumOuterHndLast < bbNumHndBeg));
}
else
@@ -3414,13 +3447,15 @@ void Compiler::fgVerifyHandlerTab()
{
if (handlerBegIsTryBegNormalizationDone)
{
- assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an outer handler (by EH normalization).
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
}
else
{
assert(bbNumOuterHndBeg <= bbNumTryBeg);
}
- assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block as an outer handler (by IL rules).
+ assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block
+ // as an outer handler (by IL rules).
if (multipleLastBlockNormalizationDone)
{
// An inner EH region can't share a 'last' block with the outer handler (by EH normalization).
@@ -3438,7 +3473,8 @@ void Compiler::fgVerifyHandlerTab()
// Set up blockTryBegSet and blockHndBegSet.
// We might want to have this assert:
// if (fgNormalizeEHDone) assert(!blockTryBegSet[HBtab->ebdTryBeg->bbNum]);
- // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice (or more).
+ // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice
+ // (or more).
blockTryBegSet[HBtab->ebdTryBeg->bbNum] = true;
assert(!blockHndBegSet[HBtab->ebdHndBeg->bbNum]);
blockHndBegSet[HBtab->ebdHndBeg->bbNum] = true;
@@ -3526,11 +3562,13 @@ void Compiler::fgVerifyHandlerTab()
XTnum < compHndBBtabCount;
XTnum++, HBtab++)
{
- unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, ignoring 'mutual protect' trys
+ unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
{
- // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the funclet that don't already
- // have a more nested 'try' index because a 'try' is nested within the handler).
+ // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the
+ // funclet that don't already have a more nested 'try' index because a 'try' is nested within the
+ // handler).
BasicBlock* blockEnd;
for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg), blockEnd = HBtab->ebdHndLast->bbNext; block != blockEnd; block = block->bbNext)
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index e5092cfaa1..f18346adf1 100644
--- a/src/jit/jitgcinfo.h
+++ b/src/jit/jitgcinfo.h
@@ -198,8 +198,8 @@ public :
void gcMarkFilterVarsPinned();
- // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead, depending
- // on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
+ // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead,
+ // depending on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
// bits are set in "byRefMask" contain by-refs rather than regular GC pointers. "*pPtrRegs" is the set of
// registers currently known to contain pointers. If "mode" is "ASSIGN_SLOTS", computes and records slot
// ids for the registers. If "mode" is "DO_WORK", informs "gcInfoEncoder" about the state transition,
@@ -334,6 +334,7 @@ public :
//
// These record the info about the procedure in the info-block
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef JIT32_GCENCODER
private:
@@ -390,7 +391,6 @@ public:
#endif // !LEGACY_BACKEND
};
-
inline
unsigned char encodeUnsigned(BYTE *dest, unsigned value)
{
diff --git a/src/jit/jittelemetry.cpp b/src/jit/jittelemetry.cpp
index 1b79272991..f480759231 100644
--- a/src/jit/jittelemetry.cpp
+++ b/src/jit/jittelemetry.cpp
@@ -26,7 +26,8 @@
// (0xb3864c38, 0x4273, 0x58c5, 0x54, 0x5b, 0x8b, 0x36, 0x08, 0x34, 0x34, 0x71)); // Provider GUID
// int main(int argc, char* argv[]) // or DriverEntry for kernel-mode.
// {
-// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the NULLs in C++ code.
+// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the
+// // NULLs in C++ code.
// TraceLoggingWrite(g_hProvider,
// "MyEvent1",
// TraceLoggingString(argv[0], "arg0"),
@@ -115,8 +116,10 @@ TRACELOGGING_DEFINE_PROVIDER(g_hClrJitProvider, CLRJIT_PROVIDER_NAME, CLRJIT_PRO
// Threshold to detect if we are hitting too many bad (noway) methods
// over good methods per process to prevent logging too much data.
-static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop logging. We'd restart if the pass rate improves.
-static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent threshold
+static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop
+ // logging. We'd restart if the pass rate improves.
+static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent
+ // threshold
// Initialize Telemetry State
volatile bool JitTelemetry::s_fProviderRegistered = false;
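
To make the intent of the two thresholds above concrete, here is a hedged sketch; the helper is illustrative only and is not the actual JitTelemetry logic.

// Illustrative only: how a noise ratio plus a sufficiency threshold could gate logging.
static const double   NOWAY_NOISE_RATIO_SKETCH           = 0.6;
static const unsigned NOWAY_SUFFICIENCY_THRESHOLD_SKETCH = 25;

static bool ShouldKeepLoggingNoway(unsigned badMethods, unsigned totalMethods)
{
    // With too few samples the ratio is noisy, so keep logging unconditionally.
    if (totalMethods < NOWAY_SUFFICIENCY_THRESHOLD_SKETCH)
    {
        return true;
    }

    // Otherwise stop logging while the failure ratio is too high; logging resumes
    // if the pass rate improves and the ratio drops back below the threshold.
    return ((double)badMethods / (double)totalMethods) < NOWAY_NOISE_RATIO_SKETCH;
}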
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index f0015b2e8f..918e7aefd3 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -919,8 +919,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo * varDscInfo)
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // The arg size is returning the number of bytes of the argument. For a struct it could return a size not a multiple of
- // TARGET_POINTER_SIZE. The stack allocated space should always be multiple of TARGET_POINTER_SIZE, so round it up.
+ // The arg size is the number of bytes of the argument. For a struct it could be a size that is not a
+ // multiple of TARGET_POINTER_SIZE. The stack allocated space should always be a multiple of
+ // TARGET_POINTER_SIZE, so round it up.
compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
compArgSize += argSize;
@@ -1783,8 +1784,9 @@ void Compiler::lvaPromoteLongVars()
bool isParam = varDsc->lvIsParam;
for (unsigned index=0; index < 2; ++index)
- {
+ {
// Grab the temp for the field local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
char buf[200];
@@ -2114,8 +2116,8 @@ unsigned Compiler::lvaLclSize(unsigned varNum)
default: // This must be a primitive var. Fall out of switch statement
break;
}
- // We only need this Quirk for _TARGET_64BIT_
#ifdef _TARGET_64BIT_
+ // We only need this Quirk for _TARGET_64BIT_
if (lvaTable[varNum].lvQuirkToLong)
{
noway_assert(lvaTable[varNum].lvAddrExposed);
@@ -3329,6 +3331,7 @@ void Compiler::lvaMarkLocalVars()
#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+#ifndef DEBUG
// Assign slot numbers to all variables.
// If compiler generated local variables, slot numbers will be
// invalid (out of range of info.compVarScopes).
@@ -3338,7 +3341,6 @@ void Compiler::lvaMarkLocalVars()
// We don't need to do this for IL, but this keeps lvSlotNum consistent.
-#ifndef DEBUG
if (opts.compScopeInfo && (info.compVarScopesCount > 0))
#endif
{
@@ -3534,6 +3536,7 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
return result;
}
+// clang-format off
/*****************************************************************************
*
* Compute stack frame offsets for arguments, locals and optionally temps.
@@ -3877,6 +3880,7 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* relative or stack pointer relative.
*
*/
+// clang-format on
void Compiler::lvaAssignFrameOffsets(FrameLayoutState curState)
{
@@ -4404,15 +4408,17 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
}
else
{
- // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. This is where they are always homed.
- // So, they can be accessed with positive offset.
- // On System V platforms, if the RA decides to home a register passed arg on the stack,
- // it creates a stack location on the callee stack (like any other local var.) In such a case, the register passed, stack homed arguments
- // are accessed using negative offsets and the stack passed arguments are accessed using positive offset (from the caller's stack.)
- // For System V platforms if there is no frame pointer the caller stack parameter offset should include the callee allocated space.
- // If frame register is used, the callee allocated space should not be included for accessing the caller stack parameters.
- // The last two requirements are met in lvaFixVirtualFrameOffsets method, which fixes the offsets, based on frame pointer existence,
- // existence of alloca instructions, ret address pushed, ets.
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack.
+ // This is where they are always homed. So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack
+ // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed
+ // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive
+ // offset (from the caller's stack.)
+ // For System V platforms if there is no frame pointer the caller stack parameter offset should include the
+ // callee allocated space. If frame register is used, the callee allocated space should not be included for
+ // accessing the caller stack parameters. The last two requirements are met in the lvaFixVirtualFrameOffsets
+ // method, which fixes the offsets based on frame pointer existence, existence of alloca instructions, ret
+ // address pushed, etc.
varDsc->lvStkOffs = *callerArgOffset;
// Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
@@ -4499,13 +4505,14 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
if (varDsc->lvIsRegArg)
{
/* Argument is passed in a register, don't count it
- * when updating the current offset on the stack */
+ * when updating the current offset on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(_TARGET_ARMARCH_)
+#if DEBUG
// TODO: Remove this noway_assert and replace occurrences of sizeof(void *) with argSize
// Also investigate why we are incrementing argOffs for X86 as this seems incorrect
//
-#if DEBUG
noway_assert(argSize == sizeof(void *));
#endif // DEBUG
#endif
@@ -4569,10 +4576,11 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
// r1 VACookie -- argOffs = 0
// -------------------------
//
- // Consider argOffs as if it accounts for number of prespilled registers before the current register.
- // In the above example, for r2, it is r1 that is prespilled, but since r1 is accounted for by argOffs
- // being 4, there should have been no skipping. Instead, if we didn't assign r1 to any variable, then
- // argOffs would still be 0 which implies it is not accounting for r1, equivalently r1 is skipped.
+ // Consider argOffs as if it accounts for number of prespilled registers before the current
+ // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is
+ // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't
+ // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting
+ // for r1, equivalently r1 is skipped.
//
// If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
int prevRegsSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
@@ -4659,12 +4667,13 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
// r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
// r2 struct { int } a1
// r0-r1 struct { long } a0
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef PROFILING_SUPPORTED
// On Arm under profiler, r0-r3 are always prespilled on stack.
- // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct hfa2)
- // In which case hfa1 and hfa2 will be en-registered in co-processor registers and will have an argument offset
- // less than size of preSpill.
+ // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct
+ // hfa2), in which case hfa1 and hfa2 will be en-registered in co-processor registers and will have an
+ // argument offset less than size of preSpill.
//
// For this reason the following conditions are asserted when not under profiler.
if (!compIsProfilerHookNeeded())
@@ -4714,6 +4723,8 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize
// For struct promoted parameters we need to set the offsets for both LclVars.
//
// For a dependent promoted struct we also assign the struct fields stack offset
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if !defined(_TARGET_64BIT_)
if ((varDsc->TypeGet() == TYP_LONG) && varDsc->lvPromoted)
{
@@ -4918,8 +4929,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
if (opts.compNeedSecurityCheck)
{
- /* This can't work without an explicit frame, so make sure */
#ifdef JIT32_GCENCODER
+ /* This can't work without an explicit frame, so make sure */
noway_assert(codeGen->isFramePointerUsed());
#endif
stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs);
@@ -5571,6 +5582,8 @@ void Compiler::lvaAlignFrame()
// If this isn't the final frame layout, assume we have to push an extra QWORD
// Just so the offsets are true upper limits.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef UNIX_AMD64_ABI
// The compNeedToAlignFrame flag is indicating if there is a need to align the frame.
// On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
@@ -5750,13 +5763,15 @@ AGAIN2:
/* Figure out and record the stack offset of the temp */
/* Need to align the offset? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_64BIT_
if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
{
// Calculate 'pad' as the number of bytes to align up 'stkOffs' to be a multiple of TARGET_POINTER_SIZE
- // In practice this is really just a fancy way of writing 4. (as all stack locations are at least 4-byte aligned)
- // Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative value.
+ // In practice this is really just a fancy way of writing 4 (as all stack locations are at least 4-byte
+ // aligned). Note that stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative
+ // value.
//
int alignPad = (int)AlignmentPad((unsigned)-stkOffs, TARGET_POINTER_SIZE);
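
As a worked illustration of the padding computed here (assuming AlignmentPad has the usual round-up meaning; the helper below is a sketch, not the JIT's implementation): with stkOffs == -20 and TARGET_POINTER_SIZE == 8, the pad is 4.

// Illustrative equivalent of the padding computation: bytes needed to round
// 'offs' up to a multiple of 'alignment'.
static unsigned alignmentPadSketch(unsigned offs, unsigned alignment)
{
    unsigned rem = offs % alignment;
    return (rem == 0) ? 0 : (alignment - rem);
}

// Example: stkOffs == -20, TARGET_POINTER_SIZE == 8
//   alignmentPadSketch(20, 8) == 4, so the GC temp moves down 4 bytes to an 8-byte boundary.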
@@ -5979,8 +5994,8 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t
}
else
{
- // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home location.
- // Otherwise, it's always on the stack.
+ // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home
+ // location. Otherwise, it's always on the stack.
if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
lvaDumpFrameLocation(lclNum);
}
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 3928c634bc..18ea49b4c4 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -87,17 +87,17 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon *tree, GenTree *
if ((tree->gtFlags & GTF_VAR_DEF) != 0 &&
(tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
{
-// if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
}
else
{
-// if (!(fgCurDefSet & bitMask))
-// {
-// printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
-// printTreeID(tree);
-// printf("\n");
-// }
+ // if (!(fgCurDefSet & bitMask))
+ // {
+ // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
+ // printTreeID(tree);
+ // printf("\n");
+ // }
/* We have the following scenarios:
* 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
@@ -1843,6 +1843,8 @@ SKIP_QMARK:
}
// Is this a use/def of a local variable?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
// Generally, the last use information is associated with the lclVar node.
// However, for LEGACY_BACKEND, the information must be associated
@@ -2253,6 +2255,7 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_TP l
// and we start computing life again from the op_ovf node (we go backwards). Note that we
// don't need to update ref counts because we don't change them, we're only changing the
// operation.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
@@ -2261,6 +2264,7 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_TP l
}
#endif // DEBUG
+
switch (asgNode->gtOper)
{
case GT_ASG_ADD:
@@ -2501,7 +2505,6 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_TP l
else
{
NO_SIDE_EFFECTS:
- /* No side effects - Remove the interior statement */
#ifdef DEBUG
if (verbose)
{
@@ -2512,6 +2515,7 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_TP l
printf("\n");
}
#endif // DEBUG
+ /* No side effects - Remove the interior statement */
fgUpdateRefCntForExtract(asgNode, NULL);
/* Change the assignment to a GT_NOP node */
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index f644b930ec..0e60244522 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -370,8 +370,8 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
* a data section where this array will live and will emit code that based on the switch index, will indirect and
* jump to the destination specified in the jump table.
*
- * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch node
- * for jump table based switches.
+ * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
+ * node for jump table based switches.
* The overall structure of a GT_SWITCH_TABLE is:
*
* GT_SWITCH_TABLE
@@ -410,9 +410,10 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
* else if (case == firstCase){ goto jumpTable[1]; }
* else if (case == secondCase) { goto jumptable[2]; } and so on.
*
- * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer require
- * internal temporaries to maintain the index we're evaluating plus we're using existing code from LinearCodeGen
- * to implement this instead of implement all the control flow constructs using InstrDscs and InstrGroups downstream.
+ * This transformation is of course made in JIT-IR, not downstream at the CodeGen level, so this way we no longer
+ * require internal temporaries to maintain the index we're evaluating, and we're using existing code from
+ * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and
+ * InstrGroups downstream.
*/
void Lowering::LowerSwitch(GenTreePtr* pTree)
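
The two strategies described in the comments above can be sketched at the source level (conceptual only; the real lowering rewrites GenTree nodes rather than C++): a dense switch becomes an indirect lookup through a table, and a small switch becomes a compare/branch chain.

// Conceptual sketch of the two switch lowerings (illustrative values and names).
int dispatchViaJumpTable(unsigned caseIndex)
{
    // Jump-table form: bounds check, then a single indexed lookup of the target.
    static const int targets[] = { 10, 20, 30, 40 };
    if (caseIndex >= sizeof(targets) / sizeof(targets[0]))
    {
        return -1; // default case
    }
    return targets[caseIndex];
}

int dispatchViaCompareChain(unsigned caseIndex)
{
    // Compare/branch chain for a small number of cases.
    if (caseIndex == 0)
    {
        return 10;
    }
    else if (caseIndex == 1)
    {
        return 20;
    }
    return -1; // default case
}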
@@ -879,6 +880,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
if (info->structDesc.eightByteCount == 1)
{
+ // clang-format off
// Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
//
// Here the IR for this operation:
@@ -906,11 +908,13 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
// N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
//
+ // clang-format on
putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
}
else if (info->structDesc.eightByteCount == 2)
{
+ // clang-format off
// Case 2 above: Convert the LCL_FLDs to PUTARG_REG
//
// lowering call :
@@ -939,6 +943,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
// N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
//
+ // clang-format on
assert(arg->OperGet() == GT_LIST);
GenTreeArgList* argListPtr = arg->AsArgList();
@@ -1553,8 +1558,8 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
// control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
// \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
//
-// In this case, the GT_PUTARG_REG src is a nested call. We need to put the embedded statement after that call (as shown).
-// We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
+// In this case, the GT_PUTARG_REG src is a nested call. We need to put the embedded statement after that call
+// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
//
// Params:
// callNode - tail call node
@@ -1636,7 +1641,8 @@ void Lowering::LowerFastTailCall(GenTreeCall *call)
// The below condition cannot be asserted in lower because fgSimpleLowering()
// can add a new basic block for range check failure which becomes
// fgLastBB with block number > loop header block number.
- //assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
+ // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
// If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
// a method returns. This is a case of caller method has both PInvokes and tail calls.
@@ -2223,9 +2229,6 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
{
- // Indirect cookie calls gets transformed by fgMorphArgs as indirect call with non-standard args.
- // Hence we should never see this type of call in lower.
-
#ifdef _TARGET_X86_
if (call->gtCallCookie != nullptr)
{
@@ -2233,6 +2236,9 @@ GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
}
#endif
+ // Indirect cookie calls get transformed by fgMorphArgs into an indirect call with non-standard args.
+ // Hence we should never see this type of call in lower.
+
noway_assert(call->gtCallCookie == nullptr);
return nullptr;
@@ -2373,9 +2379,11 @@ GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
// +10h +08h m_Next offsetOfFrameLink method prolog
// +18h +0Ch m_Datum offsetOfCallTarget call site
// +20h n/a m_StubSecretArg not set by JIT
-// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method prolog;
-// non-x86: method prolog (SP remains constant in function,
-// after prolog: no localloc and PInvoke in same function)
+// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
+// prolog;
+// non-x86: method prolog (SP remains
+// constant in function, after prolog: no
+// localloc and PInvoke in same function)
// +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
// +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
// +1Ch JIT retval spill area (int) before call_gc ???
@@ -2413,6 +2421,7 @@ void Lowering::InsertPInvokeMethodProlog()
// Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
// TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
// for x86, don't pass the secretArg.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
@@ -2516,8 +2525,8 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock *returnBB
// Gentree of the last top level stmnt should match.
assert(lastTopLevelStmtExpr == lastExpr);
- // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution order
- // so that it is guaranteed that there will be no further PInvokes after that point in the method.
+ // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
+ // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
//
// Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
// Op1, PME, GT_RETURN
@@ -2530,13 +2539,14 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock *returnBB
// Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
// That is after PME, args for GT_JMP call will be setup.
- // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a PInvoke
- // call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, it is harmeless.
+ // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a
+ // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
+ // it is harmless.
// Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has
// PInvokes. Deleting the below stmnt would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
- // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to properly
- // extend the life of compLvFrameListRoot var.
- //
+ // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to
+ // properly extend the life of compLvFrameListRoot var.
+ //
// Thread.offsetOfGcState = 0/1
// That is [tcb + offsetOfGcState] = 1
GenTree* storeGCState = SetGCState(1);
@@ -2840,8 +2850,8 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
// // Call the JIT_PINVOKE_END helper
// JIT_PINVOKE_END(&opaqueFrame);
//
- // Note that the JIT_PINVOKE_{BEGIN.END} helpers currently use the default calling convention for the target platform.
- // They may be changed in the future such that they preserve all register values.
+ // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
+ // platform. They may be changed in the future such that they preserve all register values.
GenTree* result = nullptr;
void* addr = nullptr;
@@ -3000,8 +3010,6 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
{
assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
- GenTree* result = nullptr;
-
// An x86 JIT which uses full stub dispatch must generate only
// the following stub dispatch calls:
//
@@ -3015,7 +3023,9 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
//
// THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
// vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
-
+
+ GenTree* result = nullptr;
+
#ifdef _TARGET_64BIT_
// Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
// exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
@@ -3824,9 +3834,9 @@ void Lowering::DoPhase()
}
}
- // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the appropriate spots.
- // NOTE: there is a minor optimization opportunity here, as we still create p/invoke data structures and setup/teardown
- // even if we've eliminated all p/invoke calls due to dead code elimination.
+ // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
+ // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
+ // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
if (comp->info.compCallUnmanaged)
{
InsertPInvokeMethodProlog();
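
The lower.cpp hunks above are mostly reflows of the comment documenting LowerSwitch. As a reading aid only, here is a minimal C++ sketch of the two dispatch shapes that comment contrasts (the GT_SWITCH_TABLE-style indirect jump through a jump table versus the compare chain used for small case counts); the function and parameter names are illustrative, not JIT APIs.

    #include <cstddef>

    using CaseHandler = void (*)();

    // Jump-table form: bounds-check the switch value, then jump indirectly
    // through the table of destinations (what GT_SWITCH_TABLE expresses in IR).
    void DispatchViaJumpTable(std::size_t value, const CaseHandler* jumpTable, std::size_t caseCount, CaseHandler defaultCase)
    {
        if (value < caseCount)
        {
            jumpTable[value]();
        }
        else
        {
            defaultCase();
        }
    }

    // Small-count form: "if (value == 0) ... else if (value == 1) ... else default".
    void DispatchViaCompareChain(std::size_t value, const CaseHandler* jumpTable, CaseHandler defaultCase)
    {
        if (value == 0)
        {
            jumpTable[0]();
        }
        else if (value == 1)
        {
            jumpTable[1]();
        }
        else
        {
            defaultCase();
        }
    }
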
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index faa5925027..340bdf8561 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -400,7 +400,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
case GT_CAST:
{
- // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
// see CodeGen::genIntToIntCast()
info->srcCount = 1;
@@ -1229,12 +1230,12 @@ Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode)
GenTreePtr blockSize = initBlkNode->Size();
GenTreePtr initVal = initBlkNode->InitVal();
+#if 0
// TODO-ARM64-CQ: Currently we generate a helper call for every
// initblk we encounter. Later on we should implement loop unrolling
// code sequences to improve CQ.
// For reference see the code in LowerXArch.cpp.
-#if 0
// If we have an InitBlk with constant block size we can speed this up by unrolling the loop.
if (blockSize->IsCnsIntOrI() &&
blockSize->gtIntCon.gtIconVal <= INITBLK_UNROLL_LIMIT &&
@@ -1330,11 +1331,12 @@ Lowering::TreeNodeInfoInitBlockStore(GenTreeBlkOp* blkNode)
GenTreePtr blockSize = cpBlkNode->Size();
GenTreePtr srcAddr = cpBlkNode->Source();
+#if 0
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
// we should unroll the loop to improve CQ.
// TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
-#if 0
+
if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
{
assert(!blockSize->IsIconHandle());
@@ -1888,7 +1890,8 @@ void Lowering::LowerRotate(GenTreePtr tree)
}
else
{
- GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType),
+ rotateLeftIndexNode);
rotateLeftIndexNode->InsertAfterSelf(tmp);
tree->gtOp.gtOp2 = tmp;
}
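
The LowerRotate hunk above wraps the line that builds a GT_NEG of the rotate-left index. The identity that makes this work, sketched in standalone C++ for a 32-bit rotate (the helper names are made up for the example): rotating left by k is the same as rotating right by the negated amount, taken modulo the bit width.

    #include <cstdint>

    uint32_t RotateRight32(uint32_t value, unsigned amount)
    {
        amount &= 31;
        return (amount == 0) ? value : ((value >> amount) | (value << (32 - amount)));
    }

    uint32_t RotateLeft32(uint32_t value, unsigned amount)
    {
        // rol(x, k) == ror(x, -k mod 32), so negating the rotate index suffices.
        return RotateRight32(value, (0u - amount) & 31);
    }
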
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 3580b43e0b..26443f08ed 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -1111,6 +1111,8 @@ Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
RegisterType registerType = call->TypeGet();
// Set destination candidates for return value of the call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_X86_
if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
{
@@ -1165,11 +1167,11 @@ Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
}
}
- // First, count reg args
#if FEATURE_VARARG
bool callHasFloatRegArgs = false;
#endif // !FEATURE_VARARG
+ // First, count reg args
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
assert(list->IsList());
@@ -1316,12 +1318,12 @@ Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
short internalIntCount = 0;
if (remainingSlots > 0)
{
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// This TYP_STRUCT argument is also passed in the outgoing argument area
// We need a register to address the TYP_STRUCT
- // And we may need 2
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
internalIntCount = 1;
#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // And we may need 2
internalIntCount = 2;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
@@ -2638,6 +2640,7 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
bool rev;
bool modifiedSources = false;
+#ifdef FEATURE_SIMD
// If indirTree is of TYP_SIMD12, don't mark addr as contained
// so that it always get computed to a register. This would
// mean codegen side logic doesn't need to handle all possible
@@ -2645,7 +2648,6 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
//
// TODO-XArch-CQ: handle other addr mode expressions that could be marked
// as contained.
-#ifdef FEATURE_SIMD
if (indirTree->TypeGet() == TYP_SIMD12)
{
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
@@ -3031,9 +3033,9 @@ void Lowering::LowerCmp(GenTreePtr tree)
GenTreePtr andOp1 = op1->gtOp.gtOp1;
if (andOp1->isMemoryOp())
{
- // If the type of value memoryOp (andOp1) is not the same as the type of constant (andOp2)
- // check to see whether it is safe to mark AndOp1 as contained. For e.g. in the following
- // case it is not safe to mark andOp1 as contained
+ // If the type of value memoryOp (andOp1) is not the same as the type of constant
+ // (andOp2) check to see whether it is safe to mark AndOp1 as contained. For e.g. in
+ // the following case it is not safe to mark andOp1 as contained
// AndOp1 = signed byte and andOp2 is an int constant of value 512.
//
// If it is safe, we update the type and value of andOp2 to match with andOp1.
@@ -3674,8 +3676,8 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
// If it is a GT_LCL_VAR, it still needs the reg to hold the address.
// We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit time.
- // Also, we don't need a reg for GT_CLS_VAR_ADDR.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
+ // time. Also, we don't need a reg for GT_CLS_VAR_ADDR.
if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
{
m_lsra->clearOperandCounts(indirDst);
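
The LowerCmp hunk above reflows the comment about when a small-typed memory operand can safely be marked as contained for a test against a constant. A tiny standalone illustration of the hazard it describes (the variable names are ad hoc): if the constant has bits outside the small type's width, narrowing the AND to that width changes the result.

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        int8_t memByte = -1;                              // stored as 0xFF; sign-extends to 0xFFFFFFFF
        int    wideAnd = (int)memByte & 512;              // 512 == 0x200: bit 9 of the widened value is set -> 512
        int    byteAnd = (uint8_t)memByte & (512 & 0xFF); // an 8-bit AND only sees the low byte -> 0
        printf("wideAnd=%d byteAnd=%d\n", wideAnd, byteAnd);
        return 0;
    }
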
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 266d68ec60..7d117485db 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -24,7 +24,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
"Internal registers" are registers used during the code sequence generated for the node.
The register lifetimes must obey the following lifetime model:
- First, any internal registers are defined.
- - Next, any source registers are used (and are then freed if they are last use and are not identified as "delayRegFree").
+ - Next, any source registers are used (and are then freed if they are last use and are not identified as
+ "delayRegFree").
- Next, the internal registers are used (and are then freed).
- Next, any registers in the kill set for the instruction are killed.
- Next, the destination register(s) are defined (multiple destination registers are only supported on ARM)
@@ -61,22 +62,25 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
- If a lclVar node currently lives in some register, it may not be desirable to move it
(i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
but needs to be in a specific arg register for a call).
- - In other cases there may be conflicts on the restrictions placed by the defining node and the node which consumes it
+ - In other cases there may be conflicts on the restrictions placed by the defining node and the node which
+ consumes it
- If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
- then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate move.
- - However, if such a node is constrained to a set of registers, and its current location does not satisfy that requirement,
- LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node must satisfy the
- register requirement of the parent.
+ then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate
+ move.
+ - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
+ requirement, LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node
+ must satisfy the register requirement of the parent.
- GenTree::gtRsvdRegs has a set of registers used for internal temps.
- - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been evaluated.
+ - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
+ evaluated.
- LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
In the new backend perhaps this should change (see also the note below under CodeGen).
- A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
- The register (gtRegNum) on the node indicates the register to which it must be reloaded.
- For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
with the register to which the variable must be reloaded.
- - For other nodes, since they represent both the def and use, if the value must be reloaded to a different register,
- LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
+ - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
+ register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
Local variable table (LclVarDsc):
- LclVarDsc::lvRegister is set to true if a local variable has the
@@ -821,9 +825,9 @@ LinearScan::newRefPosition(Interval* theInterval,
// Spill info
newRP->isFixedRegRef = isFixedRegister;
+#ifndef _TARGET_AMD64_
// We don't need this for AMD because the PInvoke method epilog code is explicit
// at register allocation time.
-#ifndef _TARGET_AMD64_
if (theInterval != nullptr &&
theInterval->isLocalVar &&
compiler->info.compCallUnmanaged &&
@@ -2044,6 +2048,8 @@ void LinearScan::identifyCandidates()
// We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
// and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
// above).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType vars.
if (varDsc->lvType == LargeVectorType)
@@ -2083,6 +2089,7 @@ void LinearScan::identifyCandidates()
// registers current include the number of fp vars, whether there are loops, and whether there are
// multiple exits. These have been selected somewhat empirically, but there is probably room for
// more tuning.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (VERBOSE)
@@ -2099,6 +2106,7 @@ void LinearScan::identifyCandidates()
}
}
#endif
+
JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n",
floatVarCount,
compiler->fgHasLoops,
@@ -2145,7 +2153,8 @@ void LinearScan::identifyCandidates()
void
LinearScan::initVarRegMaps()
{
- assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
+ assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
+ // variables.
// The compiler memory allocator requires that the allocation be an
// even multiple of int-sized objects
@@ -2779,8 +2788,8 @@ LinearScan::buildInternalRegisterDefsForNode(GenTree *tree,
int internalIntCount = tree->gtLsraInfo.internalIntCount;
regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
- // If the number of internal integer registers required is the same as the number of candidate integer registers in the candidate set,
- // then they must be handled as fixed registers.
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
// (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
bool fixedRegs = false;
regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
@@ -2893,9 +2902,9 @@ LinearScan::buildUpperVectorSaveRefPositions(GenTree *tree,
tempInterval->isInternal = true;
RefPosition *pos = newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
// We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
- // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding RefTypeUpperVectorSaveUse
- // RefPosition. We will then restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
- // of tempInterval.
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
tempInterval->relatedInterval = varInterval->relatedInterval;
varInterval->relatedInterval = tempInterval;
}
@@ -3582,13 +3591,15 @@ LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
// returned is in fact a predecessor.
//
// Notes:
-// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be one of:
+// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
+// one of:
// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
// This is the only case where this actually returns a different block.
// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
-// the register locations will be "rotated" to stress the resolution and allocation code.
+// the register locations will be "rotated" to stress the resolution and allocation
+// code.
BasicBlock*
LinearScan::findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
@@ -4836,9 +4847,9 @@ LinearScan::tryAllocateFreeReg(Interval *currentInterval, RefPosition *refPositi
foundBetterCandidate = true;
}
}
- // If both cover the range, prefer a register that is killed sooner (leaving the longer range register available).
- // If both cover the range and also getting killed at the same location, prefer the one which is same as previous
- // assignment.
+ // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
+ // available). If both cover the range and are also killed at the same location, prefer the one that
+ // is the same as the previous assignment.
else if (nextPhysRefLocation > lastLocation)
{
if (nextPhysRefLocation < bestLocation)
@@ -5130,8 +5141,8 @@ LinearScan::allocateBusyReg(Interval* current,
isBetterLocation = (nextLocation <= farthestLocation);
}
else
- // the below if-stmt is associated with this else
#endif
+ // This if-stmt is associated with the above else
if (recentAssignedRefWeight < farthestRefPosWeight)
{
isBetterLocation = true;
@@ -5775,8 +5786,8 @@ LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocation
// In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
// encounter it in resolveLocalRef().
// 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
- // because the copyReg RefPosition will not have recorded the "home" register, yet downstream RefPositions
- // rely on the correct "home" register.
+ // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
+ // RefPositions rely on the correct "home" register.
// 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
// predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
// as reload during allocation time if necessary (note that by the time we actually reach the next
@@ -6584,8 +6595,8 @@ LinearScan::allocateRegisters()
currentInterval->hasConflictingDefUse));
// It's already in a register, but not one we need.
- // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that the
- // needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
+ // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
+ // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
// The reason we need special handling for the "delayRegFree" case is that we need to mark the
// fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
// Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
@@ -6795,6 +6806,8 @@ LinearScan::allocateRegisters()
}
// Free registers to clear associated intervals for resolution phase
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (getLsraExtendLifeTimes())
{
@@ -8854,12 +8867,12 @@ LinearScan::resolveEdge(BasicBlock* fromBlock,
break;
}
+#ifndef _TARGET_XARCH_
// We record tempregs for beginning and end of each block.
// For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
// TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
// modifies the varToRegMaps so we don't have all the correct registers at the time
// we need to get the tempReg.
-#ifndef _TARGET_XARCH_
regNumber tempRegInt = (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
#endif // !_TARGET_XARCH_
regNumber tempRegFlt = REG_NA;
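
Several lsra.cpp hunks above reflow the file-header comment that defines the per-node register lifetime model. Restating that ordering as a small C++ enumeration may make it easier to scan; the types below are purely illustrative and do not exist in the JIT.

    // The order in which register events happen for a single node, following the
    // lifetime model described in the lsra.cpp header comment.
    enum class NodeRegEvent
    {
        DefineInternalRegs, // 1. any internal registers are defined
        UseSourceRegs,      // 2. sources are used (freed at last use unless marked "delayRegFree")
        UseInternalRegs,    // 3. internal registers are used, then freed
        ApplyKillSet,       // 4. registers in the node's kill set are killed
        DefineDestRegs,     // 5. destination register(s) are defined (multiple only on ARM)
    };

    constexpr NodeRegEvent kNodeRegOrder[] = {
        NodeRegEvent::DefineInternalRegs, NodeRegEvent::UseSourceRegs, NodeRegEvent::UseInternalRegs,
        NodeRegEvent::ApplyKillSet,       NodeRegEvent::DefineDestRegs,
    };
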
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index 9ce2bd79c7..f492e1ac39 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -327,6 +327,7 @@ public:
//
// Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
// table is never resized. It is also limited by the size of the index, currently an unsigned char.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM64_)
static const int numMasks = 128;
@@ -445,11 +446,13 @@ private:
LSRA_LIMIT_CALLER = 0x2,
LSRA_LIMIT_SMALL_SET = 0x3,
LSRA_LIMIT_MASK = 0x3 };
+
// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architecture that will support SIMD,
// we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
// Hence the "SmallFPSet" has 5 elements.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_AMD64_)
#ifdef UNIX_AMD64_ABI
@@ -513,9 +516,9 @@ private:
LsraExtendLifetimes getLsraExtendLifeTimes() { return (LsraExtendLifetimes) (lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK); }
bool extendLifetimes() { return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES; }
- // This controls whether variables locations should be set to the previous block in layout order (LSRA_BLOCK_BOUNDARY_LAYOUT),
- // or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED - the default),
- // or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
+ // This controls whether variable locations should be set to the previous block in layout order
+ // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
+ // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
enum LsraBlockBoundaryLocations { LSRA_BLOCK_BOUNDARY_PRED = 0,
LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
LSRA_BLOCK_BOUNDARY_ROTATE = 0x200,
diff --git a/src/jit/lsra_reftypes.h b/src/jit/lsra_reftypes.h
index 20780bd1cf..841b78c881 100644
--- a/src/jit/lsra_reftypes.h
+++ b/src/jit/lsra_reftypes.h
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
// memberName - enum member name
// memberValue - enum member value
// shortName - short name string
@@ -19,3 +20,4 @@
DEF_REFTYPE(RefTypeUpperVectorSaveDef, (0x40 | RefTypeDef), "UVSv" )
DEF_REFTYPE(RefTypeUpperVectorSaveUse, (0x40 | RefTypeUse), "UVRs" )
DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
+// clang-format on
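
lsra_reftypes.h is an X-macro style include: each entry passes (memberName, memberValue, shortName) to whatever DEF_REFTYPE expands to at the include site, which is why its columnar layout is being fenced with clang-format off/on. A hypothetical consumer, assuming the full table defines RefTypeDef and RefTypeUse before the entries that reference them, would look like this (the JIT's actual consumers differ):

    // Build an enum from the table; each DEF_REFTYPE line becomes one enumerator.
    enum RefType
    {
    #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
    #include "lsra_reftypes.h"
    #undef DEF_REFTYPE
    };
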
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index c8835ead82..5572d8cb3e 100755
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -199,6 +199,8 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
}
// do we need to do it in two steps R -> I, '-> smallType
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
if (dstSize < genTypeSize(TYP_INT))
{
@@ -1501,13 +1503,14 @@ void fgArgInfo::ArgsComplete()
}
#ifndef LEGACY_BACKEND
+#if FEATURE_MULTIREG_ARGS
// For RyuJIT backend we will expand a Multireg arg into a GT_LIST
// with multiple indirections, so here we consider spilling it into a tmp LclVar.
//
// Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
-#if FEATURE_MULTIREG_ARGS
+
bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
@@ -1666,6 +1669,13 @@ void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nSorting the arguments:\n");
+ }
+#endif
+
/* Shuffle the arguments around before we build the gtCallLateArgs list.
The idea is to move all "simple" arguments like constants and local vars
to the end of the table, and move the complex arguments towards the beginning
@@ -1685,13 +1695,6 @@ void fgArgInfo::SortArgs()
+------------------------------------+ <--- argTable[0]
*/
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nSorting the arguments:\n");
- }
-#endif
-
/* Set the beginning and end for the new argument table */
unsigned curInx;
int regCount = 0;
@@ -2149,7 +2152,9 @@ void fgArgInfo::EvalArgsToTemps()
else
{
// Create a temp assignment for the argument
- // Put the temp in the gtCallLateArgs list
+ // Put the temp in the gtCallLateArgs list
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (compiler->verbose)
{
@@ -2788,6 +2793,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
// in the implementation of fast tail call.
// *********** END NOTE *********
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
@@ -3007,6 +3013,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif // _TARGET_X86_
/* Morph the user arguments */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_ARM_)
@@ -3027,7 +3034,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// HFA a3, // passed in f4/f5/f6
// double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
// HFA a5, // passed in f10/f11/f12
- // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill slots
+ // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
+ // // slots
// float a7, // passed in f1 (back-filled)
// float a8, // passed in f7 (back-filled)
// float a9, // passed in f13 (back-filled)
@@ -3260,8 +3268,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
else // !lateArgsComputed
{
//
- // Figure out the size of the argument. This is either in number of registers, or number of TARGET_POINTER_SIZE
- // stack slots, or the sum of these if the argument is split between the registers and the stack.
+ // Figure out the size of the argument. This is either in number of registers, or number of
+ // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
+ // the stack.
//
if (argx->IsArgPlaceHolderNode() || (!isStructArg))
{
@@ -3418,12 +3427,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// The following if-then-else needs to be carefully refactored.
// Basically the else portion wants to turn a struct load (a GT_OBJ)
- // into a GT_IND of the appropriate size.
+ // into a GT_IND of the appropriate size.
// It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
// It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
// TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
// It also can't do this if we have a HFA arg,
// unless we have a 1-elem HFA in which case we want to do the optimization.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_X86_
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -3620,8 +3630,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// constituent fields, and so we have to re-assemble it
copyBlkClass = objClass;
#ifdef _TARGET_ARM_
- // Alignment constraints may cause us not to use (to "skip") some argument registers.
- // Add those, if any, to the skipped (int) arg reg mask.
+ // Alignment constraints may cause us not to use (to "skip") some argument
+ // registers. Add those, if any, to the skipped (int) arg reg mask.
fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
#endif // _TARGET_ARM_
}
@@ -3661,11 +3671,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
else
{
- // if the valuetype size is not a multiple of sizeof(void*),
+ // If the valuetype size is not a multiple of sizeof(void*),
// we must copyblk to a temp before doing the obj to avoid
// the obj reading memory past the end of the valuetype
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
+ // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
copyBlkClass = objClass;
size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
@@ -3845,9 +3857,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// Now we know if the argument goes in registers or not and how big it is,
// whether we had to just compute it or this is a re-morph call and we looked it up.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
-
// If we ever allocate a floating point argument to the stack, then all
// subsequent HFA/float/double arguments go on the stack.
if (!isRegArg && passUsingFloatRegs)
@@ -4228,9 +4240,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifndef LEGACY_BACKEND
// In the future we can migrate UNIX_AMD64 to use this
// method instead of fgMorphSystemVStructArgs
-#ifndef LEGACY_BACKEND
+
// We only build GT_LISTs for MultiReg structs for the RyuJIT backend
if (hasMultiregStructArgs)
{
@@ -4470,6 +4483,8 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
// Currently only ARM64 is using this method to morph the MultiReg struct args
// in the future AMD64_UNIX and for HFAs ARM32, will also use this method
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef _TARGET_ARM_
NYI_ARM("fgMorphMultiregStructArgs");
#endif
@@ -4762,8 +4777,8 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPt
if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
{
- // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer registers
- // So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
+ // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
+ // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
//
JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", varNum);
//
@@ -5579,8 +5594,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
GenTreePtr addr;
- // Widen 'index' on 64-bit targets
#ifdef _TARGET_64BIT_
+ // Widen 'index' on 64-bit targets
if (index->TypeGet() != TYP_I_IMPL)
{
if (index->OperGet() == GT_CNS_INT)
@@ -6057,7 +6072,9 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma
// contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
// large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
// checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
- /// This is left here to point out how to implement it.
+ // This is left here to point out how to implement it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
// If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
@@ -6528,8 +6545,6 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
return;
}
- // Success!
-
#ifdef DEBUG
if (verbose)
{
@@ -6553,6 +6568,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
// a call node might be marked as an in-line candidate and could fail to be in-lined. In which case
// fgInline() will replace return value place holder with call node using gtCloneExpr() which is
// currently not copying/setting callSig.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (callee->IsTailPrefixedCall())
{
@@ -7836,11 +7853,12 @@ NO_TAIL_CALL:
// If this is a 'regular' call, mark the basic block as
// having a call (for computing full interruptibility).
- //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
// Amd64 note: If this is a fast tail call then don't count it as a call
// since we don't insert GC-polls but instead make the method fully GC
// interruptible.
-#ifdef _TARGET_AMD64_
if (!call->IsFastTailCall())
#endif
{
@@ -7872,11 +7890,12 @@ NO_TAIL_CALL:
{
noway_assert(call->TypeGet() == TYP_INT);
- // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType objects.
- // Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to GT_NE/GT_NE:
- // One important invariance that should never change is that type equivalency is always equivalent to object
- // identity equality for runtime type objects in reflection. This is also reflected in RuntimeTypeHandle::TypeEquals.
- // If this invariance would ever be broken, we need to remove the optimization below.
+ // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
+ // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
+ // GT_NE/GT_NE: One important invariant that should never change is that type equivalency is always
+ // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
+ // in RuntimeTypeHandle::TypeEquals. If this invariant is ever broken, we need to remove the
+ // optimization below.
GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
@@ -7894,7 +7913,8 @@ NO_TAIL_CALL:
}
// Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
- GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require copy-back).
+ GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
+ // copy-back).
unsigned retValTmpNum = BAD_VAR_NUM;
CORINFO_CLASS_HANDLE structHnd = nullptr;
if (call->HasRetBufArg() &&
@@ -8310,8 +8330,8 @@ ONE_SIMPLE_ASG:
// "dependently" promoted, so it will be in the right memory location. One possible
// further reason for avoiding field-wise stores is that the struct might have alignment-induced
// holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
- // concern, then we could compromise, and say that address-exposed + fields do not completely cover the memory
- // of the struct prevent field-wise assignments. Same situation exists for the "src" decision.
+ // concern, then we could compromise, and say that being address-exposed, plus having fields that do not
+ // completely cover the memory of the struct, prevents field-wise assignments. The same situation exists
+ // for the "src" decision.
if (varTypeIsStruct(lclVarTree) &&
(lvaTable[lclNum].lvPromoted || lclVarIsSIMDType(lclNum)))
{
@@ -8972,9 +8992,10 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
#if CPU_USES_BLOCK_MOVE
compBlkOpUsed = true;
#endif
+#ifdef CPBLK_UNROLL_LIMIT
// Note that the unrolling of CopyBlk is only implemented on some platforms
// Currently that includes x64 and Arm64 but not x86 or Arm32
-#ifdef CPBLK_UNROLL_LIMIT
+
// If we have a CopyObj with a dest on the stack
// we will convert it into a GC Unsafe CopyBlk that is non-interruptible
// when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
@@ -9103,7 +9124,8 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// be a definition.
addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
assert(lvaTable[addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum].lvLclBlockOpAddr == 1);
- addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our local stack frame
+ addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
+ // local stack frame
}
}
}
@@ -10158,6 +10180,7 @@ NO_MUL_64RSLT:
// IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
// type handles and instances of System.Type
// If this invariant is ever broken, the optimization will need updating
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
if ( op1->gtOper == GT_CALL &&
@@ -10246,6 +10269,7 @@ NO_MUL_64RSLT:
if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
{
// Method Table tree
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
#else
@@ -10577,6 +10601,7 @@ NO_MUL_64RSLT:
//
REMOVE:
// The data at optAssertionTabPrivate[i] is to be removed
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
@@ -12132,8 +12157,8 @@ CM_ADD_OP:
//
if (!optValnumCSE_phase)
{
- //Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this is
- //all we need.
+ // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
+ // is all we need.
GenTreePtr op1SideEffects = NULL;
// The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
@@ -12475,9 +12500,9 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
bool dstIsSafeLclVar;
case GT_ASG:
-
/* We'll convert "a = a <op> x" into "a <op>= x" */
/* and also "a = x <op> a" into "a <op>= x" for communative ops */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !LONG_ASG_OPS
if (typ == TYP_LONG)
@@ -12678,9 +12703,10 @@ ASG_OP:
// lost when assigning the op result to a small type var,
// but it may not be ok for the right shift operation where the higher bits
// could be shifted into the lower bits and preserved.
- // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) == (sbyte)x >>signed y))
- // as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) == (ubyte)x >>unsigned y), but
- // signed right shift of an unigned small type may give the wrong result:
+ // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
+ // (sbyte)x >>signed y)) as does unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
+ // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the wrong
+ // result:
// e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
// but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
// The result becomes correct if we use >>unsigned instead of >>signed.
@@ -12877,7 +12903,7 @@ ASG_OP:
ssize_t ishf = op2->gtIntConCommon.IconValue();
ssize_t iadd = cns->gtIntConCommon.IconValue();
-// printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
+ // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
/* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
@@ -13431,6 +13457,7 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
// (x >>> y) | (x << (-y + N))
// where N == bitsize(x), M is const, and
// M & (N - 1) == N - 1
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
@@ -13557,7 +13584,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
case GT_SUB: helper = CPX_R4_SUB; break;
case GT_MUL: helper = CPX_R4_MUL; break;
case GT_DIV: helper = CPX_R4_DIV; break;
-// case GT_MOD: helper = CPX_R4_REM; break;
+ // case GT_MOD: helper = CPX_R4_REM; break;
case GT_EQ : helper = CPX_R4_EQ ; break;
case GT_NE : helper = CPX_R4_NE ; break;
@@ -13648,7 +13675,9 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
return tree;
}
+
/* This is a (real) return value -- check its type */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
@@ -14588,7 +14617,8 @@ void Compiler::fgMorphStmts(BasicBlock * block,
GenTreeCall* call = morph->AsCall();
// Could either be
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
- // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
(call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
}
@@ -14597,10 +14627,12 @@ void Compiler::fgMorphStmts(BasicBlock * block,
/* This must be a tail call that caused a GCPoll to get
injected. We haven't actually morphed the call yet
but the flag still got set, clear it here... */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif
+
noway_assert(compTailCallUsed);
noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
noway_assert(stmt->gtNext == NULL);
@@ -14609,7 +14641,8 @@ void Compiler::fgMorphStmts(BasicBlock * block,
// Could either be
// - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
- // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing a jmp.
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
(call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && (compCurBB->bbFlags & BBF_HAS_JMP)));
}
@@ -14980,7 +15013,8 @@ void Compiler::fgMorphBlocks()
(genReturnBB != block) &&
((block->bbFlags & BBF_HAS_JMP) == 0))
{
- /* We'll jump to the genReturnBB */
+ /* We'll jump to the genReturnBB */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(_TARGET_X86_)
if (info.compFlags & CORINFO_FLG_SYNCH)
@@ -15083,10 +15117,8 @@ void Compiler::fgMorphBlocks()
void Compiler::fgSetOptions()
{
-
- /* Should we force fully interruptible code ? */
-
#ifdef DEBUG
+ /* Should we force fully interruptible code ? */
if (JitConfig.JitFullyInt() ||
compStressCompile(STRESS_GENERIC_VARN, 30))
{
@@ -15215,7 +15247,7 @@ void Compiler::fgSetOptions()
codeGen->setFramePointerRequiredGCInfo(true);
}
-// printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
+ // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
}
@@ -16139,11 +16171,12 @@ void Compiler::fgPromoteStructs()
}
//
- // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of the the
- // incoming register into the stack frame slot.
+ // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
+ // the incoming register into the stack frame slot.
// In that case, we would like to avoid promotion.
// However we haven't yet computed the lvRefCnt values so we can't do that.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if 0
// Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
@@ -16246,12 +16279,13 @@ Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalk
{
// Normed struct
// A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
- // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 bytes).
- // Normally, the type of the local var and the type of GT_FIELD are equivalent. However, there
- // is one extremely rare case where that won't be true. An enum type is a special value type
- // that contains exactly one element of a primitive integer type (that, for CLS programs is named "value__").
- // The VM tells us that a local var of that enum type is the primitive type of the enum's single field.
- // It turns out that it is legal for IL to access this field using ldflda or ldfld. For example:
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (that, for CLS programs is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
//
// .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
// {
@@ -16334,6 +16368,8 @@ Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkD
tree->gtLclFld.SetLclNum(fieldLclIndex);
// We need to keep the types 'compatible'. If we can switch back to a GT_LCL_VAR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef ARM_SOFTFP
assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
#else
@@ -16753,7 +16789,6 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr
// In the first case, tree may no longer be a leaf, but we're done with it; is a leaf in the second case.
return WALK_SKIP_SUBTREES;
}
- // Otherwise...
#ifdef FEATURE_SIMD
if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
{
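
One of the morph.cpp hunks above reflows the comment in fgRecognizeAndMorphBitwiseRotation describing the shift-or patterns that get rewritten into rotates. For reference, this is the shape of source-level idiom being described, sketched as plain C++ (a sketch of the pattern only, not of the morpher): two shifts by complementary amounts whose sum is the operand's bit size, OR'ed together.

    #include <cstdint>

    // "(x << y) | (x >> (N - y))" with N == 32; the masking keeps both shift
    // amounts in range so the expression is well defined for y == 0 as well.
    uint32_t RotateLeftIdiom(uint32_t x, uint32_t y)
    {
        y &= 31;
        return (x << y) | (x >> ((32 - y) & 31));
    }
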
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index c424e7e178..58440d3bd0 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -1528,7 +1528,8 @@ public:
#endif
cse_def_cost = 2;
cse_use_cost = 2;
- extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller saved register
+ extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
+ // saved register
}
}
else // Conservative CSE promotion
@@ -1554,7 +1555,8 @@ public:
#endif
cse_def_cost = 3;
cse_use_cost = 3;
- extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller saved register
+ extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
+ // saved register
}
// If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
@@ -1580,7 +1582,8 @@ public:
// estimate the cost from lost codesize reduction if we do not perform the CSE
if (candidate->Size() > cse_use_cost)
{
- Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the weighted count
+ Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
+ // weighted count
extra_no_cost = candidate->Size() - cse_use_cost;
extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
}
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 21ebdc22a9..c135515c42 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -1237,10 +1237,10 @@ void Compiler::optRecordLoop(BasicBlock * head,
// Record iterator.
optLoopTable[loopInd].lpIterTree = incr;
+#if COUNT_LOOPS
// Save the initial value of the iterator - can be lclVar or constant
// Flag the loop accordingly.
-#if COUNT_LOOPS
iterLoopCount++;
#endif
@@ -1408,7 +1408,8 @@ void Compiler::optFindNaturalLoops()
/* We will use the following terminology:
* HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
Not part of the looping of the loop.
- * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop, but not the outer loop. ???)
+ * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
+ * but not the outer loop. ???)
* TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
* BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
* EXIT - the loop exit or the block right after the bottom
@@ -1678,8 +1679,8 @@ void Compiler::optFindNaturalLoops()
* The example above is not a loop since we bail after the first iteration
*
* The condition we have to check for is
- * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is reachable,
- * it can only be reached through ENTRY, therefore we have a way back to ENTRY
+ * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
+ * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
*
* 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
* loop bottom then we cannot iterate
@@ -2142,9 +2143,9 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
BasicBlock* topPredBlock = topPred->flBlock;
// Skip if topPredBlock is in the loop.
- // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization,
- // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we
- // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+ // Note that this uses block number to detect membership in the loop. We are adding blocks during
+ // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
+ // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
{
JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
@@ -2926,6 +2927,7 @@ void Compiler::optUnrollLoops()
}
/* Looks like a good idea to unroll this loop, let's do it! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
@@ -4190,8 +4192,8 @@ bool Compiler::optIsLoopClonable(unsigned loopInd)
// We've previously made a decision whether to have separate return epilogs, or branch to one.
// There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
- // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a heuristic tradeoff;
- // perhaps we're just choosing to live with 4 as the limit.)
+ // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
+ // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
if (fgReturnCount + loopRetCount > 4)
{
JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount);
@@ -4445,9 +4447,9 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* c
/*extendRegion*/true);
BasicBlock::CloneBlockState(this, newBlk, blk);
- // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert the
- // cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding loop, if one
- // exists -- the parent of the loop we're cloning.
+ // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
+ // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
+ // loop, if one exists -- the parent of the loop we're cloning.
newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
if (newFirst == nullptr) newFirst = newBlk;
@@ -4725,6 +4727,7 @@ bool Compiler::optNarrowTree(GenTreePtr tree,
switch (oper)
{
/* Constants can usually be narrowed by changing their value */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
__int64 lval;
@@ -5468,8 +5471,8 @@ void Compiler::optHoistLoopCode()
}
#endif
- // Test Data stuff..
#ifdef DEBUG
+ // Test Data stuff..
// If we have no test data, early out.
if (m_nodeTestData == NULL) return;
NodeToTestDataMap* testData = GetNodeTestData();
@@ -5496,6 +5499,7 @@ void Compiler::optHoistLoopCode()
void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
{
// Do this loop, then recursively do all nested loops.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if LOOP_HOIST_STATS
// Record stats
@@ -7140,9 +7144,9 @@ bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2,
/* NOTE: Caller ensures that this variable has only one def */
-// printf("limit [%d]:\n", add1); gtDispTree(op1);
-// printf("size [%d]:\n", add2); gtDispTree(op2);
-// printf("\n");
+ // printf("limit [%d]:\n", add1); gtDispTree(op1);
+ // printf("size [%d]:\n", add2); gtDispTree(op2);
+ // printf("\n");
}
@@ -7666,6 +7670,7 @@ bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
{
// If the array address has been taken, don't do the optimization
// (this restriction can be lowered a bit, but i don't think it's worth it)
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose)
{
@@ -7673,7 +7678,6 @@ bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
gtDispTree(pArray);
}
#endif
-
return false;
}
}
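
The CLANG_FORMAT_COMMENT_ANCHOR insertions in the hunks above all follow one pattern: a comment that documents an entire #ifdef region is given an ordinary statement to attach to, so clang-format keeps it at the surrounding code's indentation instead of re-indenting it against the preprocessor directive. A minimal sketch of the idea is below; the macro definition and function name shown are assumptions for illustration (an empty statement and a made-up function), not something this patch adds.

    // Minimal sketch, assuming the anchor macro expands to an empty statement.
    #define CLANG_FORMAT_COMMENT_ANCHOR ;

    // Hypothetical function, for illustration only.
    void optExampleAnchor(bool verbose)
    {
        /* Looks like a good idea to unroll this loop, let's do it! */
        CLANG_FORMAT_COMMENT_ANCHOR;

    #ifdef DEBUG
        if (verbose)
        {
            // The block comment above binds to the anchor statement rather than to
            // the #ifdef line, so reformatting leaves its indentation alone.
        }
    #endif
    }

The same anchor appears throughout the later hunks (rationalize.cpp, regalloc.cpp, regset.cpp, utils.cpp, valuenum.cpp, unwindarm.cpp) wherever a block comment immediately precedes conditional compilation.
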
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
index 6a832dc8ae..5c32f85c29 100644
--- a/src/jit/rangecheck.cpp
+++ b/src/jit/rangecheck.cpp
@@ -732,8 +732,8 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP ass
}
}
-// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi arguments.
-// If not a phi argument, check if we assertions about local variables.
+// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi
+// arguments. If not a phi argument, check if we have assertions about local variables.
void RangeCheck::MergeAssertion(BasicBlock* block, GenTreePtr stmt, GenTreePtr op, SearchPath* path, Range* pRange DEBUGARG(int indent))
{
JITDUMP("Merging assertions from pred edges of BB%02d for op(%p) $%03x\n", block->bbNum, dspPtr(op), op->gtVNPair.GetConservative());
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 6c322fb88d..fa7e03d911 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -1168,9 +1168,10 @@ void Rationalizer::RewriteObj(GenTreePtr* ppTree, Compiler::fgWalkData* data)
Compiler* comp = data->compiler;
GenTreeObj* obj = (*ppTree)->AsObj();
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// For UNIX struct passing, we can have Obj nodes for arguments.
// For other cases, we should never see a non-SIMD type here.
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
if (!varTypeIsSIMD(obj))
{
return;
@@ -1461,7 +1462,7 @@ Compiler::fgWalkResult Rationalizer::SimpleTransformHelper(GenTree **ppTree, Com
{
// We are changing the child from GT_LCL_VAR TO GT_LCL_VAR_ADDR.
// Therefore gtType of the child needs to be changed to a TYP_BYREF
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (child->gtOper == GT_LCL_VAR)
{
diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h
index 7d3b00b051..e1e7df140c 100644
--- a/src/jit/rationalize.h
+++ b/src/jit/rationalize.h
@@ -123,7 +123,7 @@ private:
static void DuplicateCommaProcessOneTree (Compiler* comp, Rationalizer* irt, BasicBlock* block, GenTree* tree);
static void FixupIfCallArg (GenTreeStack* parentStack,
- GenTree* oldChild,
+ GenTree* oldChild,
GenTree* newChild);
static void FixupIfSIMDLocal (Compiler* comp, GenTreeLclVarCommon* tree);
@@ -133,20 +133,20 @@ private:
GenTreePtr rhs);
Location RewriteTopLevelComma(Location loc);
-
+
// SIMD related transformations
static void RewriteObj(GenTreePtr* ppTree, Compiler::fgWalkData* data);
static void RewriteCopyBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data);
static void RewriteInitBlk(GenTreePtr* ppTree, Compiler::fgWalkData* data);
- // Intrinsic related
+ // Intrinsic related
static void RewriteNodeAsCall(GenTreePtr* ppTree, Compiler::fgWalkData* data,
CORINFO_METHOD_HANDLE callHnd,
#ifdef FEATURE_READYTORUN_COMPILER
CORINFO_CONST_LOOKUP entryPoint,
#endif
GenTreeArgList* args);
- static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
+ static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
};
inline Rationalizer::Rationalizer(Compiler* _comp)
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 4ae9009b6b..482a590e1e 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -894,11 +894,13 @@ static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
BitScanForward(&reg, (DWORD)regmask);
return rpGetPredictForReg((regNumber)reg);
}
- /* It has multiple bits set */
+
#if defined(_TARGET_ARM_)
+ /* It has multiple bits set */
else if (regmask == (RBM_R0 | RBM_R1)) { result = PREDICT_PAIR_R0R1; }
else if (regmask == (RBM_R2 | RBM_R3)) { result = PREDICT_PAIR_R2R3; }
#elif defined(_TARGET_X86_)
+ /* It has multiple bits set */
else if (regmask == (RBM_EAX | RBM_EDX)) { result = PREDICT_PAIR_EAXEDX; }
else if (regmask == (RBM_ECX | RBM_EBX)) { result = PREDICT_PAIR_ECXEBX; }
#endif
@@ -2845,14 +2847,15 @@ ASG_COMMON:
#if defined(DEBUG) || !NOGC_WRITE_BARRIERS
{
#ifdef _TARGET_ARM_
- //
- // For the ARM target we have an optimized JIT Helper
- // that only trashes a subset of the callee saved registers
- //
#ifdef DEBUG
if (verbose)
printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
#endif
+ //
+ // For the ARM target we have an optimized JIT Helper
+ // that only trashes a subset of the callee saved registers
+ //
+
// NOTE: Adding it to the gtUsedRegs will cause the interference to
// be added appropriately
@@ -2865,13 +2868,14 @@ ASG_COMMON:
op1->gtUsedRegs |= RBM_R0;
op2->gtUsedRegs |= RBM_R1;
#else // _TARGET_ARM_
- // We have to call a normal JIT helper to perform the Write Barrier Assignment
- // It will trash the callee saved registers
#ifdef DEBUG
if (verbose)
printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
#endif
+ // We have to call a normal JIT helper to perform the Write Barrier Assignment
+ // It will trash the callee saved registers
+
tree->gtUsedRegs |= RBM_CALLEE_TRASH;
#endif // _TARGET_ARM_
}
@@ -2963,11 +2967,9 @@ ASG_COMMON:
{
predictReg = PREDICT_SCRATCH_REG;
}
- //
+#ifdef _TARGET_ARM_
// If we are widening an int into a long using a targeted register pair we
// should retarget so that the low part get loaded into the appropriate register
- //
-#ifdef _TARGET_ARM_
else if (predictReg == PREDICT_PAIR_R0R1)
{
predictReg = PREDICT_REG_R0;
@@ -2980,6 +2982,8 @@ ASG_COMMON:
}
#endif
#ifdef _TARGET_X86_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part get loaded into the appropriate register
else if (predictReg == PREDICT_PAIR_EAXEDX)
{
predictReg = PREDICT_REG_EAX;
@@ -3171,6 +3175,7 @@ GENERIC_UNARY:
case GT_NOP:
// these unary operators do not write new values
// and thus won't need a scratch register
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if OPT_BOOL_OPS
if (!op1)
@@ -3255,6 +3260,8 @@ GENERIC_UNARY:
// We will compute a new regMask that holds the register(s)
// that we will load the indirection into.
//
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifndef _TARGET_64BIT_
if (type == TYP_LONG)
{
@@ -4133,9 +4140,12 @@ HANDLE_SHIFT_COUNT:
/* Evaluate the <else> subtree */
// First record the post-then liveness, and reset the current liveness to the else
// branch liveness.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
VARSET_TP VARSET_INIT(this, postThenLive, compCurLife);
#endif
+
VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
@@ -4685,11 +4695,12 @@ HANDLE_SHIFT_COUNT:
if (promotedStructLocal != NULL)
{
- // All or a portion of this struct will be placed in the argument registers indicated by "curArgMask".
- // We build in knowledge of the order in which the code is generated here, so that the second arg to be evaluated
- // interferes with the reg for the first, the third with the regs for the first and second, etc.
- // But since we always place the stack slots before placing the register slots we do not add inteferences
- // for any part of the struct that gets passed on the stack.
+ // All or a portion of this struct will be placed in the argument registers indicated by
+ // "curArgMask". We build in knowledge of the order in which the code is generated here, so
+ // that the second arg to be evaluated interferes with the reg for the first, the third with
+ // the regs for the first and second, etc. But since we always place the stack slots before
+                        // placing the register slots we do not add interferences for any part of the struct that gets
+ // passed on the stack.
argPredictReg = PREDICT_NONE; // We will target the indivual fields into registers but not the whole struct
regMaskTP prevArgMask = RBM_NONE;
@@ -4935,9 +4946,8 @@ HANDLE_SHIFT_COUNT:
}
}
-
- // Mark required registers for emitting tailcall profiler callback as used
#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark required registers for emitting tailcall profiler callback as used
if (compIsProfilerHookNeeded() &&
tree->gtCall.IsTailCall() &&
(tree->gtCall.gtCallType == CT_USER_FUNC))
@@ -5858,8 +5868,6 @@ ENREG_VAR:;
noway_assert(refCntEBP == 0);
#endif
- /* Determine how the EBP register should be used */
-
#ifdef DEBUG
if (verbose)
{
@@ -5878,7 +5886,11 @@ ENREG_VAR:;
}
#endif
+ /* Determine how the EBP register should be used */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if DOUBLE_ALIGN
+
if (!codeGen->isFramePointerRequired())
{
noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
@@ -5947,6 +5959,8 @@ ENREG_VAR:;
if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
{
/* It's probably better to use EBP as a frame pointer */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
@@ -5978,6 +5992,8 @@ ENREG_VAR:;
if (refCntWtdEBP > refCntWtdStkDbl * 2)
{
/* It's probably better to use EBP to enregister integer variables */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (verbose)
printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
@@ -5985,14 +6001,15 @@ ENREG_VAR:;
goto NO_DOUBLE_ALIGN;
}
- /*
- OK we passed all of the benefit tests
- so we'll predict a double aligned frame
- */
#ifdef DEBUG
if (verbose)
printf("; Predicting to create a double-aligned frame\n");
#endif
+ /*
+ OK we passed all of the benefit tests
+ so we'll predict a double aligned frame
+ */
+
rpFrameType = FT_DOUBLE_ALIGN_FRAME;
goto REVERSE_EBP_ENREG;
}
@@ -6004,6 +6021,7 @@ NO_DOUBLE_ALIGN:
if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
{
#ifdef _TARGET_XARCH_
+ // clang-format off
/* If we are using EBP to enregister variables then
will we actually save bytes by setting up an EBP frame?
@@ -6026,6 +6044,7 @@ NO_DOUBLE_ALIGN:
// We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
// to set up an EBP frame in the prolog and epilog
#define EBP_FRAME_SETUP_SIZE 5
+ // clang-format on
if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
{
@@ -6040,6 +6059,7 @@ NO_DOUBLE_ALIGN:
if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
{
/* It's not be a good idea to use EBP in our predictions */
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (verbose && (refCntEBP > 0))
printf("; Predicting that it's not worth using EBP to enregister variables\n");
@@ -6692,6 +6712,8 @@ void Compiler::raMarkStkVars()
lclNum++ , varDsc++)
{
// For RyuJIT, lvOnFrame is set by LSRA, except in the case of zero-ref, which is set below.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
varDsc->lvOnFrame = false;
#endif // LEGACY_BACKEND
@@ -6743,12 +6765,12 @@ void Compiler::raMarkStkVars()
#ifdef DEBUGGING_SUPPORT
+#ifdef DEBUG
/* For debugging, note that we have to reserve space even for
unused variables if they are ever in scope. However, this is not
an issue as fgExtendDbgLifetimes() adds an initialization and
variables in scope will not have a zero ref-cnt.
*/
-#ifdef DEBUG
if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
{
for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
@@ -6810,7 +6832,8 @@ void Compiler::raMarkStkVars()
noway_assert(lvaLclSize(lclNum) != 0);
#endif // FEATURE_FIXED_OUT_ARGS
- varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the stack frame
+ varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
+ // stack frame
NOT_STK:;
varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
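
A second pattern recurs in the regalloc.cpp hunks above: a comment that only applies to one branch of a #if/#elif chain is moved (or duplicated) inside that branch, so that formatting the conditional cannot detach the comment from the code it describes. A schematic sketch follows, with stand-in definitions rather than the real JIT declarations.

    // Schematic only: these rpPredictReg values stand in for the real JIT enum.
    enum rpPredictReg
    {
        PREDICT_NONE,
        PREDICT_PAIR_R0R1,
        PREDICT_PAIR_EAXEDX
    };

    // Hypothetical function, for illustration only.
    rpPredictReg rpExamplePairPrediction()
    {
        rpPredictReg result = PREDICT_NONE;
    #if defined(_TARGET_ARM_)
        /* It has multiple bits set */
        // The branch-specific comment lives inside the ARM branch it documents.
        result = PREDICT_PAIR_R0R1;
    #elif defined(_TARGET_X86_)
        /* It has multiple bits set */
        // ...and a copy sits in the x86 branch, mirroring the hunk above.
        result = PREDICT_PAIR_EAXEDX;
    #endif
        return result;
    }
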
diff --git a/src/jit/register.h b/src/jit/register.h
index d5f8b44920..9e351037fd 100644
--- a/src/jit/register.h
+++ b/src/jit/register.h
@@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
@@ -118,3 +120,5 @@ REGDEF(STK, 16+XMMBASE, 0x0000, "STK" )
#undef REGALIAS
#undef XMMMASK
/*****************************************************************************/
+
+// clang-format on
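
register.h above, and the registerarm.h, registerarm64.h, and registerxmm.h files below, get the same treatment: the hand-aligned REGDEF/REGALIAS tables are bracketed with clang-format off/on so the formatter leaves their column alignment intact. The shape of the technique, using an invented table rather than the real register definitions:

    // clang-format off
    // Invented REGDEF table, purely to illustrate the off/on bracketing.
    #define REGDEF(name, rnum, mask, sname)
    REGDEF(EAX,   0, 0x01, "eax" )
    REGDEF(ECX,   1, 0x02, "ecx" )
    REGDEF(EDX,   2, 0x04, "edx" )
    #undef  REGDEF
    // clang-format on
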
diff --git a/src/jit/register_arg_convention.cpp b/src/jit/register_arg_convention.cpp
index 9ddb7008f4..429c585f8d 100644
--- a/src/jit/register_arg_convention.cpp
+++ b/src/jit/register_arg_convention.cpp
@@ -40,11 +40,12 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */)
if (!isBackFilled)
{
- // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // For System V the reg type counters should be independent.
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ // For System V the reg type counters should be independent.
nextReg(TYP_INT, numRegs);
nextReg(TYP_FLOAT, numRegs);
#else
+ // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
nextReg(type, numRegs);
#endif
}
@@ -96,7 +97,8 @@ unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment)
}
#endif // _TARGET_ARM_
- assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the arg can't be enregistered
+ assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the
+ // arg can't be enregistered
regArgNum(type) += cAlignSkipped;
return cAlignSkipped;
diff --git a/src/jit/registerarm.h b/src/jit/registerarm.h
index 950c81462c..38b82c26f2 100644
--- a/src/jit/registerarm.h
+++ b/src/jit/registerarm.h
@@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
@@ -80,3 +82,5 @@ REGDEF(STK, 32+FPBASE, 0x0000, "STK")
#undef REGDEF
#undef REGALIAS
/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerarm64.h b/src/jit/registerarm64.h
index 4d3f448558..f53197259c 100644
--- a/src/jit/registerarm64.h
+++ b/src/jit/registerarm64.h
@@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
+
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
@@ -108,3 +110,5 @@ REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK")
#undef REGDEF
#undef REGALIAS
/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerxmm.h b/src/jit/registerxmm.h
index fe73e7939c..4c34261ba8 100644
--- a/src/jit/registerxmm.h
+++ b/src/jit/registerxmm.h
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+// clang-format off
/*****************************************************************************/
/*****************************************************************************/
#ifndef REGDEF
@@ -43,3 +44,5 @@ REGDEF(XMM15, 15, XMMMASK(16), "xmm15" )
/*****************************************************************************/
#undef REGDEF
/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp
index 480a4e6b77..9d31913450 100644
--- a/src/jit/regset.cpp
+++ b/src/jit/regset.cpp
@@ -943,7 +943,7 @@ void RegSet::rsMarkRegPairUsed(GenTreePtr tree)
/* Can't mark a register pair more than once as used */
-// assert((regMask & rsMaskUsed) == 0);
+ // assert((regMask & rsMaskUsed) == 0);
/* Mark the registers as 'used' */
@@ -1019,7 +1019,8 @@ RegSet::SpillDsc * RegSet::rsGetSpillInfo(GenTreePtr tree,
/* Normally, trees are unspilled in the order of being spilled due to
the post-order walking of trees during code-gen. However, this will
not be true for something like a GT_ARR_ELEM node */
-
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef LEGACY_BACKEND
SpillDsc* multi = rsSpillDesc[reg];
#endif // LEGACY_BACKEND
@@ -1320,15 +1321,15 @@ void RegTracker::rsTrackRegLclVar(regNumber reg, unsigned var)
#endif
- /* Record the new value for the register. ptr var needed for
- * lifetime extension
- */
-
#ifdef DEBUG
if (compiler->verbose)
printf("\t\t\t\t\t\t\tThe register %s now holds V%02u\n", compiler->compRegVarName(reg), var);
#endif
+ /* Record the new value for the register. ptr var needed for
+ * lifetime extension
+ */
+
rsRegValues[reg].rvdKind = RV_LCL_VAR;
// If this is a cast of a 64 bit int, then we must have the low 32 bits.
@@ -3146,8 +3147,8 @@ TempDsc * Compiler::tmpGetTemp(var_types type)
}
}
- /* Do we need to allocate a new temp */
#ifdef DEBUG
+ /* Do we need to allocate a new temp */
bool isNewTemp = false;
#endif // DEBUG
diff --git a/src/jit/regset.h b/src/jit/regset.h
index a2ea3fe8a2..11a824cbe2 100644
--- a/src/jit/regset.h
+++ b/src/jit/regset.h
@@ -30,7 +30,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
-
/*****************************************************************************
*
* Keep track of the current state of each register. This is intended to be
diff --git a/src/jit/sharedfloat.cpp b/src/jit/sharedfloat.cpp
index 2864c54681..16f2de2b09 100644
--- a/src/jit/sharedfloat.cpp
+++ b/src/jit/sharedfloat.cpp
@@ -250,8 +250,6 @@ void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
if (bValue)
{
- // Mark as used
-
#ifdef DEBUG
if (m_rsCompiler->verbose)
{
@@ -262,6 +260,7 @@ void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
}
#endif
+ // Mark as used
assert((rsGetMaskLock() & regMask) == 0);
#if FEATURE_STACK_FP_X87
@@ -285,8 +284,6 @@ void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
}
else
{
- // Mark as free
-
#ifdef DEBUG
if (m_rsCompiler->verbose)
{
@@ -297,6 +294,7 @@ void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
}
#endif
+ // Mark as free
assert((rsGetMaskUsed() & regMask) == regMask);
// Are we freeing a multi-use registers?
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 78b5439c1c..cb8dc90f23 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -1166,7 +1166,8 @@ GenTreePtr Compiler::impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd,
// Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc)
// Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1)
- // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1), SIMDIntrinsicBitwiseAndNot(op3, op1))
+ // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1),
+ // SIMDIntrinsicBitwiseAndNot(op3, op1))
//
// If Op1 has side effect, create an assignment to a temp
GenTree* tmp = op1;
@@ -1577,9 +1578,10 @@ bool Compiler::areArgumentsContiguous(GenTreePtr op1, GenTreePtr op2)
// return the address node.
//
// TODO-CQ:
-// 1. Currently just support for GT_FIELD and GT_INDEX, because we can only verify the GT_INDEX node or GT_Field are located contiguously or not.
-// In future we should support more cases.
-// 2.Though it happens to just work fine front-end phases are not aware of GT_LEA node. Therefore, convert these to use GT_ADDR .
+// 1. Currently we just support GT_FIELD and GT_INDEX, because we can only verify whether the GT_INDEX or GT_FIELD
+//    nodes are located contiguously or not. In future we should support more cases.
+// 2. Though it happens to just work fine, front-end phases are not aware of GT_LEA nodes. Therefore, convert these
+//       to use GT_ADDR.
GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize)
{
assert(tree->OperGet() == GT_FIELD || tree->OperGet() == GT_INDEX);
@@ -1600,11 +1602,12 @@ GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simd
// so that this sturct won't be promoted.
// e.g. s.x x is a field, and s is a struct, then we should set the s's lvUsedInSIMDIntrinsic as true.
// so that s won't be promoted.
- // Notice that if we have a case like s1.s2.x. s1 s2 are struct, and x is a field, then it is possible that s1 can be promoted, so that s2 can be promoted.
- // The reason for that is if we don't allow s1 to be promoted, then this will affect the other optimizations which are depend on s1's struct promotion.
+ // Notice that if we have a case like s1.s2.x. s1 s2 are struct, and x is a field, then it is possible that
+ // s1 can be promoted, so that s2 can be promoted. The reason for that is if we don't allow s1 to be
+ // promoted, then this will affect the other optimizations which are depend on s1's struct promotion.
// TODO-CQ:
- // In future, we should optimize this case so that if there is a nested field like s1.s2.x and s1.s2.x's address is used for
- // initializing the vector, then s1 can be promoted but s2 can't.
+ // In future, we should optimize this case so that if there is a nested field like s1.s2.x and s1.s2.x's
+ // address is used for initializing the vector, then s1 can be promoted but s2 can't.
if(varTypeIsSIMD(obj) && obj->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(obj);
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index f6218bdee2..2c22d48267 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -476,9 +476,9 @@ CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg
instruction ins = ins_Store(type);
if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
{
- // In general, when we use a three-operands move instruction, we want to merge the src with itself.
- // This is an exception in that we actually want the "merge" behavior, so we must specify it with
- // all 3 operands.
+ // In general, when we use a three-operands move instruction, we want to merge the src with
+ // itself. This is an exception in that we actually want the "merge" behavior, so we must
+ // specify it with all 3 operands.
inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
}
else
@@ -1326,14 +1326,16 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
if (baseType == TYP_FLOAT)
{
// v0 = v1 * v2
- // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its position
+ // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
+ // // position
// tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
// v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
// tmp = v0
// tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
// v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
// // Essentially horizontal addtion of all elements.
- // // We could achieve the same using SSEv3 instruction HADDPS.
+ // // We could achieve the same using SSEv3 instruction
+ // // HADDPS.
//
inst_RV_RV(INS_mulps, targetReg, op2Reg);
inst_RV_RV(INS_movaps, tmpReg, targetReg);
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
index bfc9182780..a44fb9d0a1 100644
--- a/src/jit/simdintrinsiclist.h
+++ b/src/jit/simdintrinsiclist.h
@@ -8,6 +8,7 @@
#endif
/*****************************************************************************/
+// clang-format off
#ifdef FEATURE_SIMD
/*
@@ -141,3 +142,4 @@ SIMD_INTRINSIC(nullptr, false, Invalid,
#endif //!_TARGET_AMD64_
#endif //FEATURE_SIMD
+// clang-format on
diff --git a/src/jit/smcommon.cpp b/src/jit/smcommon.cpp
index 43eadecf2c..da5f271fc9 100644
--- a/src/jit/smcommon.cpp
+++ b/src/jit/smcommon.cpp
@@ -21,9 +21,9 @@ const char * const smOpcodeNames[] =
const SM_OPCODE s_CodeSeqs[][MAX_CODE_SEQUENCE_LENGTH] =
{
- // ==== Single opcode states ====
#define SMOPDEF(smname,string) {smname, CODE_SEQUENCE_END},
+// ==== Single opcode states ====
#include "smopcode.def"
#undef SMOPDEF
@@ -68,8 +68,9 @@ const SM_OPCODE s_CodeSeqs[][MAX_CODE_SEQUENCE_LENGTH] =
{SM_CONV_R4, SM_MUL, CODE_SEQUENCE_END},
{SM_CONV_R4, SM_DIV, CODE_SEQUENCE_END},
- // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in Math.InternalRound
- // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
+ // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in
+ // Math.InternalRound
+ // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
{SM_CONV_R8, SM_MUL, CODE_SEQUENCE_END},
{SM_CONV_R8, SM_DIV, CODE_SEQUENCE_END},
diff --git a/src/jit/smdata.cpp b/src/jit/smdata.cpp
index 4774ce0673..8cf86e59c1 100644
--- a/src/jit/smdata.cpp
+++ b/src/jit/smdata.cpp
@@ -13,6 +13,7 @@
//
// States in the state machine
//
+// clang-format off
const SMState g_SMStates[] =
{
// {term, len, lng, prev, SMOpcode and SMOpcodeName , offsets } // state ID and name
@@ -267,6 +268,7 @@ const SMState g_SMStates[] =
{ 1, 5, 195, 247, (SM_OPCODE) 74 /* add */, 0 }, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
{ 1, 5, 195, 247, (SM_OPCODE) 75 /* sub */, 0 }, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
};
+// clang-format on
static_assert_no_msg(NUM_SM_STATES == sizeof(g_SMStates)/sizeof(g_SMStates[0]));
@@ -275,6 +277,7 @@ const SMState * gp_SMStates = g_SMStates;
//
// JumpTableCells in the state machine
//
+// clang-format off
const JumpTableCell g_SMJumpTableCells[] =
{
// {src, dest }
@@ -697,6 +700,7 @@ const JumpTableCell g_SMJumpTableCells[] =
{ 0, 0 }, // cell# 416
{ 0, 0 }, // cell# 417
};
+// clang-format on
const JumpTableCell * gp_SMJumpTableCells = g_SMJumpTableCells;
diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp
index d546a8160a..9d7762ee8f 100644
--- a/src/jit/ssabuilder.cpp
+++ b/src/jit/ssabuilder.cpp
@@ -447,7 +447,8 @@ void SsaBuilder::ComputeDominators(BasicBlock** postOrder, int count, BlkToBlkSe
#ifdef SSA_FEATURE_DOMARR
// Allocate space for constant time computation of (a DOM b?) query.
- unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so add 1.
+ unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so
+ // add 1.
m_pDomPreOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
m_pDomPostOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
@@ -748,17 +749,17 @@ void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count)
// Check if we've already inserted a phi node.
if (GetPhiNode(bbInDomFront, lclNum) == NULL)
{
- // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of j.
- // So insert a phi node at l.
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
JITDUMP("Inserting phi definition for V%02u at start of BB%02u.\n", lclNum, bbInDomFront->bbNum);
GenTreePtr phiLhs = m_pCompiler->gtNewLclvNode(lclNum, m_pCompiler->lvaTable[lclNum].TypeGet());
- // Create 'phiRhs' as a GT_PHI node for 'lclNum', it will eventually hold a GT_LIST of GT_PHI_ARG nodes.
- // However we have to construct this list so for now the gtOp1 of 'phiRhs' is a nullptr.
- // It will get replaced with a GT_LIST of GT_PHI_ARG nodes in SsaBuilder::AssignPhiNodeRhsVariables()
- // and in SsaBuilder::AddDefToHandlerPhis()
- //
+ // Create 'phiRhs' as a GT_PHI node for 'lclNum', it will eventually hold a GT_LIST of GT_PHI_ARG
+ // nodes. However we have to construct this list so for now the gtOp1 of 'phiRhs' is a nullptr.
+ // It will get replaced with a GT_LIST of GT_PHI_ARG nodes in
+ // SsaBuilder::AssignPhiNodeRhsVariables() and in SsaBuilder::AddDefToHandlerPhis()
+
GenTreePtr phiRhs = m_pCompiler->gtNewOperNode(GT_PHI, m_pCompiler->lvaTable[lclNum].TypeGet(), nullptr);
GenTreePtr phiAsg = m_pCompiler->gtNewAssignNode(phiLhs, phiRhs);
@@ -786,8 +787,8 @@ void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count)
// Check if we've already inserted a phi node.
if (bbInDomFront->bbHeapSsaPhiFunc == NULL)
{
- // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of j.
- // So insert a phi node at l.
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
JITDUMP("Inserting phi definition for Heap at start of BB%02u.\n", bbInDomFront->bbNum);
bbInDomFront->bbHeapSsaPhiFunc = BasicBlock::EmptyHeapPhiDef;
}
@@ -1528,8 +1529,8 @@ void SsaBuilder::RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenam
if (!blockWrk.m_processed)
{
- // Push the block back on the stack with "m_processed" true, to record the fact that when its children have been
- // (recursively) processed, we still need to call BlockPopStacks on it.
+ // Push the block back on the stack with "m_processed" true, to record the fact that when its children have
+ // been (recursively) processed, we still need to call BlockPopStacks on it.
blocksToDo->push_back(BlockWork(block, true));
// Walk the block give counts to DEFs and give top of stack count for USEs.
diff --git a/src/jit/ssabuilder.h b/src/jit/ssabuilder.h
index 6304d1b06b..cfffe58208 100644
--- a/src/jit/ssabuilder.h
+++ b/src/jit/ssabuilder.h
@@ -132,8 +132,8 @@ private:
// iterated dominance frontiers. (Recall that the dominance frontier of a block B is the set of blocks
// B3 such that there exists some B2 s.t. B3 is a successor of B2, and B dominates B2. Note that this dominance
// need not be strict -- B2 and B may be the same node. The iterated dominance frontier is formed by a closure
- // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance frontier
- // of all elements of the set.)
+ // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance
+ // frontier of all elements of the set.)
BlkToBlkSetMap* ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count);
// Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
@@ -157,9 +157,9 @@ private:
// Requires "pRenameState" to be non-NULL and be currently used for variables renaming.
void BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState);
- // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers. Requires "pRenameState"
- // to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn" implies that any definition occurring within "tree"
- // is a phi definition.
+ // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn"
+ // implies that any definition occurring within "tree" is a phi definition.
void TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn);
// Assumes that "block" contains a definition for local var "lclNum", with SSA number "count".
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
index 060544c607..a1788b7a02 100644
--- a/src/jit/stackfp.cpp
+++ b/src/jit/stackfp.cpp
@@ -2953,11 +2953,11 @@ BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBloc
assert(compiler->compMayHaveTransitionBlocks);
assert(compiler->compHndBBtabCount == 0);
- // Create a temp block
#ifdef DEBUG
compiler->fgSafeBasicBlockCreation = true;
#endif
+ // Create a temp block
BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);
#ifdef DEBUG
diff --git a/src/jit/target.h b/src/jit/target.h
index 984cf1b0ca..71de326dc2 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -330,6 +330,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
/*****************************************************************************/
+// clang-format off
#if defined(_TARGET_X86_)
#define CPU_LOAD_STORE_ARCH 0
@@ -365,7 +366,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the the threshold of the number
// of contiguous non-gc slots that trigger generating rep movsq instead of
// sequences of movsq instructions
-
// The way we're currently disabling rep movs/stos is by setting a limit less than
// its unrolling counterparts. When lower takes the decision on which one to make it
// always asks for the unrolling limit first so you can say the JIT 'favors' unrolling.
@@ -380,7 +380,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls.
- #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when
+ // the flags need to be set
#ifdef LEGACY_BACKEND
#define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
#define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
@@ -399,23 +400,30 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
- #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the
+ // RBM_CALLEE_TRASH registers
#else
- #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using
+ // ASM barriers we definitely don't have NOGC barriers).
#endif
#define USER_ARGS_COME_LAST 0
#define EMIT_TRACK_STACK_DEPTH 1
- #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
- #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this
+ // target
+ #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
+ // filter-handler, fault) and directly execute 'finally' clauses.
#define FEATURE_EH_FUNCLETS 0
- #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region,
+ // protected by "cloned finally" clauses.
#ifndef LEGACY_BACKEND
#define FEATURE_STACK_FP_X87 0
#else // LEGACY_BACKEND
#define FEATURE_STACK_FP_X87 1 // Use flat register file model
#endif // LEGACY_BACKEND
- #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float (when 0).
- #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+ #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float
+ // (when 0).
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based
+ // frames for most methods
#define CSE_CONSTS 1 // Enable if we want to CSE constants
#ifndef LEGACY_BACKEND
@@ -590,7 +598,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_FRAME RBM_EDI
- #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from
+ // CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_TCB RBM_ESI
#define REG_PINVOKE_SCRATCH REG_EAX // EAX is trashed by CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_SCRATCH RBM_EAX
@@ -1827,6 +1836,7 @@ C_ASSERT((FEATURE_TAILCALL_OPT == 0) || (FEATURE_FASTTAILCALL == 1));
#define RBM_NON_BYTE_REGS RBM_NONE
#define RBM_BYTE_REG_FLAG RBM_NONE
#endif
+// clang-format on
/*****************************************************************************/
class Target
diff --git a/src/jit/typelist.h b/src/jit/typelist.h
index 5e46b78040..7722dcb1b4 100644
--- a/src/jit/typelist.h
+++ b/src/jit/typelist.h
@@ -30,6 +30,7 @@
DEF_TP(tn ,nm , jitType, verType, sz,sze,asze, st,al, tf, howUsed )
*/
+// clang-format off
DEF_TP(UNDEF ,"<UNDEF>" , TYP_UNDEF, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
DEF_TP(VOID ,"void" , TYP_VOID, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
@@ -70,6 +71,7 @@ DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, TI_STRUCT,32,32, 32, 8,16, VTF_S,
#endif // FEATURE_SIMD
DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+// clang-format on
#undef GCS
#undef BRS
diff --git a/src/jit/unwind.h b/src/jit/unwind.h
index 0327e97c24..c773193a96 100644
--- a/src/jit/unwind.h
+++ b/src/jit/unwind.h
@@ -25,7 +25,8 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 40;
#define UWC_END 0xFF // "end" unwind code
#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 19)
#define UW_MAX_CODE_WORDS_COUNT 15 // Max number that can be encoded in the "Code Words" field of the .pdata record
-#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field of the .pdata record
+#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field
+ // of the .pdata record
#elif defined(_TARGET_ARM64_)
const unsigned MAX_PROLOG_SIZE_BYTES = 100;
const unsigned MAX_EPILOG_SIZE_BYTES = 100;
@@ -36,10 +37,14 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 100;
#define UW_MAX_EPILOG_START_INDEX 0x3FFU
#endif // _TARGET_ARM64_
-#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field of the .pdata record
-#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words" field of the .pdata record
-#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count" field of the .pdata record
-#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset" field of the .pdata record
+#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field
+ // of the .pdata record
+#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words"
+ // field of the .pdata record
+#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count"
+ // field of the .pdata record
+#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset"
+ // field of the .pdata record
//
// Forward declaration of class defined in emit.h
@@ -692,7 +697,8 @@ private:
// set of epilogs, for this function/funclet.
bool ufiInProlog;
- static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for uninitialized memory
+ static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for
+ // uninitialized memory
unsigned ufiInitialized;
#endif // DEBUG
@@ -816,7 +822,8 @@ private:
#ifdef DEBUG
- static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for uninitialized memory
+ static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for
+ // uninitialized memory
unsigned uwiInitialized;
#endif // DEBUG
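
The unwind.h hunks above (and the earlier target.h hunks) show the remaining recurring edit: a trailing comment that would push a line past the column limit is wrapped onto a code-free continuation line aligned with the first comment. In sketch form, with an invented constant:

    // Invented constant, shown only to illustrate the wrapped trailing comment.
    #define UW_EXAMPLE_MAX 0x3FU // Max number that can be encoded in the corresponding .pdata field
                                 // (wrapped onto a continuation line to stay under the column limit)
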
diff --git a/src/jit/unwindarm.cpp b/src/jit/unwindarm.cpp
index 29cbdcfa0e..1dcfd06f6b 100644
--- a/src/jit/unwindarm.cpp
+++ b/src/jit/unwindarm.cpp
@@ -590,9 +590,11 @@ void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes
// The prolog codes that are already at the end of the array need to get moved to the middle,
// with space for the non-matching epilog codes to follow.
+
memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), &upcMem[upcCodeSlot], prologBytes);
// Note that the three UWC_END padding bytes still exist at the end of the array.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
// Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
@@ -925,7 +927,8 @@ void UnwindFragmentInfo::FinalizeOffset()
{
if (ufiEmitLoc == NULL)
{
- ufiStartOffset = 0; // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ ufiStartOffset = 0;
}
else
{
@@ -1071,7 +1074,8 @@ void UnwindFragmentInfo::MergeCodes()
assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
unsigned epilogCount = 0;
- unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the prolog codes
+ unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the
+ // prolog codes
unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes
UnwindEpilogInfo* pEpi;
@@ -1157,7 +1161,8 @@ void UnwindFragmentInfo::MergeCodes()
DWORD finalSize =
headerBytes
- + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, including end padding if necessary
+ + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words,
+ // including end padding if necessary
// Construct the final unwind information.
@@ -1389,7 +1394,8 @@ void UnwindFragmentInfo::Reserve(BOOL isFunclet, bool isHotCode)
// funKind: funclet kind
// pHotCode: hot section code buffer
// pColdCode: cold section code buffer
-// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a function/funclet.
+// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a
+// function/funclet.
// isHotCode: are we allocating the unwind info for the hot code section?
void UnwindFragmentInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode)
@@ -1618,7 +1624,8 @@ void UnwindInfo::Split()
if (uwiFragmentLast->ufiEmitLoc == NULL)
{
- startOffset = 0; // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ startOffset = 0;
}
else
{
@@ -1662,6 +1669,7 @@ void UnwindInfo::Split()
// the actual offsets of the splits since we haven't issued the instructions yet, so store
// an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
// like we do for the function length and epilog offsets.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
if (uwiComp->verbose)
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
index e55436246f..e3eb978a6d 100644
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -37,6 +37,7 @@ const char * Target::g_tgtPlatformName = "Windows";
#define DECLARE_DATA
+// clang-format off
extern
const signed char opcodeSizes[] =
{
@@ -85,7 +86,7 @@ const signed char opcodeSizes[] =
#undef InlineSwitch_size
#undef InlinePhi_size
};
-
+// clang-format on
const BYTE varTypeClassification[] =
{
@@ -334,6 +335,8 @@ void dspRegMask(regMaskTP regMask, size_t minSiz)
sep = " ";
// What kind of separator should we use for this range (if it is indeed going to be a range)?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#if defined(_TARGET_AMD64_)
// For AMD64, create ranges for int registers R8 through R15, but not the "old" registers.
if (regNum >= REG_R8)
@@ -364,12 +367,14 @@ void dspRegMask(regMaskTP regMask, size_t minSiz)
#error Unsupported or unset target architecture
#endif // _TARGET_*
}
- // We've already printed a register. Is this the end of a range?
+
#if defined(_TARGET_ARM64_)
+ // We've already printed a register. Is this the end of a range?
else if ((regNum == REG_INT_LAST)
|| (regNum == REG_R17) // last register before TEB
|| (regNum == REG_R28)) // last register before FP
#else // _TARGET_ARM64_
+ // We've already printed a register. Is this the end of a range?
else if (regNum == REG_INT_LAST)
#endif // _TARGET_ARM64_
{
@@ -513,7 +518,8 @@ dumpSingleInstr(const BYTE* const codeAddr, IL_OFFSET offs, const char* prefix)
{
const BYTE * opcodePtr = codeAddr + offs;
const BYTE * startOpcodePtr = opcodePtr;
- const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for most things
+ const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for
+ // most things
if (prefix != NULL)
printf("%s", prefix);
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index 87982a3bd7..9165fcac20 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -1227,8 +1227,8 @@ ValueNum ValueNumStore::VNForMapSelectWork(ValueNumKind vnk,
unsigned* pBudget,
bool* pUsedRecursiveVN)
{
- // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
TailCall:
+ // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
assert(arg0VN != NoVN && arg1VN != NoVN);
assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
@@ -2115,6 +2115,8 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
assert(VNFuncArity(func) == 3);
// Function arguments carry no exceptions.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
if (func != VNF_PhiDef)
{
@@ -4662,8 +4664,8 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
unsigned lclNum = lclVarTree->GetLclNum();
- // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have SSA names
- // in which to store VN's on defs. We'll yield unique VN's when we read from them.
+ // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have
+ // SSA names in which to store VN's on defs. We'll yield unique VN's when we read from them.
if (!fgExcludeFromSsa(lclNum))
{
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
@@ -4695,8 +4697,8 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
- // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be
- // able to give VN's to.
+ // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we
+ // want to be able to give VN's to.
tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
else
@@ -4859,8 +4861,8 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
// TODO-CQ: Why not be complete, and get this case right?
fgMutateHeap(tree DEBUGARG("COPYBLK - non local"));
}
- // Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want to be
- // able to give VN's to.
+ // Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want
+ // to be able to give VN's to.
tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
}
@@ -4952,9 +4954,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// We don't want to fabricate arbitrary value numbers to things we can't reason about.
// So far, we know about two of these cases:
// Case 1) We have a local var who has never been defined but it's seen as a use.
- // This is the case of storeIndir(addr(lclvar)) = expr. In this case since we only take the
- // address of the variable, this doesn't mean it's a use nor we have to initialize it, so in this
- // very rare case, we fabricate a value number.
+ // This is the case of storeIndir(addr(lclvar)) = expr. In this case since we only
+ // take the address of the variable, this doesn't mean it's a use nor we have to
+ // initialize it, so in this very rare case, we fabricate a value number.
// Case 2) Local variables that represent structs which are assigned using CpBlk.
GenTree* nextNode = lcl->gtNext;
assert((nextNode->gtOper == GT_ADDR && nextNode->gtOp.gtOp1 == lcl) ||
@@ -4964,10 +4966,11 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
assert(lcl->gtVNPair.BothDefined());
}
- // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use addrSpillTemp will have a statement like
- // "addrSpillTemp = addr(local)." If we previously decided that this block operation defines the local, we will have
- // labeled the "local" node as a DEF (or USEDEF). This flag propogates to the "local" on the RHS. So we'll assume that
- // this is correct, and treat it as a def (to a new, unique VN).
+ // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use
+ // addrSpillTemp will have a statement like "addrSpillTemp = addr(local)." If we previously decided
+ // that this block operation defines the local, we will have labeled the "local" node as a DEF
+ // (or USEDEF). This flag propogates to the "local" on the RHS. So we'll assume that this is correct,
+ // and treat it as a def (to a new, unique VN).
else if ((lcl->gtFlags & GTF_VAR_DEF) != 0)
{
LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
@@ -5056,8 +5059,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// 2: volatile read s;
// 3: read s;
//
- // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated in between them)...
- // but we *should* be able to prove that the values read in 2 and 3 are the same.
+ // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated
+ // in between them)... but we *should* be able to prove that the values read in 2 and 3 are the same.
//
ValueNumPair clsVarVNPair;
@@ -5372,10 +5375,11 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
{
FieldSeqNode* fieldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[1]);
- // Either "arg" is the address of (part of) a local itself, or the assignment is an "indirect assignment",
- // where an outer comma expression assigned the address of a local to a temp, and that temp is our lhs, and
- // we recorded this in a table when we made the indirect assignment...or else we have a "rogue" PtrToLoc, one
- // that should have made the local in question address-exposed. Assert on that.
+ // Either "arg" is the address of (part of) a local itself, or the assignment is an
+ // "indirect assignment", where an outer comma expression assigned the address of a
+ // local to a temp, and that temp is our lhs, and we recorded this in a table when we
+ // made the indirect assignment...or else we have a "rogue" PtrToLoc, one that should
+ // have made the local in question address-exposed. Assert on that.
GenTreeLclVarCommon* lclVarTree = NULL;
bool isEntire = false;
unsigned lclDefSsaNum = SsaConfig::RESERVED_SSA_NUM;
@@ -5924,6 +5928,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
else if (fldSeq2 != nullptr)
{
// Get the first (instance or static) field from field seq. Heap[field] will yield the "field map".
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq2->m_fieldHnd);
if (obj != nullptr)
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
index abc97c4cef..49eb845694 100644
--- a/src/jit/valuenum.h
+++ b/src/jit/valuenum.h
@@ -170,8 +170,8 @@ private:
template<typename T>
static T EvalOp(VNFunc vnf, T v0);
- // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and returns (T)0.
- // Otherwise, returns vnf(v0, v1).
+ // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and
+ // returns (T)0. Otherwise, returns vnf(v0, v1).
template<typename T>
T EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
@@ -218,8 +218,8 @@ private:
unsigned m_numMapSels;
#endif
- // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level MapSelect
- // application.
+ // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level
+ // MapSelect application.
unsigned m_mapSelectBudget;
public:
@@ -440,7 +440,7 @@ public:
// Get a new, unique value number for an expression that we're not equating to some function.
ValueNum VNForExpr(var_types typ = TYP_UNKNOWN);
- // This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
+// This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
#define FEATURE_VN_TRACE_APPLY_SELECTORS 1
// Return the value number corresponding to constructing "MapSelect(map, f0)", where "f0" is the
@@ -502,11 +502,11 @@ public:
// When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount.
ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN);
- // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer with
- // a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer
+ // with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB);
- // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the pointer form
- // extended with "fieldSeq"; or else NoVN.
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the
+ // pointer form extended with "fieldSeq"; or else NoVN.
ValueNum ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fieldSeq);
// Queries on value numbers.
@@ -760,7 +760,7 @@ public:
bool VNIsValid(ValueNum vn);
#ifdef DEBUG
- // This controls whether we recursively call vnDump on function arguments.
+// This controls whether we recursively call vnDump on function arguments.
#define FEATURE_VN_DUMP_FUNC_ARGS 0
// Prints, to standard out, a representation of "vn".
@@ -792,10 +792,11 @@ public:
static bool isReservedVN(ValueNum);
#define VALUENUM_SUPPORT_MERGE 0
+#if VALUENUM_SUPPORT_MERGE
// If we're going to support the Merge operation, and do it right, we really need to use an entire
// egraph data structure, so that we can do congruence closure, and discover congruences implied
// by the eq-class merge.
-#if VALUENUM_SUPPORT_MERGE
+
// It may be that we provisionally give two expressions distinct value numbers, then later discover
// that the values of the expressions are provably equal. We allow the two value numbers to be
// "merged" -- after the merge, they represent the same abstract value.
@@ -848,8 +849,9 @@ private:
// "m_typ" and "m_attribs". These properties determine the interpretation of "m_defs", as discussed below.
struct Chunk
{
- // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other members.
- // The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the next one to allocate).
+ // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other
+ // members. The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the
+ // next one to allocate).
void* m_defs;
unsigned m_numUsed;
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
index 8d71fb166a..c374f421ae 100644
--- a/src/jit/valuenumfuncs.h
+++ b/src/jit/valuenumfuncs.h
@@ -6,6 +6,7 @@
// Defines the functions understood by the value-numbering system.
// ValueNumFuncDef(<name of function>, <arity (1-4)>, <is-commutative (for arity = 2)>, <non-null (for gc functions)>, <is-shared-static>)
+// clang-format off
ValueNumFuncDef(MapStore, 3, false, false, false)
ValueNumFuncDef(MapSelect, 2, false, false, false)
@@ -135,7 +136,7 @@ ValueNumFuncDef(MOD_UN, 2, false, false, false)
ValueNumFuncDef(StrCns, 2, false, true, false)
ValueNumFuncDef(Unbox, 2, false, true, false)
-
+// clang-format on
#undef ValueNumFuncDef
diff --git a/src/jit/varset.h b/src/jit/varset.h
index 90681de9ac..a84dc166ad 100644
--- a/src/jit/varset.h
+++ b/src/jit/varset.h
@@ -175,9 +175,8 @@ const unsigned lclMAX_ALLSET_TRACKED = UInt64Bits;
typedef AllVarSetOps::ValArgType ALLVARSET_VALARG_TP;
typedef AllVarSetOps::RetValType ALLVARSET_VALRET_TP;
-
-// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var set
-// for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
+// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var
+// set for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
#define VARSET_INIT(comp, varName, initVal) varName(VarSetOps::MakeCopy(comp, initVal))
#define ALLVARSET_INIT(comp, varName, initVal) varName(AllVarSetOps::MakeCopy(comp, initVal))