diff options
Diffstat (limited to 'src/gc')
48 files changed, 59767 insertions, 0 deletions
diff --git a/src/gc/.gitmirrorall b/src/gc/.gitmirrorall new file mode 100644 index 0000000000..9ee5c57b99 --- /dev/null +++ b/src/gc/.gitmirrorall @@ -0,0 +1 @@ +This folder will be mirrored by the Git-TFS Mirror recursively.
\ No newline at end of file diff --git a/src/gc/CMakeLists.txt b/src/gc/CMakeLists.txt new file mode 100644 index 0000000000..61e1ced727 --- /dev/null +++ b/src/gc/CMakeLists.txt @@ -0,0 +1,45 @@ +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(BEFORE ${CLR_DIR}/src/vm) +include_directories(BEFORE ${CLR_DIR}/src/vm/${ARCH_SOURCES_DIR}) + +if(CLR_CMAKE_PLATFORM_UNIX) + add_compile_options(-fPIC) +endif(CLR_CMAKE_PLATFORM_UNIX) + +if(CMAKE_CONFIGURATION_TYPES) + foreach (Config DEBUG CHECKED) + set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS $<$<CONFIG:${Config}>:WRITE_BARRIER_CHECK=1>) + endforeach (Config) +else() + if(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL DEBUG OR UPPERCASE_CMAKE_BUILD_TYPE STREQUAL CHECKED) + add_definitions(-DWRITE_BARRIER_CHECK=1) + endif(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL DEBUG OR UPPERCASE_CMAKE_BUILD_TYPE STREQUAL CHECKED) +endif(CMAKE_CONFIGURATION_TYPES) + +set( GC_SOURCES_DAC_AND_WKS_COMMON + gccommon.cpp + gcscan.cpp + gcsvr.cpp + gcwks.cpp + handletable.cpp + handletablecore.cpp + handletablescan.cpp + objecthandle.cpp + softwarewritewatch.cpp) + +set( GC_SOURCES_WKS + ${GC_SOURCES_DAC_AND_WKS_COMMON} + gceesvr.cpp + gceewks.cpp + handletablecache.cpp) + +set( GC_SOURCES_DAC + ${GC_SOURCES_DAC_AND_WKS_COMMON}) + +convert_to_absolute_path(GC_SOURCES_WKS ${GC_SOURCES_WKS}) +convert_to_absolute_path(GC_SOURCES_DAC ${GC_SOURCES_DAC}) + +add_subdirectory(wks) +add_subdirectory(dac) diff --git a/src/gc/dac/CMakeLists.txt b/src/gc/dac/CMakeLists.txt new file mode 100644 index 0000000000..1f1c9ebe5c --- /dev/null +++ b/src/gc/dac/CMakeLists.txt @@ -0,0 +1,2 @@ +include(${CLR_DIR}/dac.cmake) +add_library_clr(gc_dac STATIC ${GC_SOURCES_DAC}) diff --git a/src/gc/env/common.cpp b/src/gc/env/common.cpp new file mode 100644 index 0000000000..313a4e4875 --- /dev/null +++ b/src/gc/env/common.cpp @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// common.cpp : source file that includes just the standard includes +// GCSample.pch will be the pre-compiled header +// common.obj will contain the pre-compiled type information + +#include "common.h" diff --git a/src/gc/env/common.h b/src/gc/env/common.h new file mode 100644 index 0000000000..32c0d93577 --- /dev/null +++ b/src/gc/env/common.h @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// common.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#pragma once + +#ifndef _CRT_SECURE_NO_WARNINGS + #define _CRT_SECURE_NO_WARNINGS +#endif // _CRT_SECURE_NO_WARNINGS + +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> +#include <assert.h> +#include <stdarg.h> +#include <memory.h> + +#include <new> + +#ifdef PLATFORM_UNIX +#include <pthread.h> +#endif + +using namespace std; diff --git a/src/gc/env/etmdummy.h b/src/gc/env/etmdummy.h new file mode 100644 index 0000000000..2b47a46e4e --- /dev/null +++ b/src/gc/env/etmdummy.h @@ -0,0 +1,400 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#define FireEtwGCStart(Count, Reason) 0 +#define FireEtwGCStart_V1(Count, Depth, Reason, Type, ClrInstanceID) 0 +#define FireEtwGCStart_V2(Count, Depth, Reason, Type, ClrInstanceID, ClientSequenceNumber) 0 +#define FireEtwGCEnd(Count, Depth) 0 +#define FireEtwGCEnd_V1(Count, Depth, ClrInstanceID) 0 +#define FireEtwGCRestartEEEnd() 0 +#define FireEtwGCRestartEEEnd_V1(ClrInstanceID) 0 +#define FireEtwGCHeapStats(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount) 0 +#define FireEtwGCHeapStats_V1(GenerationSize0, TotalPromotedSize0, GenerationSize1, TotalPromotedSize1, GenerationSize2, TotalPromotedSize2, GenerationSize3, TotalPromotedSize3, FinalizationPromotedSize, FinalizationPromotedCount, PinnedObjectCount, SinkBlockCount, GCHandleCount, ClrInstanceID) 0 +#define FireEtwGCCreateSegment(Address, Size, Type) 0 +#define FireEtwGCCreateSegment_V1(Address, Size, Type, ClrInstanceID) 0 +#define FireEtwGCFreeSegment(Address) 0 +#define FireEtwGCFreeSegment_V1(Address, ClrInstanceID) 0 +#define FireEtwGCRestartEEBegin() 0 +#define FireEtwGCRestartEEBegin_V1(ClrInstanceID) 0 +#define FireEtwGCSuspendEEEnd() 0 +#define FireEtwGCSuspendEEEnd_V1(ClrInstanceID) 0 +#define FireEtwGCSuspendEEBegin(Reason) 0 +#define FireEtwGCSuspendEEBegin_V1(Reason, Count, ClrInstanceID) 0 +#define FireEtwGCAllocationTick(AllocationAmount, AllocationKind) 0 +#define FireEtwGCAllocationTick_V1(AllocationAmount, AllocationKind, ClrInstanceID) 0 +#define FireEtwGCAllocationTick_V2(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex) 0 +#define FireEtwGCAllocationTick_V3(AllocationAmount, AllocationKind, ClrInstanceID, AllocationAmount64, TypeID, TypeName, HeapIndex, Address) 0 +#define FireEtwGCCreateConcurrentThread() 0 +#define 
FireEtwGCCreateConcurrentThread_V1(ClrInstanceID) 0 +#define FireEtwGCTerminateConcurrentThread() 0 +#define FireEtwGCTerminateConcurrentThread_V1(ClrInstanceID) 0 +#define FireEtwGCFinalizersEnd(Count) 0 +#define FireEtwGCFinalizersEnd_V1(Count, ClrInstanceID) 0 +#define FireEtwGCFinalizersBegin() 0 +#define FireEtwGCFinalizersBegin_V1(ClrInstanceID) 0 +#define FireEtwBulkType(Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkRootEdge(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkRootConditionalWeakTableElementEdge(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkNode(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkEdge(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCSampledObjectAllocationHigh(Address, TypeID, ObjectCountForTypeSample, TotalSizeForTypeSample, ClrInstanceID) 0 +#define FireEtwGCBulkSurvivingObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkMovedObjectRanges(Index, Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCGenerationRange(Generation, RangeStart, RangeUsedLength, RangeReservedLength, ClrInstanceID) 0 +#define FireEtwGCMarkStackRoots(HeapNum, ClrInstanceID) 0 +#define FireEtwGCMarkFinalizeQueueRoots(HeapNum, ClrInstanceID) 0 +#define FireEtwGCMarkHandles(HeapNum, ClrInstanceID) 0 +#define FireEtwGCMarkOlderGenerationRoots(HeapNum, ClrInstanceID) 0 +#define FireEtwFinalizeObject(TypeID, ObjectID, ClrInstanceID) 0 +#define FireEtwSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) 0 +#define FireEtwDestroyGCHandle(HandleID, ClrInstanceID) 0 +#define FireEtwGCSampledObjectAllocationLow(Address, TypeID, ObjectCountForTypeSample, TotalSizeForTypeSample, ClrInstanceID) 0 +#define FireEtwPinObjectAtGCTime(HandleID, ObjectID, ObjectSize, TypeName, ClrInstanceID) 0 +#define FireEtwGCTriggered(Reason, ClrInstanceID) 0 +#define FireEtwGCBulkRootCCW(Count, 
ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkRCW(Count, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwGCBulkRootStaticVar(Count, AppDomainID, ClrInstanceID, Values_Len_, Values) 0 +#define FireEtwWorkerThreadCreate(WorkerThreadCount, RetiredWorkerThreads) 0 +#define FireEtwWorkerThreadTerminate(WorkerThreadCount, RetiredWorkerThreads) 0 +#define FireEtwWorkerThreadRetire(WorkerThreadCount, RetiredWorkerThreads) 0 +#define FireEtwWorkerThreadUnretire(WorkerThreadCount, RetiredWorkerThreads) 0 +#define FireEtwIOThreadCreate(IOThreadCount, RetiredIOThreads) 0 +#define FireEtwIOThreadCreate_V1(IOThreadCount, RetiredIOThreads, ClrInstanceID) 0 +#define FireEtwIOThreadTerminate(IOThreadCount, RetiredIOThreads) 0 +#define FireEtwIOThreadTerminate_V1(IOThreadCount, RetiredIOThreads, ClrInstanceID) 0 +#define FireEtwIOThreadRetire(IOThreadCount, RetiredIOThreads) 0 +#define FireEtwIOThreadRetire_V1(IOThreadCount, RetiredIOThreads, ClrInstanceID) 0 +#define FireEtwIOThreadUnretire(IOThreadCount, RetiredIOThreads) 0 +#define FireEtwIOThreadUnretire_V1(IOThreadCount, RetiredIOThreads, ClrInstanceID) 0 +#define FireEtwThreadpoolSuspensionSuspendThread(ClrThreadID, CpuUtilization) 0 +#define FireEtwThreadpoolSuspensionResumeThread(ClrThreadID, CpuUtilization) 0 +#define FireEtwThreadPoolWorkerThreadStart(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadStop(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadRetirementStart(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadRetirementStop(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadAdjustmentSample(Throughput, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadAdjustmentAdjustment(AverageThroughput, NewWorkerThreadCount, Reason, ClrInstanceID) 0 +#define 
FireEtwThreadPoolWorkerThreadAdjustmentStats(Duration, Throughput, ThreadWave, ThroughputWave, ThroughputErrorEstimate, AverageThroughputErrorEstimate, ThroughputRatio, Confidence, NewControlSetting, NewThreadWaveMagnitude, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkerThreadWait(ActiveWorkerThreadCount, RetiredWorkerThreadCount, ClrInstanceID) 0 +#define FireEtwThreadPoolWorkingThreadCount(Count, ClrInstanceID) 0 +#define FireEtwThreadPoolEnqueue(WorkID, ClrInstanceID) 0 +#define FireEtwThreadPoolDequeue(WorkID, ClrInstanceID) 0 +#define FireEtwThreadPoolIOEnqueue(NativeOverlapped, Overlapped, MultiDequeues, ClrInstanceID) 0 +#define FireEtwThreadPoolIODequeue(NativeOverlapped, Overlapped, ClrInstanceID) 0 +#define FireEtwThreadPoolIOPack(NativeOverlapped, Overlapped, ClrInstanceID) 0 +#define FireEtwThreadCreating(ID, ClrInstanceID) 0 +#define FireEtwThreadRunning(ID, ClrInstanceID) 0 +#define FireEtwExceptionThrown() 0 +#define FireEtwExceptionThrown_V1(ExceptionType, ExceptionMessage, ExceptionEIP, ExceptionHRESULT, ExceptionFlags, ClrInstanceID) 0 +#define FireEtwExceptionCatchStart(EntryEIP, MethodID, MethodName, ClrInstanceID) 0 +#define FireEtwExceptionCatchStop() 0 +#define FireEtwExceptionFinallyStart(EntryEIP, MethodID, MethodName, ClrInstanceID) 0 +#define FireEtwExceptionFinallyStop() 0 +#define FireEtwExceptionFilterStart(EntryEIP, MethodID, MethodName, ClrInstanceID) 0 +#define FireEtwExceptionFilterStop() 0 +#define FireEtwExceptionThrownStop() 0 +#define FireEtwContention() 0 +#define FireEtwContentionStart_V1(ContentionFlags, ClrInstanceID) 0 +#define FireEtwContentionStop(ContentionFlags, ClrInstanceID) 0 +#define FireEtwCLRStackWalk(ClrInstanceID, Reserved1, Reserved2, FrameCount, Stack) 0 +#define FireEtwAppDomainMemAllocated(AppDomainID, Allocated, ClrInstanceID) 0 +#define FireEtwAppDomainMemSurvived(AppDomainID, Survived, ProcessSurvived, ClrInstanceID) 0 +#define FireEtwThreadCreated(ManagedThreadID, AppDomainID, Flags, 
ManagedThreadIndex, OSThreadID, ClrInstanceID) 0 +#define FireEtwThreadTerminated(ManagedThreadID, AppDomainID, ClrInstanceID) 0 +#define FireEtwThreadDomainEnter(ManagedThreadID, AppDomainID, ClrInstanceID) 0 +#define FireEtwILStubGenerated(ClrInstanceID, ModuleID, StubMethodID, StubFlags, ManagedInteropMethodToken, ManagedInteropMethodNamespace, ManagedInteropMethodName, ManagedInteropMethodSignature, NativeMethodSignature, StubMethodSignature, StubMethodILCode) 0 +#define FireEtwILStubCacheHit(ClrInstanceID, ModuleID, StubMethodID, ManagedInteropMethodToken, ManagedInteropMethodNamespace, ManagedInteropMethodName, ManagedInteropMethodSignature) 0 +#define FireEtwDCStartCompleteV2() 0 +#define FireEtwDCEndCompleteV2() 0 +#define FireEtwMethodDCStartV2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodDCEndV2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodDCStartVerboseV2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodDCEndVerboseV2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodLoad(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodLoad_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID) 0 +#define FireEtwMethodLoad_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID, ReJITID) 0 +#define FireEtwMethodUnload(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodUnload_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID) 0 +#define FireEtwMethodUnload_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, 
ClrInstanceID, ReJITID) 0 +#define FireEtwMethodLoadVerbose(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodLoadVerbose_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID) 0 +#define FireEtwMethodLoadVerbose_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID, ReJITID) 0 +#define FireEtwMethodUnloadVerbose(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodUnloadVerbose_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID) 0 +#define FireEtwMethodUnloadVerbose_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID, ReJITID) 0 +#define FireEtwMethodJittingStarted(MethodID, ModuleID, MethodToken, MethodILSize, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodJittingStarted_V1(MethodID, ModuleID, MethodToken, MethodILSize, MethodNamespace, MethodName, MethodSignature, ClrInstanceID) 0 +#define FireEtwMethodJitInliningSucceeded(MethodBeingCompiledNamespace, MethodBeingCompiledName, MethodBeingCompiledNameSignature, InlinerNamespace, InlinerName, InlinerNameSignature, InlineeNamespace, InlineeName, InlineeNameSignature, ClrInstanceID) 0 +#define FireEtwMethodJitInliningFailed(MethodBeingCompiledNamespace, MethodBeingCompiledName, MethodBeingCompiledNameSignature, InlinerNamespace, InlinerName, InlinerNameSignature, InlineeNamespace, InlineeName, InlineeNameSignature, FailAlways, FailReason, ClrInstanceID) 0 +#define FireEtwMethodJitTailCallSucceeded(MethodBeingCompiledNamespace, MethodBeingCompiledName, 
MethodBeingCompiledNameSignature, CallerNamespace, CallerName, CallerNameSignature, CalleeNamespace, CalleeName, CalleeNameSignature, TailPrefix, TailCallType, ClrInstanceID) 0 +#define FireEtwMethodJitTailCallFailed(MethodBeingCompiledNamespace, MethodBeingCompiledName, MethodBeingCompiledNameSignature, CallerNamespace, CallerName, CallerNameSignature, CalleeNamespace, CalleeName, CalleeNameSignature, TailPrefix, FailReason, ClrInstanceID) 0 +#define FireEtwMethodILToNativeMap(MethodID, ReJITID, MethodExtent, CountOfMapEntries, ILOffsets, NativeOffsets, ClrInstanceID) 0 +#define FireEtwModuleDCStartV2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwModuleDCEndV2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwDomainModuleLoad(ModuleID, AssemblyID, AppDomainID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwDomainModuleLoad_V1(ModuleID, AssemblyID, AppDomainID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleLoad(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwModuleLoad_V1(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleLoad_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) 0 +#define FireEtwModuleUnload(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwModuleUnload_V1(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleUnload_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, 
NativePdbBuildPath) 0 +#define FireEtwAssemblyLoad(AssemblyID, AppDomainID, AssemblyFlags, FullyQualifiedAssemblyName) 0 +#define FireEtwAssemblyLoad_V1(AssemblyID, AppDomainID, BindingID, AssemblyFlags, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwAssemblyUnload(AssemblyID, AppDomainID, AssemblyFlags, FullyQualifiedAssemblyName) 0 +#define FireEtwAssemblyUnload_V1(AssemblyID, AppDomainID, BindingID, AssemblyFlags, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwAppDomainLoad(AppDomainID, AppDomainFlags, AppDomainName) 0 +#define FireEtwAppDomainLoad_V1(AppDomainID, AppDomainFlags, AppDomainName, AppDomainIndex, ClrInstanceID) 0 +#define FireEtwAppDomainUnload(AppDomainID, AppDomainFlags, AppDomainName) 0 +#define FireEtwAppDomainUnload_V1(AppDomainID, AppDomainFlags, AppDomainName, AppDomainIndex, ClrInstanceID) 0 +#define FireEtwModuleRangeLoad(ClrInstanceID, ModuleID, RangeBegin, RangeSize, RangeType) 0 +#define FireEtwStrongNameVerificationStart(VerificationFlags, ErrorCode, FullyQualifiedAssemblyName) 0 +#define FireEtwStrongNameVerificationStart_V1(VerificationFlags, ErrorCode, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwStrongNameVerificationStop(VerificationFlags, ErrorCode, FullyQualifiedAssemblyName) 0 +#define FireEtwStrongNameVerificationStop_V1(VerificationFlags, ErrorCode, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwAuthenticodeVerificationStart(VerificationFlags, ErrorCode, ModulePath) 0 +#define FireEtwAuthenticodeVerificationStart_V1(VerificationFlags, ErrorCode, ModulePath, ClrInstanceID) 0 +#define FireEtwAuthenticodeVerificationStop(VerificationFlags, ErrorCode, ModulePath) 0 +#define FireEtwAuthenticodeVerificationStop_V1(VerificationFlags, ErrorCode, ModulePath, ClrInstanceID) 0 +#define FireEtwRuntimeInformationStart(ClrInstanceID, Sku, BclMajorVersion, BclMinorVersion, BclBuildNumber, BclQfeNumber, VMMajorVersion, VMMinorVersion, VMBuildNumber, VMQfeNumber, StartupFlags, 
StartupMode, CommandLine, ComObjectGuid, RuntimeDllPath) 0 +#define FireEtwIncreaseMemoryPressure(BytesAllocated, ClrInstanceID) 0 +#define FireEtwDecreaseMemoryPressure(BytesFreed, ClrInstanceID) 0 +#define FireEtwGCMarkWithType(HeapNum, ClrInstanceID, Type, Bytes) 0 +#define FireEtwGCJoin_V2(Heap, JoinTime, JoinType, ClrInstanceID, JoinID) 0 +#define FireEtwGCPerHeapHistory_V3(ClrInstanceID, FreeListAllocated, FreeListRejected, EndOfSegAllocated, CondemnedAllocated, PinnedAllocated, PinnedAllocatedAdvance, RunningFreeListEfficiency, CondemnReasons0, CondemnReasons1, CompactMechanisms, ExpandMechanisms, HeapIndex, ExtraGen0Commit, Count, Values_Len_, Values) 0 +#define FireEtwGCGlobalHeapHistory_V2(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID, PauseMode, MemoryPressure) 0 +#define FireEtwDebugIPCEventStart() 0 +#define FireEtwDebugIPCEventEnd() 0 +#define FireEtwDebugExceptionProcessingStart() 0 +#define FireEtwDebugExceptionProcessingEnd() 0 +#define FireEtwCodeSymbols(ModuleId, TotalChunks, ChunkNumber, ChunkLength, Chunk, ClrInstanceID) 0 +#define FireEtwCLRStackWalkDCStart(ClrInstanceID, Reserved1, Reserved2, FrameCount, Stack) 0 +#define FireEtwMethodDCStart(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodDCStart_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID) 0 +#define FireEtwMethodDCStart_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID, ReJITID) 0 +#define FireEtwMethodDCEnd(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags) 0 +#define FireEtwMethodDCEnd_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID) 0 +#define FireEtwMethodDCEnd_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, ClrInstanceID, ReJITID) 0 +#define 
FireEtwMethodDCStartVerbose(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodDCStartVerbose_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID) 0 +#define FireEtwMethodDCStartVerbose_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID, ReJITID) 0 +#define FireEtwMethodDCEndVerbose(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature) 0 +#define FireEtwMethodDCEndVerbose_V1(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID) 0 +#define FireEtwMethodDCEndVerbose_V2(MethodID, ModuleID, MethodStartAddress, MethodSize, MethodToken, MethodFlags, MethodNamespace, MethodName, MethodSignature, ClrInstanceID, ReJITID) 0 +#define FireEtwDCStartComplete() 0 +#define FireEtwDCStartComplete_V1(ClrInstanceID) 0 +#define FireEtwDCEndComplete() 0 +#define FireEtwDCEndComplete_V1(ClrInstanceID) 0 +#define FireEtwDCStartInit() 0 +#define FireEtwDCStartInit_V1(ClrInstanceID) 0 +#define FireEtwDCEndInit() 0 +#define FireEtwDCEndInit_V1(ClrInstanceID) 0 +#define FireEtwMethodDCStartILToNativeMap(MethodID, ReJITID, MethodExtent, CountOfMapEntries, ILOffsets, NativeOffsets, ClrInstanceID) 0 +#define FireEtwMethodDCEndILToNativeMap(MethodID, ReJITID, MethodExtent, CountOfMapEntries, ILOffsets, NativeOffsets, ClrInstanceID) 0 +#define FireEtwDomainModuleDCStart(ModuleID, AssemblyID, AppDomainID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwDomainModuleDCStart_V1(ModuleID, AssemblyID, AppDomainID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwDomainModuleDCEnd(ModuleID, AssemblyID, AppDomainID, 
ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwDomainModuleDCEnd_V1(ModuleID, AssemblyID, AppDomainID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleDCStart(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwModuleDCStart_V1(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleDCStart_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) 0 +#define FireEtwModuleDCEnd(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath) 0 +#define FireEtwModuleDCEnd_V1(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID) 0 +#define FireEtwModuleDCEnd_V2(ModuleID, AssemblyID, ModuleFlags, Reserved1, ModuleILPath, ModuleNativePath, ClrInstanceID, ManagedPdbSignature, ManagedPdbAge, ManagedPdbBuildPath, NativePdbSignature, NativePdbAge, NativePdbBuildPath) 0 +#define FireEtwAssemblyDCStart(AssemblyID, AppDomainID, AssemblyFlags, FullyQualifiedAssemblyName) 0 +#define FireEtwAssemblyDCStart_V1(AssemblyID, AppDomainID, BindingID, AssemblyFlags, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwAssemblyDCEnd(AssemblyID, AppDomainID, AssemblyFlags, FullyQualifiedAssemblyName) 0 +#define FireEtwAssemblyDCEnd_V1(AssemblyID, AppDomainID, BindingID, AssemblyFlags, FullyQualifiedAssemblyName, ClrInstanceID) 0 +#define FireEtwAppDomainDCStart(AppDomainID, AppDomainFlags, AppDomainName) 0 +#define FireEtwAppDomainDCStart_V1(AppDomainID, AppDomainFlags, AppDomainName, AppDomainIndex, ClrInstanceID) 0 +#define FireEtwAppDomainDCEnd(AppDomainID, AppDomainFlags, AppDomainName) 0 +#define FireEtwAppDomainDCEnd_V1(AppDomainID, AppDomainFlags, AppDomainName, AppDomainIndex, ClrInstanceID) 0 
+#define FireEtwThreadDC(ManagedThreadID, AppDomainID, Flags, ManagedThreadIndex, OSThreadID, ClrInstanceID) 0 +#define FireEtwModuleRangeDCStart(ClrInstanceID, ModuleID, RangeBegin, RangeSize, RangeType) 0 +#define FireEtwModuleRangeDCEnd(ClrInstanceID, ModuleID, RangeBegin, RangeSize, RangeType) 0 +#define FireEtwRuntimeInformationDCStart(ClrInstanceID, Sku, BclMajorVersion, BclMinorVersion, BclBuildNumber, BclQfeNumber, VMMajorVersion, VMMinorVersion, VMBuildNumber, VMQfeNumber, StartupFlags, StartupMode, CommandLine, ComObjectGuid, RuntimeDllPath) 0 +#define FireEtwStressLogEvent(Facility, LogLevel, Message) 0 +#define FireEtwStressLogEvent_V1(Facility, LogLevel, Message, ClrInstanceID) 0 +#define FireEtwCLRStackWalkStress(ClrInstanceID, Reserved1, Reserved2, FrameCount, Stack) 0 +#define FireEtwGCDecision(DoCompact) 0 +#define FireEtwGCDecision_V1(DoCompact, ClrInstanceID) 0 +#define FireEtwGCSettings(SegmentSize, LargeObjectSegmentSize, ServerGC) 0 +#define FireEtwGCSettings_V1(SegmentSize, LargeObjectSegmentSize, ServerGC, ClrInstanceID) 0 +#define FireEtwGCOptimized(DesiredAllocation, NewAllocation, GenerationNumber) 0 +#define FireEtwGCOptimized_V1(DesiredAllocation, NewAllocation, GenerationNumber, ClrInstanceID) 0 +#define FireEtwGCPerHeapHistory() 0 +#define FireEtwGCPerHeapHistory_V1(ClrInstanceID) 0 +#define FireEtwGCGlobalHeapHistory(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms) 0 +#define FireEtwGCGlobalHeapHistory_V1(FinalYoungestDesired, NumHeaps, CondemnedGeneration, Gen0ReductionCount, Reason, GlobalMechanisms, ClrInstanceID) 0 +#define FireEtwGCJoin(Heap, JoinTime, JoinType) 0 +#define FireEtwGCJoin_V1(Heap, JoinTime, JoinType, ClrInstanceID) 0 +#define FireEtwPrvGCMarkStackRoots(HeapNum) 0 +#define FireEtwPrvGCMarkStackRoots_V1(HeapNum, ClrInstanceID) 0 +#define FireEtwPrvGCMarkFinalizeQueueRoots(HeapNum) 0 +#define FireEtwPrvGCMarkFinalizeQueueRoots_V1(HeapNum, ClrInstanceID) 0 +#define 
FireEtwPrvGCMarkHandles(HeapNum) 0 +#define FireEtwPrvGCMarkHandles_V1(HeapNum, ClrInstanceID) 0 +#define FireEtwPrvGCMarkCards(HeapNum) 0 +#define FireEtwPrvGCMarkCards_V1(HeapNum, ClrInstanceID) 0 +#define FireEtwBGCBegin(ClrInstanceID) 0 +#define FireEtwBGC1stNonConEnd(ClrInstanceID) 0 +#define FireEtwBGC1stConEnd(ClrInstanceID) 0 +#define FireEtwBGC2ndNonConBegin(ClrInstanceID) 0 +#define FireEtwBGC2ndNonConEnd(ClrInstanceID) 0 +#define FireEtwBGC2ndConBegin(ClrInstanceID) 0 +#define FireEtwBGC2ndConEnd(ClrInstanceID) 0 +#define FireEtwBGCPlanEnd(ClrInstanceID) 0 +#define FireEtwBGCSweepEnd(ClrInstanceID) 0 +#define FireEtwBGCDrainMark(Objects, ClrInstanceID) 0 +#define FireEtwBGCRevisit(Pages, Objects, IsLarge, ClrInstanceID) 0 +#define FireEtwBGCOverflow(Min, Max, Objects, IsLarge, ClrInstanceID) 0 +#define FireEtwBGCAllocWaitBegin(Reason, ClrInstanceID) 0 +#define FireEtwBGCAllocWaitEnd(Reason, ClrInstanceID) 0 +#define FireEtwGCFullNotify(GenNumber, IsAlloc) 0 +#define FireEtwGCFullNotify_V1(GenNumber, IsAlloc, ClrInstanceID) 0 +#define FireEtwEEStartupStart() 0 +#define FireEtwEEStartupStart_V1(ClrInstanceID) 0 +#define FireEtwEEStartupEnd() 0 +#define FireEtwEEStartupEnd_V1(ClrInstanceID) 0 +#define FireEtwEEConfigSetup() 0 +#define FireEtwEEConfigSetup_V1(ClrInstanceID) 0 +#define FireEtwEEConfigSetupEnd() 0 +#define FireEtwEEConfigSetupEnd_V1(ClrInstanceID) 0 +#define FireEtwLdSysBases() 0 +#define FireEtwLdSysBases_V1(ClrInstanceID) 0 +#define FireEtwLdSysBasesEnd() 0 +#define FireEtwLdSysBasesEnd_V1(ClrInstanceID) 0 +#define FireEtwExecExe() 0 +#define FireEtwExecExe_V1(ClrInstanceID) 0 +#define FireEtwExecExeEnd() 0 +#define FireEtwExecExeEnd_V1(ClrInstanceID) 0 +#define FireEtwMain() 0 +#define FireEtwMain_V1(ClrInstanceID) 0 +#define FireEtwMainEnd() 0 +#define FireEtwMainEnd_V1(ClrInstanceID) 0 +#define FireEtwApplyPolicyStart() 0 +#define FireEtwApplyPolicyStart_V1(ClrInstanceID) 0 +#define FireEtwApplyPolicyEnd() 0 +#define 
FireEtwApplyPolicyEnd_V1(ClrInstanceID) 0 +#define FireEtwLdLibShFolder() 0 +#define FireEtwLdLibShFolder_V1(ClrInstanceID) 0 +#define FireEtwLdLibShFolderEnd() 0 +#define FireEtwLdLibShFolderEnd_V1(ClrInstanceID) 0 +#define FireEtwPrestubWorker() 0 +#define FireEtwPrestubWorker_V1(ClrInstanceID) 0 +#define FireEtwPrestubWorkerEnd() 0 +#define FireEtwPrestubWorkerEnd_V1(ClrInstanceID) 0 +#define FireEtwGetInstallationStart() 0 +#define FireEtwGetInstallationStart_V1(ClrInstanceID) 0 +#define FireEtwGetInstallationEnd() 0 +#define FireEtwGetInstallationEnd_V1(ClrInstanceID) 0 +#define FireEtwOpenHModule() 0 +#define FireEtwOpenHModule_V1(ClrInstanceID) 0 +#define FireEtwOpenHModuleEnd() 0 +#define FireEtwOpenHModuleEnd_V1(ClrInstanceID) 0 +#define FireEtwExplicitBindStart() 0 +#define FireEtwExplicitBindStart_V1(ClrInstanceID) 0 +#define FireEtwExplicitBindEnd() 0 +#define FireEtwExplicitBindEnd_V1(ClrInstanceID) 0 +#define FireEtwParseXml() 0 +#define FireEtwParseXml_V1(ClrInstanceID) 0 +#define FireEtwParseXmlEnd() 0 +#define FireEtwParseXmlEnd_V1(ClrInstanceID) 0 +#define FireEtwInitDefaultDomain() 0 +#define FireEtwInitDefaultDomain_V1(ClrInstanceID) 0 +#define FireEtwInitDefaultDomainEnd() 0 +#define FireEtwInitDefaultDomainEnd_V1(ClrInstanceID) 0 +#define FireEtwInitSecurity() 0 +#define FireEtwInitSecurity_V1(ClrInstanceID) 0 +#define FireEtwInitSecurityEnd() 0 +#define FireEtwInitSecurityEnd_V1(ClrInstanceID) 0 +#define FireEtwAllowBindingRedirs() 0 +#define FireEtwAllowBindingRedirs_V1(ClrInstanceID) 0 +#define FireEtwAllowBindingRedirsEnd() 0 +#define FireEtwAllowBindingRedirsEnd_V1(ClrInstanceID) 0 +#define FireEtwEEConfigSync() 0 +#define FireEtwEEConfigSync_V1(ClrInstanceID) 0 +#define FireEtwEEConfigSyncEnd() 0 +#define FireEtwEEConfigSyncEnd_V1(ClrInstanceID) 0 +#define FireEtwFusionBinding() 0 +#define FireEtwFusionBinding_V1(ClrInstanceID) 0 +#define FireEtwFusionBindingEnd() 0 +#define FireEtwFusionBindingEnd_V1(ClrInstanceID) 0 +#define 
FireEtwLoaderCatchCall() 0 +#define FireEtwLoaderCatchCall_V1(ClrInstanceID) 0 +#define FireEtwLoaderCatchCallEnd() 0 +#define FireEtwLoaderCatchCallEnd_V1(ClrInstanceID) 0 +#define FireEtwFusionInit() 0 +#define FireEtwFusionInit_V1(ClrInstanceID) 0 +#define FireEtwFusionInitEnd() 0 +#define FireEtwFusionInitEnd_V1(ClrInstanceID) 0 +#define FireEtwFusionAppCtx() 0 +#define FireEtwFusionAppCtx_V1(ClrInstanceID) 0 +#define FireEtwFusionAppCtxEnd() 0 +#define FireEtwFusionAppCtxEnd_V1(ClrInstanceID) 0 +#define FireEtwFusion2EE() 0 +#define FireEtwFusion2EE_V1(ClrInstanceID) 0 +#define FireEtwFusion2EEEnd() 0 +#define FireEtwFusion2EEEnd_V1(ClrInstanceID) 0 +#define FireEtwSecurityCatchCall() 0 +#define FireEtwSecurityCatchCall_V1(ClrInstanceID) 0 +#define FireEtwSecurityCatchCallEnd() 0 +#define FireEtwSecurityCatchCallEnd_V1(ClrInstanceID) 0 +#define FireEtwCLRStackWalkPrivate(ClrInstanceID, Reserved1, Reserved2, FrameCount, Stack) 0 +#define FireEtwModuleRangeLoadPrivate(ClrInstanceID, ModuleID, RangeBegin, RangeSize, RangeType, IBCType, SectionType) 0 +#define FireEtwBindingPolicyPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingPolicyPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingNgenPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingNgenPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingLookupAndProbingPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingLookupAndProbingPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define 
FireEtwLoaderPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingDownloadPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwBindingDownloadPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderAssemblyInitPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderAssemblyInitPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderMappingPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderMappingPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderDeliverEventsPhaseStart(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwLoaderDeliverEventsPhaseEnd(AppDomainID, LoadContextID, FromLoaderCache, DynamicLoad, AssemblyCodebase, AssemblyName, ClrInstanceID) 0 +#define FireEtwEvidenceGenerated(Type, AppDomain, ILImage, ClrInstanceID) 0 +#define FireEtwModuleTransparencyComputationStart(Module, AppDomainID, ClrInstanceID) 0 +#define FireEtwModuleTransparencyComputationEnd(Module, AppDomainID, IsAllCritical, 
IsAllTransparent, IsTreatAsSafe, IsOpportunisticallyCritical, SecurityRuleSet, ClrInstanceID) 0 +#define FireEtwTypeTransparencyComputationStart(Type, Module, AppDomainID, ClrInstanceID) 0 +#define FireEtwTypeTransparencyComputationEnd(Type, Module, AppDomainID, IsAllCritical, IsAllTransparent, IsCritical, IsTreatAsSafe, ClrInstanceID) 0 +#define FireEtwMethodTransparencyComputationStart(Method, Module, AppDomainID, ClrInstanceID) 0 +#define FireEtwMethodTransparencyComputationEnd(Method, Module, AppDomainID, IsCritical, IsTreatAsSafe, ClrInstanceID) 0 +#define FireEtwFieldTransparencyComputationStart(Field, Module, AppDomainID, ClrInstanceID) 0 +#define FireEtwFieldTransparencyComputationEnd(Field, Module, AppDomainID, IsCritical, IsTreatAsSafe, ClrInstanceID) 0 +#define FireEtwTokenTransparencyComputationStart(Token, Module, AppDomainID, ClrInstanceID) 0 +#define FireEtwTokenTransparencyComputationEnd(Token, Module, AppDomainID, IsCritical, IsTreatAsSafe, ClrInstanceID) 0 +#define FireEtwNgenBindEvent(ClrInstanceID, BindingID, ReasonCode, AssemblyName) 0 +#define FireEtwFailFast(FailFastUserMessage, FailedEIP, OSExitCode, ClrExitCode, ClrInstanceID) 0 +#define FireEtwPrvFinalizeObject(TypeID, ObjectID, ClrInstanceID, TypeName) 0 +#define FireEtwCCWRefCountChange(HandleID, ObjectID, COMInterfacePointer, NewRefCount, AppDomainID, ClassName, NameSpace, Operation, ClrInstanceID) 0 +#define FireEtwPrvSetGCHandle(HandleID, ObjectID, Kind, Generation, AppDomainID, ClrInstanceID) 0 +#define FireEtwPrvDestroyGCHandle(HandleID, ClrInstanceID) 0 +#define FireEtwFusionMessageEvent(ClrInstanceID, Prepend, Message) 0 +#define FireEtwFusionErrorCodeEvent(ClrInstanceID, Category, ErrorCode) 0 +#define FireEtwPinPlugAtGCTime(PlugStart, PlugEnd, GapBeforeSize, ClrInstanceID) 0 +#define FireEtwAllocRequest(LoaderHeapPtr, MemoryAddress, RequestSize, Unused1, Unused2, ClrInstanceID) 0 +#define FireEtwMulticoreJit(ClrInstanceID, String1, String2, Int1, Int2, Int3) 0 +#define 
FireEtwMulticoreJitMethodCodeReturned(ClrInstanceID, ModuleID, MethodID) 0 +#define FireEtwIInspectableRuntimeClassName(TypeName, ClrInstanceID) 0 +#define FireEtwWinRTUnbox(TypeName, SecondTypeName, ClrInstanceID) 0 +#define FireEtwCreateRCW(TypeName, ClrInstanceID) 0 +#define FireEtwRCWVariance(TypeName, InterfaceTypeName, VariantInterfaceTypeName, ClrInstanceID) 0 +#define FireEtwRCWIEnumerableCasting(TypeName, SecondTypeName, ClrInstanceID) 0 +#define FireEtwCreateCCW(TypeName, ClrInstanceID) 0 +#define FireEtwCCWVariance(TypeName, InterfaceTypeName, VariantInterfaceTypeName, ClrInstanceID) 0 +#define FireEtwObjectVariantMarshallingToNative(TypeName, Int1, ClrInstanceID) 0 +#define FireEtwGetTypeFromGUID(TypeName, SecondTypeName, ClrInstanceID) 0 +#define FireEtwGetTypeFromProgID(TypeName, SecondTypeName, ClrInstanceID) 0 +#define FireEtwConvertToCallbackEtw(TypeName, SecondTypeName, ClrInstanceID) 0 +#define FireEtwBeginCreateManagedReference(ClrInstanceID) 0 +#define FireEtwEndCreateManagedReference(ClrInstanceID) 0 +#define FireEtwObjectVariantMarshallingToManaged(TypeName, Int1, ClrInstanceID) 0 diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h new file mode 100644 index 0000000000..a94f1a6394 --- /dev/null +++ b/src/gc/env/gcenv.base.h @@ -0,0 +1,626 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+#ifndef __GCENV_BASE_INCLUDED__ +#define __GCENV_BASE_INCLUDED__ +// +// Sets up basic environment for CLR GC +// + +#define FEATURE_REDHAWK 1 +#define FEATURE_CONSERVATIVE_GC 1 + +#define GCENV_INCLUDED + +#define REDHAWK_PALIMPORT extern "C" +#define REDHAWK_PALAPI __stdcall + +#ifndef _MSC_VER +#define __stdcall +#ifdef __clang__ +#define __forceinline __attribute__((always_inline)) inline +#else // __clang__ +#define __forceinline inline +#endif // __clang__ +#endif // !_MSC_VER + +#ifndef SIZE_T_MAX +#define SIZE_T_MAX ((size_t)-1) +#endif +#ifndef SSIZE_T_MAX +#define SSIZE_T_MAX ((ptrdiff_t)(SIZE_T_MAX / 2)) +#endif + +#ifndef _INC_WINDOWS +// ----------------------------------------------------------------------------------------------------------- +// +// Aliases for Win32 types +// + +typedef uint32_t BOOL; +typedef uint32_t DWORD; + +// ----------------------------------------------------------------------------------------------------------- +// HRESULT subset. + +#ifdef PLATFORM_UNIX +typedef int32_t HRESULT; +#else +// this must exactly match the typedef used by windows.h +typedef long HRESULT; +#endif + +#define SUCCEEDED(_hr) ((HRESULT)(_hr) >= 0) +#define FAILED(_hr) ((HRESULT)(_hr) < 0) + +inline HRESULT HRESULT_FROM_WIN32(unsigned long x) +{ + return (HRESULT)(x) <= 0 ? (HRESULT)(x) : (HRESULT) (((x) & 0x0000FFFF) | (7 << 16) | 0x80000000); +} + +#define S_OK 0x0 +#define S_FALSE 0x1 +#define E_FAIL 0x80004005 +#define E_OUTOFMEMORY 0x8007000E +#define E_UNEXPECTED 0x8000FFFF +#define E_NOTIMPL 0x80004001 +#define E_INVALIDARG 0x80070057 + +#define NOERROR 0x0 +#define ERROR_TIMEOUT 1460 + +#define TRUE true +#define FALSE false + +#define CALLBACK __stdcall +#define FORCEINLINE __forceinline + +#define INFINITE 0xFFFFFFFF + +#define ZeroMemory(Destination,Length) memset((Destination),0,(Length)) + +#ifndef _countof +#define _countof(_array) (sizeof(_array)/sizeof(_array[0])) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#define C_ASSERT(cond) static_assert( cond, #cond ) + +#define UNREFERENCED_PARAMETER(P) (void)(P) + +#ifdef PLATFORM_UNIX +#define _vsnprintf vsnprintf +#define sprintf_s snprintf +#define swprintf_s swprintf +#endif + +#ifdef UNICODE +#define _tcslen wcslen +#define _tcscpy wcscpy +#define _stprintf_s swprintf_s +#define _tfopen _wfopen +#else +#define _tcslen strlen +#define _tcscpy strcpy +#define _stprintf_s sprintf_s +#define _tfopen fopen +#endif + +#define WINAPI __stdcall + +typedef DWORD (WINAPI *PTHREAD_START_ROUTINE)(void* lpThreadParameter); + +#define WAIT_OBJECT_0 0 +#define WAIT_TIMEOUT 258 +#define WAIT_FAILED 0xFFFFFFFF + +#if defined(_MSC_VER) + #if defined(_ARM_) + + __forceinline void YieldProcessor() { } + extern "C" void __emit(const unsigned __int32 opcode); + #pragma intrinsic(__emit) + #define MemoryBarrier() { __emit(0xF3BF); __emit(0x8F5F); } + + #elif defined(_ARM64_) + + extern "C" void __yield(void); + #pragma intrinsic(__yield) + __forceinline void YieldProcessor() { __yield();} + + extern "C" void __dmb(const unsigned __int32 _Type); + #pragma intrinsic(__dmb) + #define MemoryBarrier() { __dmb(_ARM64_BARRIER_SY); } + + #elif defined(_AMD64_) + + extern "C" void + _mm_pause ( + void + ); + + extern "C" void + _mm_mfence ( + void + ); + + #pragma intrinsic(_mm_pause) + #pragma intrinsic(_mm_mfence) + + #define YieldProcessor _mm_pause + #define MemoryBarrier _mm_mfence + + #elif defined(_X86_) + + #define YieldProcessor() __asm { rep nop } + + __forceinline void MemoryBarrier() + { + int32_t Barrier; + __asm { + xchg Barrier, eax + } + } + + #else // !_ARM_ && !_AMD64_ && !_X86_ + #error Unsupported architecture + #endif +#else // _MSC_VER + +#endif // _MSC_VER + +typedef struct _PROCESSOR_NUMBER { + uint16_t Group; + uint8_t Number; + uint8_t Reserved; +} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; + +#endif // _INC_WINDOWS + +// 
----------------------------------------------------------------------------------------------------------- +// +// The subset of the contract code required by the GC/HandleTable sources. If Redhawk moves to support +// contracts these local definitions will disappear and be replaced by real implementations. +// + +#define LEAF_CONTRACT +#define LIMITED_METHOD_CONTRACT +#define LIMITED_METHOD_DAC_CONTRACT +#define WRAPPER_CONTRACT +#define WRAPPER_NO_CONTRACT +#define STATIC_CONTRACT_LEAF +#define STATIC_CONTRACT_DEBUG_ONLY +#define STATIC_CONTRACT_NOTHROW +#define STATIC_CONTRACT_CAN_TAKE_LOCK +#define STATIC_CONTRACT_SO_TOLERANT +#define STATIC_CONTRACT_GC_NOTRIGGER +#define STATIC_CONTRACT_MODE_COOPERATIVE +#define CONTRACTL +#define CONTRACT(_expr) +#define CONTRACT_VOID +#define THROWS +#define NOTHROW +#define INSTANCE_CHECK +#define MODE_COOPERATIVE +#define MODE_ANY +#define SO_INTOLERANT +#define SO_TOLERANT +#define GC_TRIGGERS +#define GC_NOTRIGGER +#define CAN_TAKE_LOCK +#define SUPPORTS_DAC +#define FORBID_FAULT +#define CONTRACTL_END +#define CONTRACT_END +#define TRIGGERSGC() +#define WRAPPER(_contract) +#define DISABLED(_contract) +#define INJECT_FAULT(_expr) +#define INJECTFAULT_HANDLETABLE 0x1 +#define INJECTFAULT_GCHEAP 0x2 +#define FAULT_NOT_FATAL() +#define BEGIN_DEBUG_ONLY_CODE +#define END_DEBUG_ONLY_CODE +#define BEGIN_GETTHREAD_ALLOWED +#define END_GETTHREAD_ALLOWED +#define LEAF_DAC_CONTRACT +#define PRECONDITION(_expr) +#define POSTCONDITION(_expr) +#define RETURN return +#define CONDITIONAL_CONTRACT_VIOLATION(_violation, _expr) + +// ----------------------------------------------------------------------------------------------------------- +// +// Data access macros +// +#ifdef DACCESS_COMPILE +#include "daccess.h" +#else // DACCESS_COMPILE +typedef uintptr_t TADDR; + +#define PTR_TO_TADDR(ptr) ((TADDR)(ptr)) + +#define DPTR(type) type* +#define SPTR(type) type* + +#define GVAL_DECL(type, var) \ + extern type var +#define GVAL_IMPL(type, 
var) \ + type var +#define GVAL_IMPL_INIT(type, var, init) \ + type var = init + +#define GPTR_DECL(type, var) \ + extern type* var +#define GPTR_IMPL(type, var) \ + type* var +#define GPTR_IMPL_INIT(type, var, init) \ + type* var = init + +#define SPTR_DECL(type, var) \ + static type* var +#define SPTR_IMPL(type, cls, var) \ + type * cls::var +#define SPTR_IMPL_NS(type, ns, cls, var) \ + type * cls::var +#define SPTR_IMPL_NS_INIT(type, ns, cls, var, init) \ + type * cls::var = init + +#define SVAL_DECL(type, var) \ + static type var +#define SVAL_IMPL_NS(type, ns, cls, var) \ + type cls::var +#define SVAL_IMPL_NS_INIT(type, ns, cls, var, init) \ + type cls::var = init + +#define GARY_DECL(type, var, size) \ + extern type var[size] +#define GARY_IMPL(type, var, size) \ + type var[size] + +struct _DacGlobals; +#endif // DACCESS_COMPILE + +typedef DPTR(size_t) PTR_size_t; +typedef DPTR(uint8_t) PTR_uint8_t; + +// ----------------------------------------------------------------------------------------------------------- + +#define DATA_ALIGNMENT sizeof(uintptr_t) + +#define RAW_KEYWORD(x) x + +#define DECLSPEC_ALIGN(x) __declspec(align(x)) + +#define OS_PAGE_SIZE 4096 + +#ifndef _ASSERTE +#define _ASSERTE(_expr) ASSERT(_expr) +#endif + +#define CONSISTENCY_CHECK(_expr) ASSERT(_expr) + +#define PREFIX_ASSUME(cond) ASSERT(cond) + +#define EEPOLICY_HANDLE_FATAL_ERROR(error) ASSERT(!"EEPOLICY_HANDLE_FATAL_ERROR") + +#define UI64(_literal) _literal##ULL + +class ObjHeader; +class MethodTable; +class Object; +class ArrayBase; + +// Various types used to refer to object references or handles. This will get more complex if we decide +// Redhawk wants to wrap object references in the debug build. 
+typedef DPTR(Object) PTR_Object; +typedef DPTR(PTR_Object) PTR_PTR_Object; + +typedef PTR_Object OBJECTREF; +typedef PTR_PTR_Object PTR_OBJECTREF; +typedef PTR_Object _UNCHECKED_OBJECTREF; +typedef PTR_PTR_Object PTR_UNCHECKED_OBJECTREF; + +#ifndef DACCESS_COMPILE +struct OBJECTHANDLE__ +{ + void* unused; +}; +typedef struct OBJECTHANDLE__* OBJECTHANDLE; +#else +typedef TADDR OBJECTHANDLE; +#endif + +// With no object reference wrapping the following macros are very simple. +#define ObjectToOBJECTREF(_obj) (OBJECTREF)(_obj) +#define OBJECTREFToObject(_obj) (Object*)(_obj) + +#define VALIDATEOBJECTREF(_objref) _objref; + +#define VOLATILE(T) T volatile + +// +// This code is extremely compiler- and CPU-specific, and will need to be altered to +// support new compilers and/or CPUs. Here we enforce that we can only compile using +// VC++, or Clang on x86, AMD64, ARM and ARM64. +// +#if !defined(_MSC_VER) && !defined(__clang__) +#error The Volatile type is currently only defined for Visual C++ and Clang +#endif + +#if defined(__clang__) && !defined(_X86_) && !defined(_AMD64_) && !defined(_ARM_) && !defined(_ARM64_) +#error The Volatile type is currently only defined for Clang when targeting x86, AMD64, ARM or ARM64 CPUs +#endif + +#if defined(__clang__) +#if defined(_ARM_) || defined(_ARM64_) +// This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows. +#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory") +#else +// +// For Clang, we prevent reordering by the compiler by inserting the following after a volatile +// load (to prevent subsequent operations from moving before the read), and before a volatile +// write (to prevent prior operations from moving past the write). We don't need to do anything +// special to prevent CPU reorderings, because the x86 and AMD64 architectures are already +// sufficiently constrained for our purposes. 
If we ever need to run on weaker CPU architectures +// (such as PowerPC), then we will need to do more work. +// +// Please do not use this macro outside of this file. It is subject to change or removal without +// notice. +// +#define VOLATILE_MEMORY_BARRIER() asm volatile ("" : : : "memory") +#endif // !_ARM_ +#elif defined(_ARM_) && _ISO_VOLATILE +// ARM has a very weak memory model and very few tools to control that model. We're forced to perform a full +// memory barrier to preserve the volatile semantics. Technically this is only necessary on MP systems but we +// currently don't have a cheap way to determine the number of CPUs from this header file. Revisit this if it +// turns out to be a performance issue for the uni-proc case. +#define VOLATILE_MEMORY_BARRIER() MemoryBarrier() +#else +// +// On VC++, reorderings at the compiler and machine level are prevented by the use of the +// "volatile" keyword in VolatileLoad and VolatileStore. This should work on any CPU architecture +// targeted by VC++ with /iso_volatile-. +// +#define VOLATILE_MEMORY_BARRIER() +#endif + +// +// VolatileLoad loads a T from a pointer to T. It is guaranteed that this load will not be optimized +// away by the compiler, and that any operation that occurs after this load, in program order, will +// not be moved before this load. In general it is not guaranteed that the load will be atomic, though +// this is the case for most aligned scalar data types. If you need atomic loads or stores, you need +// to consult the compiler and CPU manuals to find which circumstances allow atomicity. +// +template<typename T> +inline +T VolatileLoad(T const * pt) +{ + T val = *(T volatile const *)pt; + VOLATILE_MEMORY_BARRIER(); + return val; +} + +template<typename T> +inline +T VolatileLoadWithoutBarrier(T const * pt) +{ +#ifndef DACCESS_COMPILE + T val = *(T volatile const *)pt; +#else + T val = *pt; +#endif + return val; +} + +// +// VolatileStore stores a T into the target of a pointer to T. 
Is is guaranteed that this store will +// not be optimized away by the compiler, and that any operation that occurs before this store, in program +// order, will not be moved after this store. In general, it is not guaranteed that the store will be +// atomic, though this is the case for most aligned scalar data types. If you need atomic loads or stores, +// you need to consult the compiler and CPU manuals to find which circumstances allow atomicity. +// +template<typename T> +inline +void VolatileStore(T* pt, T val) +{ + VOLATILE_MEMORY_BARRIER(); + *(T volatile *)pt = val; +} + +extern GCSystemInfo g_SystemInfo; + +extern MethodTable * g_pFreeObjectMethodTable; + +extern int32_t g_TrapReturningThreads; + +extern bool g_fFinalizerRunOnShutDown; + +// +// Locks +// + +struct alloc_context; +class Thread; + +Thread * GetThread(); + +typedef void (CALLBACK *HANDLESCANPROC)(PTR_UNCHECKED_OBJECTREF pref, uintptr_t *pExtraInfo, uintptr_t param1, uintptr_t param2); + +class FinalizerThread +{ +public: + static bool Initialize(); + static void EnableFinalization(); + + static bool HaveExtraWorkForFinalizer(); + + static bool IsCurrentThreadFinalizer(); + static void Wait(DWORD timeout, bool allowReentrantWait = false); + static void SignalFinalizationDone(bool fFinalizer); + static void SetFinalizerThread(Thread * pThread); + static HANDLE GetFinalizerEvent(); +}; + +bool IsGCSpecialThread(); + +inline bool dbgOnly_IsSpecialEEThread() +{ + return false; +} + +#define ClrFlsSetThreadType(type) + +// +// Performance logging +// + +#define COUNTER_ONLY(x) + +//#include "etmdummy.h" +//#define ETW_EVENT_ENABLED(e,f) false + +namespace ETW +{ + typedef enum _GC_ROOT_KIND { + GC_ROOT_STACK = 0, + GC_ROOT_FQ = 1, + GC_ROOT_HANDLES = 2, + GC_ROOT_OLDER = 3, + GC_ROOT_SIZEDREF = 4, + GC_ROOT_OVERFLOW = 5 + } GC_ROOT_KIND; +}; + +// +// Logging +// + +void LogSpewAlways(const char *fmt, ...); + +#define DEFAULT_GC_PRN_LVL 3 + +// 
----------------------------------------------------------------------------------------------------------- + +void StompWriteBarrierEphemeral(bool isRuntimeSuspended); +void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck); +bool IsGCThread(); + +class CLRConfig +{ +public: + enum CLRConfigTypes + { + UNSUPPORTED_GCLogEnabled, + UNSUPPORTED_GCLogFile, + UNSUPPORTED_GCLogFileSize, + UNSUPPORTED_GCConfigLogEnabled, + UNSUPPORTED_GCConfigLogFile, + UNSUPPORTED_BGCSpinCount, + UNSUPPORTED_BGCSpin, + EXTERNAL_GCStressStart, + INTERNAL_GCStressStartAtJit, + INTERNAL_DbgDACSkipVerifyDlls, + Config_COUNT + }; + + typedef CLRConfigTypes ConfigDWORDInfo; + typedef CLRConfigTypes ConfigStringInfo; + + static uint32_t GetConfigValue(ConfigDWORDInfo eType); + static HRESULT GetConfigValue(ConfigStringInfo /*eType*/, __out_z TCHAR * * outVal); +}; + +inline bool FitsInU1(uint64_t val) +{ + return val == (uint64_t)(uint8_t)val; +} + +// ----------------------------------------------------------------------------------------------------------- +// +// AppDomain emulation. The we don't have these in Redhawk so instead we emulate the bare minimum of the API +// touched by the GC/HandleTable and pretend we have precisely one (default) appdomain. 
+// + +#define RH_DEFAULT_DOMAIN_ID 1 + +struct ADIndex +{ + DWORD m_dwIndex; + + ADIndex () : m_dwIndex(RH_DEFAULT_DOMAIN_ID) {} + explicit ADIndex (DWORD id) : m_dwIndex(id) {} + BOOL operator==(const ADIndex& ad) const { return m_dwIndex == ad.m_dwIndex; } + BOOL operator!=(const ADIndex& ad) const { return m_dwIndex != ad.m_dwIndex; } +}; + +class AppDomain +{ +public: + ADIndex GetIndex() { return ADIndex(RH_DEFAULT_DOMAIN_ID); } + BOOL IsRudeUnload() { return FALSE; } + BOOL NoAccessToHandleTable() { return FALSE; } + void DecNumSizedRefHandles() {} +}; + +class SystemDomain +{ +public: + static SystemDomain *System() { return NULL; } + static AppDomain *GetAppDomainAtIndex(ADIndex /*index*/) { return (AppDomain *)-1; } + static AppDomain *AppDomainBeingUnloaded() { return NULL; } + AppDomain *DefaultDomain() { return NULL; } + DWORD GetTotalNumSizedRefHandles() { return 0; } +}; + +#ifdef STRESS_HEAP +namespace GCStressPolicy +{ + static volatile int32_t s_cGcStressDisables; + + inline bool IsEnabled() { return s_cGcStressDisables == 0; } + inline void GlobalDisable() { Interlocked::Increment(&s_cGcStressDisables); } + inline void GlobalEnable() { Interlocked::Decrement(&s_cGcStressDisables); } +} + +enum gcs_trigger_points +{ + cfg_any, +}; + +template <enum gcs_trigger_points tp> +class GCStress +{ +public: + static inline bool IsEnabled() + { + return g_pConfig->GetGCStressLevel() != 0; + } +}; +#endif // STRESS_HEAP + +class NumaNodeInfo +{ +public: + static bool CanEnableGCNumaAware(); + static void GetGroupForProcessor(uint16_t processor_number, uint16_t * group_number, uint16_t * group_processor_number); + static bool GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, uint16_t * node_no); +}; + +class CPUGroupInfo +{ +public: + static bool CanEnableGCCPUGroups(); + static uint32_t GetNumActiveProcessors(); + static void GetGroupForProcessor(uint16_t processor_number, uint16_t * group_number, uint16_t * group_processor_number); +}; + + +#endif // 
__GCENV_BASE_INCLUDED__ diff --git a/src/gc/env/gcenv.ee.h b/src/gc/env/gcenv.ee.h new file mode 100644 index 0000000000..0c1fd4988a --- /dev/null +++ b/src/gc/env/gcenv.ee.h @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// Interface between the GC and EE +// + +#ifndef __GCENV_EE_H__ +#define __GCENV_EE_H__ + +struct ScanContext; +class CrawlFrame; + +typedef void promote_func(PTR_PTR_Object, ScanContext*, uint32_t); + +typedef void enum_alloc_context_func(alloc_context*, void*); + +typedef struct +{ + promote_func* f; + ScanContext* sc; + CrawlFrame * cf; +} GCCONTEXT; + +// GC background thread function prototype +typedef uint32_t (__stdcall *GCBackgroundThreadFunction)(void* param); + +class GCToEEInterface +{ +public: + // + // Suspend/Resume callbacks + // + typedef enum + { + SUSPEND_FOR_GC = 1, + SUSPEND_FOR_GC_PREP = 6 + } SUSPEND_REASON; + + static void SuspendEE(SUSPEND_REASON reason); + static void RestartEE(bool bFinishedGC); //resume threads. + + // + // The GC roots enumeration callback + // + static void GcScanRoots(promote_func* fn, int condemned, int max_gen, ScanContext* sc); + + // + // Callbacks issues during GC that the execution engine can do its own bookeeping + // + + // start of GC call back - single threaded + static void GcStartWork(int condemned, int max_gen); + + //EE can perform post stack scanning action, while the + // user threads are still suspended + static void AfterGcScanRoots(int condemned, int max_gen, ScanContext* sc); + + // Called before BGC starts sweeping, the heap is walkable + static void GcBeforeBGCSweepWork(); + + // post-gc callback. 
+ static void GcDone(int condemned); + + // Promote refcounted handle callback + static bool RefCountedHandleCallbacks(Object * pObject); + + // Sync block cache management + static void SyncBlockCacheWeakPtrScan(HANDLESCANPROC scanProc, uintptr_t lp1, uintptr_t lp2); + static void SyncBlockCacheDemote(int max_gen); + static void SyncBlockCachePromotionsGranted(int max_gen); + + // Thread functions + static bool IsPreemptiveGCDisabled(Thread * pThread); + static void EnablePreemptiveGC(Thread * pThread); + static void DisablePreemptiveGC(Thread * pThread); + + static alloc_context * GetAllocContext(Thread * pThread); + static bool CatchAtSafePoint(Thread * pThread); + + static void GcEnumAllocContexts(enum_alloc_context_func* fn, void* param); + + static Thread* CreateBackgroundThread(GCBackgroundThreadFunction threadStart, void* arg); +}; + +#endif // __GCENV_EE_H__ diff --git a/src/gc/env/gcenv.interlocked.h b/src/gc/env/gcenv.interlocked.h new file mode 100644 index 0000000000..1b1035958e --- /dev/null +++ b/src/gc/env/gcenv.interlocked.h @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// Interlocked operations +// + +#ifndef __GCENV_INTERLOCKED_H__ +#define __GCENV_INTERLOCKED_H__ + +// Interlocked operations +class Interlocked +{ +public: + + // Increment the value of the specified 32-bit variable as an atomic operation. + // Parameters: + // addend - variable to be incremented + // Return: + // The resulting incremented value + template<typename T> + static T Increment(T volatile *addend); + + // Decrement the value of the specified 32-bit variable as an atomic operation. 
+ // Parameters: + // addend - variable to be decremented + // Return: + // The resulting decremented value + template<typename T> + static T Decrement(T volatile *addend); + + // Perform an atomic AND operation on the specified values values + // Parameters: + // destination - the first operand and the destination + // value - second operand + template<typename T> + static void And(T volatile *destination, T value); + + // Perform an atomic OR operation on the specified values values + // Parameters: + // destination - the first operand and the destination + // value - second operand + template<typename T> + static void Or(T volatile *destination, T value); + + // Set a 32-bit variable to the specified value as an atomic operation. + // Parameters: + // destination - value to be exchanged + // value - value to set the destination to + // Return: + // The previous value of the destination + template<typename T> + static T Exchange(T volatile *destination, T value); + + // Set a pointer variable to the specified value as an atomic operation. + // Parameters: + // destination - value to be exchanged + // value - value to set the destination to + // Return: + // The previous value of the destination + template <typename T> + static T ExchangePointer(T volatile * destination, T value); + + template <typename T> + static T ExchangePointer(T volatile * destination, std::nullptr_t value); + + // Perform an atomic addition of two 32-bit values and return the original value of the addend. + // Parameters: + // addend - variable to be added to + // value - value to add + // Return: + // The previous value of the addend + template<typename T> + static T ExchangeAdd(T volatile *addend, T value); + + // Performs an atomic compare-and-exchange operation on the specified values. + // Parameters: + // destination - value to be exchanged + // exchange - value to set the destination to + // comparand - value to compare the destination to before setting it to the exchange. 
+ // The destination is set only if the destination is equal to the comparand. + // Return: + // The original value of the destination + template<typename T> + static T CompareExchange(T volatile *destination, T exchange, T comparand); + + // Performs an atomic compare-and-exchange operation on the specified pointers. + // Parameters: + // destination - value to be exchanged + // exchange - value to set the destination to + // comparand - value to compare the destination to before setting it to the exchange. + // The destination is set only if the destination is equal to the comparand. + // Return: + // The original value of the destination + template <typename T> + static T CompareExchangePointer(T volatile *destination, T exchange, T comparand); + + template <typename T> + static T CompareExchangePointer(T volatile *destination, T exchange, std::nullptr_t comparand); +}; + +#endif // __GCENV_INTERLOCKED_H__ diff --git a/src/gc/env/gcenv.interlocked.inl b/src/gc/env/gcenv.interlocked.inl new file mode 100644 index 0000000000..fd4f839970 --- /dev/null +++ b/src/gc/env/gcenv.interlocked.inl @@ -0,0 +1,199 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// __forceinline implementation of the Interlocked class methods +// + +#ifndef __GCENV_INTERLOCKED_INL__ +#define __GCENV_INTERLOCKED_INL__ + +#ifdef _MSC_VER +#include <intrin.h> +#endif // _MSC_VER + +// Increment the value of the specified 32-bit variable as an atomic operation. 
+// Parameters: +// addend - variable to be incremented +// Return: +// The resulting incremented value +template <typename T> +__forceinline T Interlocked::Increment(T volatile *addend) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + return _InterlockedIncrement((long*)addend); +#else + return __sync_add_and_fetch(addend, 1); +#endif +} + +// Decrement the value of the specified 32-bit variable as an atomic operation. +// Parameters: +// addend - variable to be decremented +// Return: +// The resulting decremented value +template <typename T> +__forceinline T Interlocked::Decrement(T volatile *addend) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + return _InterlockedDecrement((long*)addend); +#else + return __sync_sub_and_fetch(addend, 1); +#endif +} + +// Set a 32-bit variable to the specified value as an atomic operation. +// Parameters: +// destination - value to be exchanged +// value - value to set the destination to +// Return: +// The previous value of the destination +template <typename T> +__forceinline T Interlocked::Exchange(T volatile *destination, T value) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + return _InterlockedExchange((long*)destination, value); +#else + return __sync_swap(destination, value); +#endif +} + +// Performs an atomic compare-and-exchange operation on the specified values. +// Parameters: +// destination - value to be exchanged +// exchange - value to set the destinaton to +// comparand - value to compare the destination to before setting it to the exchange. +// The destination is set only if the destination is equal to the comparand. 
+// Return: +// The original value of the destination +template <typename T> +__forceinline T Interlocked::CompareExchange(T volatile *destination, T exchange, T comparand) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + return _InterlockedCompareExchange((long*)destination, exchange, comparand); +#else + return __sync_val_compare_and_swap(destination, comparand, exchange); +#endif +} + +// Perform an atomic addition of two 32-bit values and return the original value of the addend. +// Parameters: +// addend - variable to be added to +// value - value to add +// Return: +// The previous value of the addend +template <typename T> +__forceinline T Interlocked::ExchangeAdd(T volatile *addend, T value) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + return _InterlockedExchangeAdd((long*)addend, value); +#else + return __sync_fetch_and_add(addend, value); +#endif +} + +// Perform an atomic AND operation on the specified values values +// Parameters: +// destination - the first operand and the destination +// value - second operand +template <typename T> +__forceinline void Interlocked::And(T volatile *destination, T value) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + _InterlockedAnd((long*)destination, value); +#else + __sync_and_and_fetch(destination, value); +#endif +} + +// Perform an atomic OR operation on the specified values values +// Parameters: +// destination - the first operand and the destination +// value - second operand +template <typename T> +__forceinline void Interlocked::Or(T volatile *destination, T value) +{ +#ifdef _MSC_VER + static_assert(sizeof(long) == sizeof(T), "Size of long must be the same as size of T"); + _InterlockedOr((long*)destination, value); +#else + __sync_or_and_fetch(destination, value); +#endif +} + +// Set a pointer variable to the 
specified value as an atomic operation. +// Parameters: +// destination - value to be exchanged +// value - value to set the destination to +// Return: +// The previous value of the destination +template <typename T> +__forceinline T Interlocked::ExchangePointer(T volatile * destination, T value) +{ +#ifdef _MSC_VER +#ifdef BIT64 + return (T)(TADDR)_InterlockedExchangePointer((void* volatile *)destination, value); +#else + return (T)(TADDR)_InterlockedExchange((long volatile *)(void* volatile *)destination, (long)(void*)value); +#endif +#else + return (T)(TADDR)__sync_swap((void* volatile *)destination, value); +#endif +} + +template <typename T> +__forceinline T Interlocked::ExchangePointer(T volatile * destination, std::nullptr_t value) +{ +#ifdef _MSC_VER +#ifdef BIT64 + return (T)(TADDR)_InterlockedExchangePointer((void* volatile *)destination, value); +#else + return (T)(TADDR)_InterlockedExchange((long volatile *)(void* volatile *)destination, (long)(void*)value); +#endif +#else + return (T)(TADDR)__sync_swap((void* volatile *)destination, value); +#endif +} + +// Performs an atomic compare-and-exchange operation on the specified pointers. +// Parameters: +// destination - value to be exchanged +// exchange - value to set the destinaton to +// comparand - value to compare the destination to before setting it to the exchange. +// The destination is set only if the destination is equal to the comparand. 
+// Return: +// The original value of the destination +template <typename T> +__forceinline T Interlocked::CompareExchangePointer(T volatile *destination, T exchange, T comparand) +{ +#ifdef _MSC_VER +#ifdef BIT64 + return (T)(TADDR)_InterlockedCompareExchangePointer((void* volatile *)destination, exchange, comparand); +#else + return (T)(TADDR)_InterlockedCompareExchange((long volatile *)(void* volatile *)destination, (long)(void*)exchange, (long)(void*)comparand); +#endif +#else + return (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, comparand, exchange); +#endif +} + +template <typename T> +__forceinline T Interlocked::CompareExchangePointer(T volatile *destination, T exchange, std::nullptr_t comparand) +{ +#ifdef _MSC_VER +#ifdef BIT64 + return (T)(TADDR)_InterlockedCompareExchangePointer((void* volatile *)destination, (void*)exchange, (void*)comparand); +#else + return (T)(TADDR)_InterlockedCompareExchange((long volatile *)(void* volatile *)destination, (long)(void*)exchange, (long)(void*)comparand); +#endif +#else + return (T)(TADDR)__sync_val_compare_and_swap((void* volatile *)destination, (void*)comparand, (void*)exchange); +#endif +} + +#endif // __GCENV_INTERLOCKED_INL__ diff --git a/src/gc/env/gcenv.object.h b/src/gc/env/gcenv.object.h new file mode 100644 index 0000000000..c999e4538e --- /dev/null +++ b/src/gc/env/gcenv.object.h @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +//------------------------------------------------------------------------------------------------- +// +// Low-level types describing GC object layouts. 
+// + +// Bits stolen from the sync block index that the GC/HandleTable knows about (currently these are at the same +// positions as the mainline runtime but we can change this below when it becomes apparent how Redhawk will +// handle sync blocks). +#define BIT_SBLK_GC_RESERVE 0x20000000 +#define BIT_SBLK_FINALIZER_RUN 0x40000000 + +// The sync block index header (small structure that immediately precedes every object in the GC heap). Only +// the GC uses this so far, and only to store a couple of bits of information. +class ObjHeader +{ +private: +#if defined(BIT64) + uint32_t m_uAlignpad; +#endif // BIT64 + uint32_t m_uSyncBlockValue; + +public: + uint32_t GetBits() { return m_uSyncBlockValue; } + void SetBit(uint32_t uBit) { Interlocked::Or(&m_uSyncBlockValue, uBit); } + void ClrBit(uint32_t uBit) { Interlocked::And(&m_uSyncBlockValue, ~uBit); } + void SetGCBit() { m_uSyncBlockValue |= BIT_SBLK_GC_RESERVE; } + void ClrGCBit() { m_uSyncBlockValue &= ~BIT_SBLK_GC_RESERVE; } +}; + +#define MTFlag_ContainsPointers 1 +#define MTFlag_HasFinalizer 2 +#define MTFlag_IsArray 4 + +class MethodTable +{ +public: + uint16_t m_componentSize; + uint16_t m_flags; + uint32_t m_baseSize; + + MethodTable * m_pRelatedType; + +public: + void InitializeFreeObject() + { + m_baseSize = 3 * sizeof(void *); + m_componentSize = 1; + m_flags = 0; + } + + uint32_t GetBaseSize() + { + return m_baseSize; + } + + uint16_t RawGetComponentSize() + { + return m_componentSize; + } + + bool ContainsPointers() + { + return (m_flags & MTFlag_ContainsPointers) != 0; + } + + bool ContainsPointersOrCollectible() + { + return ContainsPointers(); + } + + bool HasComponentSize() + { + return m_componentSize != 0; + } + + bool HasFinalizer() + { + return (m_flags & MTFlag_HasFinalizer) != 0; + } + + bool HasCriticalFinalizer() + { + return false; + } + + bool IsArray() + { + return (m_flags & MTFlag_IsArray) != 0; + } + + MethodTable * GetParent() + { + _ASSERTE(!IsArray()); + return m_pRelatedType; + } + 
+ bool SanityCheck() + { + return true; + } +}; + +class Object +{ + MethodTable * m_pMethTab; + +public: + ObjHeader * GetHeader() + { + return ((ObjHeader *)this) - 1; + } + + MethodTable * RawGetMethodTable() const + { + return m_pMethTab; + } + + MethodTable * GetGCSafeMethodTable() const + { + return (MethodTable *)((uintptr_t)m_pMethTab & ~3); + } + + void RawSetMethodTable(MethodTable * pMT) + { + m_pMethTab = pMT; + } +}; +#define MIN_OBJECT_SIZE (2*sizeof(uint8_t*) + sizeof(ObjHeader)) + +class ArrayBase : public Object +{ + uint32_t m_dwLength; + +public: + uint32_t GetNumComponents() + { + return m_dwLength; + } + + static size_t GetOffsetOfNumComponents() + { + return offsetof(ArrayBase, m_dwLength); + } +}; diff --git a/src/gc/env/gcenv.os.h b/src/gc/env/gcenv.os.h new file mode 100644 index 0000000000..bb0153f117 --- /dev/null +++ b/src/gc/env/gcenv.os.h @@ -0,0 +1,283 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// Interface between GC and the OS specific functionality +// + +#ifndef __GCENV_OS_H__ +#define __GCENV_OS_H__ + +// Critical section used by the GC +class CLRCriticalSection +{ + CRITICAL_SECTION m_cs; + +public: + // Initialize the critical section + void Initialize(); + + // Destroy the critical section + void Destroy(); + + // Enter the critical section. Blocks until the section can be entered. 
+ void Enter(); + + // Leave the critical section + void Leave(); +}; + +// Flags for the GCToOSInterface::VirtualReserve method +struct VirtualReserveFlags +{ + enum + { + None = 0, + WriteWatch = 1, + }; +}; + +// Affinity of a GC thread +struct GCThreadAffinity +{ + static const int None = -1; + + // Processor group index, None if no group is specified + int Group; + // Processor index, None if no affinity is specified + int Processor; +}; + +// GC thread function prototype +typedef void (*GCThreadFunction)(void* param); + +// Interface that the GC uses to invoke OS specific functionality +class GCToOSInterface +{ +public: + + // + // Initialization and shutdown of the interface + // + + // Initialize the interface implementation + // Return: + // true if it has succeeded, false if it has failed + static bool Initialize(); + + // Shutdown the interface implementation + static void Shutdown(); + + // + // Virtual memory management + // + + // Reserve virtual memory range. + // Parameters: + // address - starting virtual address, it can be NULL to let the function choose the starting address + // size - size of the virtual memory range + // alignment - requested memory alignment + // flags - flags to control special settings like write watching + // Return: + // Starting virtual address of the reserved range + static void* VirtualReserve(void *address, size_t size, size_t alignment, uint32_t flags); + + // Release virtual memory range previously reserved using VirtualReserve + // Parameters: + // address - starting virtual address + // size - size of the virtual memory range + // Return: + // true if it has succeeded, false if it has failed + static bool VirtualRelease(void *address, size_t size); + + // Commit virtual memory range. It must be part of a range reserved using VirtualReserve. 
+ // Parameters: + // address - starting virtual address + // size - size of the virtual memory range + // Return: + // true if it has succeeded, false if it has failed + static bool VirtualCommit(void *address, size_t size); + + // Decomit virtual memory range. + // Parameters: + // address - starting virtual address + // size - size of the virtual memory range + // Return: + // true if it has succeeded, false if it has failed + static bool VirtualDecommit(void *address, size_t size); + + // Reset virtual memory range. Indicates that data in the memory range specified by address and size is no + // longer of interest, but it should not be decommitted. + // Parameters: + // address - starting virtual address + // size - size of the virtual memory range + // unlock - true if the memory range should also be unlocked + // Return: + // true if it has succeeded, false if it has failed + static bool VirtualReset(void *address, size_t size, bool unlock); + + // + // Write watching + // + + // Check if the OS supports write watching + static bool SupportsWriteWatch(); + + // Reset the write tracking state for the specified virtual memory range. + // Parameters: + // address - starting virtual address + // size - size of the virtual memory range + static void ResetWriteWatch(void *address, size_t size); + + // Retrieve addresses of the pages that are written to in a region of virtual memory + // Parameters: + // resetState - true indicates to reset the write tracking state + // address - starting virtual address + // size - size of the virtual memory range + // pageAddresses - buffer that receives an array of page addresses in the memory region + // pageAddressesCount - on input, size of the lpAddresses array, in array elements + // on output, the number of page addresses that are returned in the array. 
+ // Return: + // true if it has succeeded, false if it has failed + static bool GetWriteWatch(bool resetState, void* address, size_t size, void** pageAddresses, uintptr_t* pageAddressesCount); + + // + // Thread and process + // + + // Create a new thread + // Parameters: + // function - the function to be executed by the thread + // param - parameters of the thread + // affinity - processor affinity of the thread + // Return: + // true if it has succeeded, false if it has failed + static bool CreateThread(GCThreadFunction function, void* param, GCThreadAffinity* affinity); + + // Causes the calling thread to sleep for the specified number of milliseconds + // Parameters: + // sleepMSec - time to sleep before switching to another thread + static void Sleep(uint32_t sleepMSec); + + // Causes the calling thread to yield execution to another thread that is ready to run on the current processor. + // Parameters: + // switchCount - number of times the YieldThread was called in a loop + static void YieldThread(uint32_t switchCount); + + // Get the number of the current processor + static uint32_t GetCurrentProcessorNumber(); + + // Check if the OS supports getting current processor number + static bool CanGetCurrentProcessorNumber(); + + // Set ideal processor for the current thread + // Parameters: + // processorIndex - index of the processor in the group + // affinity - ideal processor affinity for the thread + // Return: + // true if it has succeeded, false if it has failed + static bool SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity); + + // Get numeric id of the current thread if possible on the + // current platform. It is indended for logging purposes only. 
+ // Return: + // Numeric id of the current thread or 0 if the + static uint64_t GetCurrentThreadIdForLogging(); + + // Get id of the current process + // Return: + // Id of the current process + static uint32_t GetCurrentProcessId(); + + // + // Processor topology + // + + // Get number of logical processors + static uint32_t GetLogicalCpuCount(); + + // Get size of the largest cache on the processor die + // Parameters: + // trueSize - true to return true cache size, false to return scaled up size based on + // the processor architecture + // Return: + // Size of the cache + static size_t GetLargestOnDieCacheSize(bool trueSize = true); + + // Get number of processors assigned to the current process + // Return: + // The number of processors + static uint32_t GetCurrentProcessCpuCount(); + + // Get affinity mask of the current process + // Parameters: + // processMask - affinity mask for the specified process + // systemMask - affinity mask for the system + // Return: + // true if it has succeeded, false if it has failed + // Remarks: + // A process affinity mask is a bit vector in which each bit represents the processors that + // a process is allowed to run on. A system affinity mask is a bit vector in which each bit + // represents the processors that are configured into a system. + // A process affinity mask is a subset of the system affinity mask. A process is only allowed + // to run on the processors configured into a system. Therefore, the process affinity mask cannot + // specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor. + static bool GetCurrentProcessAffinityMask(uintptr_t *processMask, uintptr_t *systemMask); + + // + // Global memory info + // + + // Return the size of the user-mode portion of the virtual address space of this process. + // Return: + // non zero if it has succeeded, 0 if it has failed + static size_t GetVirtualMemoryLimit(); + + // Get the physical memory that this process can use. 
+ // Return: + // non zero if it has succeeded, 0 if it has failed + // Remarks: + // If a process runs with a restricted memory limit, it returns the limit. If there's no limit + // specified, it returns amount of actual physical memory. + static uint64_t GetPhysicalMemoryLimit(); + + // Get memory status + // Parameters: + // memory_load - A number between 0 and 100 that specifies the approximate percentage of physical memory + // that is in use (0 indicates no memory use and 100 indicates full memory use). + // available_physical - The amount of physical memory currently available, in bytes. + // available_page_file - The maximum amount of memory the current process can commit, in bytes. + // Remarks: + // Any parameter can be null. + static void GetMemoryStatus(uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file); + + // + // Misc + // + + // Flush write buffers of processors that are executing threads of the current process + static void FlushProcessWriteBuffers(); + + // Break into a debugger + static void DebugBreak(); + + // + // Time + // + + // Get a high precision performance counter + // Return: + // The counter value + static int64_t QueryPerformanceCounter(); + + // Get a frequency of the high precision performance counter + // Return: + // The counter frequency + static int64_t QueryPerformanceFrequency(); + + // Get a time stamp with a low precision + // Return: + // Time stamp in milliseconds + static uint32_t GetLowPrecisionTimeStamp(); +}; + +#endif // __GCENV_OS_H__ diff --git a/src/gc/env/gcenv.structs.h b/src/gc/env/gcenv.structs.h new file mode 100644 index 0000000000..5887dd7852 --- /dev/null +++ b/src/gc/env/gcenv.structs.h @@ -0,0 +1,122 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+#ifndef __GCENV_STRUCTS_INCLUDED__ +#define __GCENV_STRUCTS_INCLUDED__ +// +// Structs shared between the GC and the environment +// + +struct GCSystemInfo +{ + uint32_t dwNumberOfProcessors; + uint32_t dwPageSize; + uint32_t dwAllocationGranularity; +}; + +typedef void * HANDLE; + +#ifdef PLATFORM_UNIX + +typedef char TCHAR; +#define _T(s) s + +#else + +#ifndef _INC_WINDOWS +typedef wchar_t TCHAR; +#define _T(s) L##s +#endif + +#endif + +#ifdef PLATFORM_UNIX + +class EEThreadId +{ + pthread_t m_id; + // Indicates whether the m_id is valid or not. pthread_t doesn't have any + // portable "invalid" value. + bool m_isValid; + +public: + bool IsCurrentThread() + { + return m_isValid && pthread_equal(m_id, pthread_self()); + } + + void SetToCurrentThread() + { + m_id = pthread_self(); + m_isValid = true; + } + + void Clear() + { + m_isValid = false; + } +}; + +#else // PLATFORM_UNIX + +#ifndef _INC_WINDOWS +extern "C" uint32_t __stdcall GetCurrentThreadId(); +#endif + +class EEThreadId +{ + uint32_t m_uiId; +public: + + bool IsCurrentThread() + { + return m_uiId == ::GetCurrentThreadId(); + } + + void SetToCurrentThread() + { + m_uiId = ::GetCurrentThreadId(); + } + + void Clear() + { + m_uiId = 0; + } +}; + +#endif // PLATFORM_UNIX + +#ifndef _INC_WINDOWS + +#ifdef PLATFORM_UNIX + +typedef struct _RTL_CRITICAL_SECTION { + pthread_mutex_t mutex; +} CRITICAL_SECTION, RTL_CRITICAL_SECTION, *PRTL_CRITICAL_SECTION; + +#else + +#pragma pack(push, 8) + +typedef struct _RTL_CRITICAL_SECTION { + void* DebugInfo; + + // + // The following three fields control entering and exiting the critical + // section for the resource + // + + int32_t LockCount; + int32_t RecursionCount; + HANDLE OwningThread; // from the thread's ClientId->UniqueThread + HANDLE LockSemaphore; + uintptr_t SpinCount; // force size on 64-bit systems when packed +} CRITICAL_SECTION, RTL_CRITICAL_SECTION, *PRTL_CRITICAL_SECTION; + +#pragma pack(pop) + +#endif + +#endif // _INC_WINDOWS + +#endif // 
__GCENV_STRUCTS_INCLUDED__ diff --git a/src/gc/env/gcenv.sync.h b/src/gc/env/gcenv.sync.h new file mode 100644 index 0000000000..d6bee05a19 --- /dev/null +++ b/src/gc/env/gcenv.sync.h @@ -0,0 +1,145 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// ----------------------------------------------------------------------------------------------------------- +// +// Helper classes expected by the GC +// +#define CRST_REENTRANCY 0 +#define CRST_UNSAFE_SAMELEVEL 0 +#define CRST_UNSAFE_ANYMODE 0 +#define CRST_DEBUGGER_THREAD 0 +#define CRST_DEFAULT 0 + +#define CrstHandleTable 0 + +typedef int CrstFlags; +typedef int CrstType; + +class CrstStatic +{ + CLRCriticalSection m_cs; +#ifdef _DEBUG + EEThreadId m_holderThreadId; +#endif + +public: + bool InitNoThrow(CrstType eType, CrstFlags eFlags = CRST_DEFAULT) + { + m_cs.Initialize(); + return true; + } + + void Destroy() + { + m_cs.Destroy(); + } + + void Enter() + { + m_cs.Enter(); +#ifdef _DEBUG + m_holderThreadId.SetToCurrentThread(); +#endif + } + + void Leave() + { +#ifdef _DEBUG + m_holderThreadId.Clear(); +#endif + m_cs.Leave(); + } + +#ifdef _DEBUG + EEThreadId GetHolderThreadId() + { + return m_holderThreadId; + } + + bool OwnedByCurrentThread() + { + return GetHolderThreadId().IsCurrentThread(); + } +#endif +}; + +class CrstHolder +{ + CrstStatic * m_pLock; + +public: + CrstHolder(CrstStatic * pLock) + : m_pLock(pLock) + { + m_pLock->Enter(); + } + + ~CrstHolder() + { + m_pLock->Leave(); + } +}; + +class CrstHolderWithState +{ + CrstStatic * m_pLock; + bool m_fAcquired; + +public: + CrstHolderWithState(CrstStatic * pLock, bool fAcquire = true) + : m_pLock(pLock), m_fAcquired(fAcquire) + { + if (fAcquire) + m_pLock->Enter(); + } + + ~CrstHolderWithState() + { + if (m_fAcquired) + m_pLock->Leave(); + } + + void Acquire() + { + if (!m_fAcquired) + { + 
m_pLock->Enter(); + m_fAcquired = true; + } + } + + void Release() + { + if (m_fAcquired) + { + m_pLock->Leave(); + m_fAcquired = false; + } + } + + CrstStatic * GetValue() + { + return m_pLock; + } +}; + +class CLREventStatic +{ +public: + bool CreateAutoEventNoThrow(bool bInitialState); + bool CreateManualEventNoThrow(bool bInitialState); + bool CreateOSAutoEventNoThrow(bool bInitialState); + bool CreateOSManualEventNoThrow(bool bInitialState); + + void CloseEvent(); + bool IsValid() const; + bool Set(); + bool Reset(); + uint32_t Wait(uint32_t dwMilliseconds, bool bAlertable); + +private: + HANDLE m_hEvent; + bool m_fInitialized; +}; diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp new file mode 100644 index 0000000000..a62b02d33a --- /dev/null +++ b/src/gc/gc.cpp @@ -0,0 +1,36954 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + +// +// #Overview +// +// GC automatically manages memory allocated by managed code. +// The design doc for GC can be found at Documentation/botr/garbage-collection.md +// +// This file includes both the code for GC and the allocator. The most common +// case for a GC to be triggered is from the allocator code. See +// code:#try_allocate_more_space where it calls GarbageCollectGeneration. 
+// +// Entry points for the allocator are GCHeap::Alloc* which are called by the +// allocation helpers in gcscan.cpp +// + +#include "gcpriv.h" + +#define USE_INTROSORT + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +inline BOOL ShouldTrackMovementForProfilerOrEtw() +{ +#ifdef GC_PROFILING + if (CORProfilerTrackGC()) + return true; +#endif + +#ifdef FEATURE_EVENT_TRACE + if (ETW::GCLog::ShouldTrackMovementForEtw()) + return true; +#endif + + return false; +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) +BOOL bgc_heap_walk_for_etw_p = FALSE; +#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE + +#if defined(FEATURE_REDHAWK) +#define MAYBE_UNUSED_VAR(v) v = v +#else +#define MAYBE_UNUSED_VAR(v) +#endif // FEATURE_REDHAWK + +#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) + +#ifdef SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 64 +#else //SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 32 +#endif //SERVER_GC + +#define demotion_plug_len_th (6*1024*1024) + +#ifdef BIT64 +#define MARK_STACK_INITIAL_LENGTH 1024 +#else +#define MARK_STACK_INITIAL_LENGTH 128 +#endif // BIT64 + +#define LOH_PIN_QUEUE_LENGTH 100 +#define LOH_PIN_DECAY 10 + +// Right now we support maximum 256 procs - meaning that we will create at most +// 256 GC threads and 256 GC heaps. +#define MAX_SUPPORTED_CPUS 256 + +#ifdef GC_CONFIG_DRIVEN +int compact_ratio = 0; +#endif //GC_CONFIG_DRIVEN + +#if defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) && defined(NO_WRITE_BARRIER) +#error Software write watch requires write barriers. +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP && NO_WRITE_BARRIER + +// See comments in reset_memory. 
+BOOL reset_mm_p = TRUE; + +#if defined (TRACE_GC) && !defined (DACCESS_COMPILE) +const char * const allocation_state_str[] = { + "start", + "can_allocate", + "cant_allocate", + "try_fit", + "try_fit_new_seg", + "try_fit_new_seg_after_cg", + "try_fit_no_seg", + "try_fit_after_cg", + "try_fit_after_bgc", + "try_free_full_seg_in_bgc", + "try_free_after_bgc", + "try_seg_end", + "acquire_seg", + "acquire_seg_after_cg", + "acquire_seg_after_bgc", + "check_and_wait_for_bgc", + "trigger_full_compact_gc", + "trigger_ephemeral_gc", + "trigger_2nd_ephemeral_gc", + "check_retry_seg" +}; +#endif //TRACE_GC && !DACCESS_COMPILE + + +// Keep this in sync with the definition of gc_reason +#if (defined(DT_LOG) || defined(TRACE_GC)) && !defined (DACCESS_COMPILE) +static const char* const str_gc_reasons[] = +{ + "alloc_soh", + "induced", + "lowmem", + "empty", + "alloc_loh", + "oos_soh", + "oos_loh", + "induced_noforce", + "gcstress", + "induced_lowmem", + "induced_compacting" +}; + +static const char* const str_gc_pause_modes[] = +{ + "batch", + "interactive", + "low_latency", + "sustained_low_latency", + "no_gc" +}; +#endif // defined(DT_LOG) || defined(TRACE_GC) + +inline +BOOL is_induced (gc_reason reason) +{ + return ((reason == reason_induced) || + (reason == reason_induced_noforce) || + (reason == reason_lowmemory) || + (reason == reason_lowmemory_blocking) || + (reason == reason_induced_compacting)); +} + +inline +BOOL is_induced_blocking (gc_reason reason) +{ + return ((reason == reason_induced) || + (reason == reason_lowmemory_blocking) || + (reason == reason_induced_compacting)); +} + +#ifndef DACCESS_COMPILE +int64_t qpf; + +size_t GetHighPrecisionTimeStamp() +{ + int64_t ts = GCToOSInterface::QueryPerformanceCounter(); + + return (size_t)(ts / (qpf / 1000)); +} +#endif + +#ifdef GC_STATS +// There is a current and a prior copy of the statistics. This allows us to display deltas per reporting +// interval, as well as running totals. 
The 'min' and 'max' values require special treatment. They are +// Reset (zeroed) in the current statistics when we begin a new interval and they are updated via a +// comparison with the global min/max. +GCStatistics g_GCStatistics; +GCStatistics g_LastGCStatistics; + +TCHAR* GCStatistics::logFileName = NULL; +FILE* GCStatistics::logFile = NULL; + +void GCStatistics::AddGCStats(const gc_mechanisms& settings, size_t timeInMSec) +{ +#ifdef BACKGROUND_GC + if (settings.concurrent) + { + bgc.Accumulate((uint32_t)timeInMSec*1000); + cntBGC++; + } + else if (settings.background_p) + { + fgc.Accumulate((uint32_t)timeInMSec*1000); + cntFGC++; + if (settings.compaction) + cntCompactFGC++; + assert(settings.condemned_generation < max_generation); + cntFGCGen[settings.condemned_generation]++; + } + else +#endif // BACKGROUND_GC + { + ngc.Accumulate((uint32_t)timeInMSec*1000); + cntNGC++; + if (settings.compaction) + cntCompactNGC++; + cntNGCGen[settings.condemned_generation]++; + } + + if (is_induced (settings.reason)) + cntReasons[(int)reason_induced]++; + else if (settings.stress_induced) + cntReasons[(int)reason_gcstress]++; + else + cntReasons[(int)settings.reason]++; + +#ifdef BACKGROUND_GC + if (settings.concurrent || !settings.background_p) + { +#endif // BACKGROUND_GC + RollOverIfNeeded(); +#ifdef BACKGROUND_GC + } +#endif // BACKGROUND_GC +} + +void GCStatistics::Initialize() +{ + LIMITED_METHOD_CONTRACT; + // for efficiency sake we're taking a dependency on the layout of a C++ object + // with a vtable. 
protect against violations of our premise: + static_assert(offsetof(GCStatistics, cntDisplay) == sizeof(void*), + "The first field of GCStatistics follows the pointer sized vtable"); + + int podOffs = offsetof(GCStatistics, cntDisplay); // offset of the first POD field + memset((uint8_t*)(&g_GCStatistics)+podOffs, 0, sizeof(g_GCStatistics)-podOffs); + memset((uint8_t*)(&g_LastGCStatistics)+podOffs, 0, sizeof(g_LastGCStatistics)-podOffs); +} + +void GCStatistics::DisplayAndUpdate() +{ + LIMITED_METHOD_CONTRACT; + + if (logFileName == NULL || logFile == NULL) + return; + + { + if (cntDisplay == 0) + fprintf(logFile, "\nGCMix **** Initialize *****\n\n"); + + fprintf(logFile, "GCMix **** Summary ***** %d\n", cntDisplay); + + // NGC summary (total, timing info) + ngc.DisplayAndUpdate(logFile, "NGC ", &g_LastGCStatistics.ngc, cntNGC, g_LastGCStatistics.cntNGC, msec); + + // FGC summary (total, timing info) + fgc.DisplayAndUpdate(logFile, "FGC ", &g_LastGCStatistics.fgc, cntFGC, g_LastGCStatistics.cntFGC, msec); + + // BGC summary + bgc.DisplayAndUpdate(logFile, "BGC ", &g_LastGCStatistics.bgc, cntBGC, g_LastGCStatistics.cntBGC, msec); + + // NGC/FGC break out by generation & compacting vs. sweeping + fprintf(logFile, "NGC "); + for (int i = max_generation; i >= 0; --i) + fprintf(logFile, "gen%d %d (%d). ", i, cntNGCGen[i]-g_LastGCStatistics.cntNGCGen[i], cntNGCGen[i]); + fprintf(logFile, "\n"); + + fprintf(logFile, "FGC "); + for (int i = max_generation-1; i >= 0; --i) + fprintf(logFile, "gen%d %d (%d). ", i, cntFGCGen[i]-g_LastGCStatistics.cntFGCGen[i], cntFGCGen[i]); + fprintf(logFile, "\n"); + + // Compacting vs. 
Sweeping break out + int _cntSweep = cntNGC-cntCompactNGC; + int _cntLastSweep = g_LastGCStatistics.cntNGC-g_LastGCStatistics.cntCompactNGC; + fprintf(logFile, "NGC Sweeping %d (%d) Compacting %d (%d)\n", + _cntSweep - _cntLastSweep, _cntSweep, + cntCompactNGC - g_LastGCStatistics.cntCompactNGC, cntCompactNGC); + + _cntSweep = cntFGC-cntCompactFGC; + _cntLastSweep = g_LastGCStatistics.cntFGC-g_LastGCStatistics.cntCompactFGC; + fprintf(logFile, "FGC Sweeping %d (%d) Compacting %d (%d)\n", + _cntSweep - _cntLastSweep, _cntSweep, + cntCompactFGC - g_LastGCStatistics.cntCompactFGC, cntCompactFGC); + +#ifdef TRACE_GC + // GC reasons... + for (int reason=(int)reason_alloc_soh; reason <= (int)reason_gcstress; ++reason) + { + if (cntReasons[reason] != 0) + fprintf(logFile, "%s %d (%d). ", str_gc_reasons[reason], + cntReasons[reason]-g_LastGCStatistics.cntReasons[reason], cntReasons[reason]); + } +#endif // TRACE_GC + fprintf(logFile, "\n\n"); + + // flush the log file... + fflush(logFile); + } + + g_LastGCStatistics = *this; + + ngc.Reset(); + fgc.Reset(); + bgc.Reset(); +} + +#endif // GC_STATS + +#ifdef BACKGROUND_GC +uint32_t bgc_alloc_spin_count = 140; +uint32_t bgc_alloc_spin_count_loh = 16; +uint32_t bgc_alloc_spin = 2; + + +inline +void c_write (uint32_t& place, uint32_t value) +{ + Interlocked::Exchange (&place, value); + //place = value; +} + +#ifndef DACCESS_COMPILE +// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. 
+const size_t bgc_min_per_heap = 4*1024*1024; + +int gc_heap::gchist_index = 0; +gc_mechanisms_store gc_heap::gchist[max_history_count]; + +#ifndef MULTIPLE_HEAPS +size_t gc_heap::total_promoted_bytes = 0; +VOLATILE(bgc_state) gc_heap::current_bgc_state = bgc_not_in_process; +int gc_heap::gchist_index_per_heap = 0; +gc_heap::gc_history gc_heap::gchist_per_heap[max_history_count]; +#endif //MULTIPLE_HEAPS + +void gc_heap::add_to_history_per_heap() +{ +#ifdef GC_HISTORY + gc_history* current_hist = &gchist_per_heap[gchist_index_per_heap]; + current_hist->gc_index = settings.gc_index; + current_hist->current_bgc_state = current_bgc_state; + size_t elapsed = dd_gc_elapsed_time (dynamic_data_of (0)); + current_hist->gc_time_ms = (uint32_t)elapsed; + current_hist->gc_efficiency = (elapsed ? (total_promoted_bytes / elapsed) : total_promoted_bytes); + current_hist->eph_low = generation_allocation_start (generation_of (max_generation-1)); + current_hist->gen0_start = generation_allocation_start (generation_of (0)); + current_hist->eph_high = heap_segment_allocated (ephemeral_heap_segment); +#ifdef BACKGROUND_GC + current_hist->bgc_lowest = background_saved_lowest_address; + current_hist->bgc_highest = background_saved_highest_address; +#endif //BACKGROUND_GC + current_hist->fgc_lowest = lowest_address; + current_hist->fgc_highest = highest_address; + current_hist->g_lowest = g_lowest_address; + current_hist->g_highest = g_highest_address; + + gchist_index_per_heap++; + if (gchist_index_per_heap == max_history_count) + { + gchist_index_per_heap = 0; + } +#endif //GC_HISTORY +} + +void gc_heap::add_to_history() +{ +#ifdef GC_HISTORY + gc_mechanisms_store* current_settings = &gchist[gchist_index]; + current_settings->store (&settings); + + gchist_index++; + if (gchist_index == max_history_count) + { + gchist_index = 0; + } +#endif //GC_HISTORY +} + +#endif //DACCESS_COMPILE +#endif //BACKGROUND_GC + +#if defined(TRACE_GC) && !defined(DACCESS_COMPILE) +BOOL gc_log_on = TRUE; 
+FILE* gc_log = NULL; +size_t gc_log_file_size = 0; + +size_t gc_buffer_index = 0; +size_t max_gc_buffers = 0; + +static CLRCriticalSection gc_log_lock; + +// we keep this much in a buffer and only flush when the buffer is full +#define gc_log_buffer_size (1024*1024) +uint8_t* gc_log_buffer = 0; +size_t gc_log_buffer_offset = 0; + +void log_va_msg(const char *fmt, va_list args) +{ + gc_log_lock.Enter(); + + const int BUFFERSIZE = 512; + static char rgchBuffer[BUFFERSIZE]; + char * pBuffer = &rgchBuffer[0]; + + pBuffer[0] = '\n'; + int buffer_start = 1; + int pid_len = sprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, "[%5d]", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging()); + buffer_start += pid_len; + memset(&pBuffer[buffer_start], '-', BUFFERSIZE - buffer_start); + int msg_len = _vsnprintf(&pBuffer[buffer_start], BUFFERSIZE - buffer_start, fmt, args ); + if (msg_len == -1) + { + msg_len = BUFFERSIZE - buffer_start; + } + + msg_len += buffer_start; + + if ((gc_log_buffer_offset + msg_len) > (gc_log_buffer_size - 12)) + { + char index_str[8]; + memset (index_str, '-', 8); + sprintf_s (index_str, _countof(index_str), "%d", (int)gc_buffer_index); + gc_log_buffer[gc_log_buffer_offset] = '\n'; + memcpy (gc_log_buffer + (gc_log_buffer_offset + 1), index_str, 8); + + gc_buffer_index++; + if (gc_buffer_index > max_gc_buffers) + { + fseek (gc_log, 0, SEEK_SET); + gc_buffer_index = 0; + } + fwrite(gc_log_buffer, gc_log_buffer_size, 1, gc_log); + fflush(gc_log); + memset (gc_log_buffer, '*', gc_log_buffer_size); + gc_log_buffer_offset = 0; + } + + memcpy (gc_log_buffer + gc_log_buffer_offset, pBuffer, msg_len); + gc_log_buffer_offset += msg_len; + + gc_log_lock.Leave(); +} + +void GCLog (const char *fmt, ... 
) +{ + if (gc_log_on && (gc_log != NULL)) + { + va_list args; + va_start(args, fmt); + log_va_msg (fmt, args); + va_end(args); + } +} +#endif // TRACE_GC && !DACCESS_COMPILE + +#if defined(GC_CONFIG_DRIVEN) && !defined(DACCESS_COMPILE) + +BOOL gc_config_log_on = FALSE; +FILE* gc_config_log = NULL; + +// we keep this much in a buffer and only flush when the buffer is full +#define gc_config_log_buffer_size (1*1024) // TEMP +uint8_t* gc_config_log_buffer = 0; +size_t gc_config_log_buffer_offset = 0; + +// For config since we log so little we keep the whole history. Also it's only +// ever logged by one thread so no need to synchronize. +void log_va_msg_config(const char *fmt, va_list args) +{ + const int BUFFERSIZE = 256; + static char rgchBuffer[BUFFERSIZE]; + char * pBuffer = &rgchBuffer[0]; + + pBuffer[0] = '\n'; + int buffer_start = 1; + int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args ); + assert (msg_len != -1); + msg_len += buffer_start; + + if ((gc_config_log_buffer_offset + msg_len) > gc_config_log_buffer_size) + { + fwrite(gc_config_log_buffer, gc_config_log_buffer_offset, 1, gc_config_log); + fflush(gc_config_log); + gc_config_log_buffer_offset = 0; + } + + memcpy (gc_config_log_buffer + gc_config_log_buffer_offset, pBuffer, msg_len); + gc_config_log_buffer_offset += msg_len; +} + +void GCLogConfig (const char *fmt, ... ) +{ + if (gc_config_log_on && (gc_config_log != NULL)) + { + va_list args; + va_start( args, fmt ); + log_va_msg_config (fmt, args); + } +} +#endif // GC_CONFIG_DRIVEN && !DACCESS_COMPILE + +#ifdef SYNCHRONIZATION_STATS + +// Number of GCs have we done since we last logged. +static unsigned int gc_count_during_log; + // In ms. This is how often we print out stats. +static const unsigned int log_interval = 5000; +// Time (in ms) when we start a new log interval. 
static unsigned int log_start_tick;
static unsigned int gc_lock_contended;
// Cycles accumulated in SuspendEE during log_interval.
static uint64_t suspend_ee_during_log;
// Cycles accumulated in RestartEE during log_interval.
static uint64_t restart_ee_during_log;
static uint64_t gc_during_log;

#endif //SYNCHRONIZATION_STATS

// Resets the synchronization-stat accumulators at the start of a new logging
// interval and counts this GC. No-op unless SYNCHRONIZATION_STATS is defined.
void
init_sync_log_stats()
{
#ifdef SYNCHRONIZATION_STATS
    if (gc_count_during_log == 0)
    {
        gc_heap::init_sync_stats();
        suspend_ee_during_log = 0;
        restart_ee_during_log = 0;
        gc_during_log = 0;
        gc_lock_contended = 0;

        log_start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
    }
    gc_count_during_log++;
#endif //SYNCHRONIZATION_STATS
}

// Prints averaged synchronization stats once per log_interval and resets the
// GC counter. No-op unless SYNCHRONIZATION_STATS is defined.
void
process_sync_log_stats()
{
#ifdef SYNCHRONIZATION_STATS

    unsigned int log_elapsed = GCToOSInterface::GetLowPrecisionTimeStamp() - log_start_tick;

    if (log_elapsed > log_interval)
    {
        // Print out the cycles we spent on average in each suspend and restart.
        printf("\n_________________________________________________________________________________\n"
            "Past %d(s): #%3d GCs; Total gc_lock contended: %8u; GC: %12u\n"
            "SuspendEE: %8u; RestartEE: %8u\n",
            log_interval / 1000,
            gc_count_during_log,
            gc_lock_contended,
            (unsigned int)(gc_during_log / gc_count_during_log),
            (unsigned int)(suspend_ee_during_log / gc_count_during_log),
            (unsigned int)(restart_ee_during_log / gc_count_during_log));
        gc_heap::print_sync_stats(gc_count_during_log);

        gc_count_during_log = 0;
    }
#endif //SYNCHRONIZATION_STATS
}

#ifdef MULTIPLE_HEAPS

// Identifies each rendezvous point where server-GC (or BGC) threads join.
enum gc_join_stage
{
    gc_join_init_cpu_mapping = 0,
    gc_join_done = 1,
    gc_join_generation_determined = 2,
    gc_join_begin_mark_phase = 3,
    gc_join_scan_dependent_handles = 4,
    gc_join_rescan_dependent_handles = 5,
    gc_join_scan_sizedref_done = 6,
    gc_join_null_dead_short_weak = 7,
    gc_join_scan_finalization = 8,
    gc_join_null_dead_long_weak = 9,
    gc_join_null_dead_syncblk = 10,
    gc_join_decide_on_compaction = 11,
    gc_join_rearrange_segs_compaction = 12,
    gc_join_adjust_handle_age_compact = 13,
    gc_join_adjust_handle_age_sweep = 14,
    gc_join_begin_relocate_phase = 15,
    gc_join_relocate_phase_done = 16,
    gc_join_verify_objects_done = 17,
    gc_join_start_bgc = 18,
    gc_join_restart_ee = 19,
    gc_join_concurrent_overflow = 20,
    gc_join_suspend_ee = 21,
    gc_join_bgc_after_ephemeral = 22,
    gc_join_allow_fgc = 23,
    gc_join_bgc_sweep = 24,
    gc_join_suspend_ee_verify = 25,
    gc_join_restart_ee_verify = 26,
    gc_join_set_state_free = 27,
    gc_r_join_update_card_bundle = 28,
    gc_join_after_absorb = 29,
    gc_join_verify_copy_table = 30,
    gc_join_after_reset = 31,
    gc_join_after_ephemeral_sweep = 32,
    gc_join_after_profiler_heap_walk = 33,
    gc_join_minimal_gc = 34,
    gc_join_after_commit_soh_no_gc = 35,
    gc_join_expand_loh_no_gc = 36,
    gc_join_final_no_gc = 37,
    gc_join_disable_software_write_watch = 38,
    gc_join_max = 39
};

// Which set of threads the t_join instance coordinates.
enum gc_join_flavor
{
    join_flavor_server_gc = 0,
    join_flavor_bgc = 1
};

#define first_thread_arrived 2
struct join_structure
{
    CLREvent joined_event[3]; // the last event in the array is only used for first_thread_arrived.
    VOLATILE(int32_t) join_lock;
    VOLATILE(int32_t) r_join_lock;
    VOLATILE(int32_t) join_restart;
    VOLATILE(int32_t) r_join_restart; // only used by get_here_first and friends.
    int n_threads;
    VOLATILE(BOOL) joined_p;
    // avoid lock_color and join_lock being on same cache line
    // make sure to modify this if adding/removing variables to layout
    char cache_line_separator[HS_CACHE_LINE_SIZE - (3*sizeof(int) + sizeof(int) + sizeof(BOOL))];
    VOLATILE(int) lock_color;
    VOLATILE(BOOL) wait_done;
};

enum join_type
{
    type_last_join = 0,
    type_join = 1,
    type_restart = 2,
    type_first_r_join = 3,
    type_r_join = 4
};

enum join_time
{
    time_start = 0,
    time_end = 1
};

// Sentinel "heap numbers" used when firing restart ETW events, which are not
// associated with a particular heap.
enum join_heap_index
{
    join_heap_restart = 100,
    join_heap_r_restart = 200
};

struct join_event
{
    uint32_t heap;
    join_time time;
    join_type type;
};

// Barrier used by the n server-GC (or BGC) worker threads. The last thread to
// arrive at join() runs the sequential work, then restart() releases the
// others. Two manual-reset events are used alternately ("lock_color" flips
// each round) so a new join can begin while stragglers drain out of the
// previous one; the third event serves the reverse join (r_join/r_restart).
class t_join
{
    join_structure join_struct;

    int id;
    gc_join_flavor flavor;

#ifdef JOIN_STATS
    unsigned int start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq;
    // remember join id and last thread to arrive so restart can use these
    int thd;
    // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval
    uint32_t start_tick;
    // counters for joins, in 1000's of clock cycles
    unsigned int elapsed_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max];
#endif //JOIN_STATS

public:
    // Creates the three join events and initializes the counters for n_th
    // participating threads. Returns FALSE if event creation fails.
    BOOL init (int n_th, gc_join_flavor f)
    {
        dprintf (JOIN_LOG, ("Initializing join structure"));
        join_struct.n_threads = n_th;
        join_struct.lock_color = 0;
        for (int i = 0; i < 3; i++)
        {
            if (!join_struct.joined_event[i].IsValid())
            {
                join_struct.joined_p = FALSE;
                dprintf (JOIN_LOG, ("Creating join event %d", i));
                // TODO - changing this to a non OS event
                // because this is also used by BGC threads which are
                // managed threads and WaitEx does not allow you to wait
                // for an OS event on a managed thread.
                // But we are not sure if this plays well in the hosting
                // environment.
                //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE);
                if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE))
                    return FALSE;
            }
        }
        join_struct.join_lock = join_struct.n_threads;
        join_struct.join_restart = join_struct.n_threads - 1;
        join_struct.r_join_lock = join_struct.n_threads;
        join_struct.r_join_restart = join_struct.n_threads - 1;
        join_struct.wait_done = FALSE;
        flavor = f;

#ifdef JOIN_STATS
        start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //JOIN_STATS

        return TRUE;
    }

    // Closes the join events created by init().
    void destroy ()
    {
        dprintf (JOIN_LOG, ("Destroying join structure"));
        for (int i = 0; i < 3; i++)
        {
            if (join_struct.joined_event[i].IsValid())
                join_struct.joined_event[i].CloseEvent();
        }
    }

    inline void fire_event (int heap, join_time time, join_type type, int join_id)
    {
        FireEtwGCJoin_V2(heap, time, type, GetClrInstanceId(), join_id);
    }

    // Blocks the calling worker until all n_threads have arrived. The last
    // thread to decrement join_lock does NOT block; it becomes the "sequential"
    // thread and must later call restart() to release the rest.
    void join (gc_heap* gch, int join_id)
    {
#ifdef JOIN_STATS
        // parallel execution ends here
        end[gch->heap_number] = GetCycleCount32();
#endif //JOIN_STATS

        assert (!join_struct.joined_p);
        // Capture the current color; restart() flips it to release us.
        int color = join_struct.lock_color;

        if (Interlocked::Decrement(&join_struct.join_lock) != 0)
        {
            dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d",
                flavor, join_id, (int32_t)(join_struct.join_lock)));

            fire_event (gch->heap_number, time_start, type_join, join_id);

            //busy wait around the color
            if (color == join_struct.lock_color)
            {
respin:
                int spin_count = 4096 * g_SystemInfo.dwNumberOfProcessors;
                for (int j = 0; j < spin_count; j++)
                {
                    if (color != join_struct.lock_color)
                    {
                        break;
                    }
                    YieldProcessor(); // indicate to the processor that we are spinning
                }

                // we've spun, and if color still hasn't changed, fall into hard wait
                if (color == join_struct.lock_color)
                {
                    dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d",
                        flavor, join_id, color, (int32_t)(join_struct.join_lock)));

                    //Thread* current_thread = GetThread();
                    //BOOL cooperative_mode = gc_heap::enable_preemptive (current_thread);
                    uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE);
                    //gc_heap::disable_preemptive (current_thread, cooperative_mode);

                    if (dwJoinWait != WAIT_OBJECT_0)
                    {
                        STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait);
                        FATAL_GC_ERROR ();
                    }
                }

                // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
                if (color == join_struct.lock_color)
                {
                    goto respin;
                }

                dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d",
                    flavor, join_id, (int32_t)(join_struct.join_lock)));
            }

            fire_event (gch->heap_number, time_end, type_join, join_id);

            // last thread out should reset event
            if (Interlocked::Decrement(&join_struct.join_restart) == 0)
            {
                // the joined event must be set at this point, because the restarting must have done this
                join_struct.join_restart = join_struct.n_threads - 1;
//                printf("Reset joined_event %d\n", color);
            }

#ifdef JOIN_STATS
            // parallel execution starts here
            start[gch->heap_number] = GetCycleCount32();
            Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])/1000);
#endif //JOIN_STATS
        }
        else
        {
            // Last thread to arrive: take ownership of the sequential phase.
            fire_event (gch->heap_number, time_start, type_last_join, join_id);

            join_struct.joined_p = TRUE;
            dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id));
            // Pre-reset the OTHER color's event so the next round starts clean.
            join_struct.joined_event[!color].Reset();
            id = join_id;
            // this one is alone so it can proceed
#ifdef JOIN_STATS
            // remember the join id, the last thread arriving, the start of the sequential phase,
            // and keep track of the cycles spent waiting in the join
            thd = gch->heap_number;
            start_seq = GetCycleCount32();
            Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])/1000);
#endif //JOIN_STATS
        }
    }

    // Reverse join - first thread gets here does the work; other threads will only proceed
    // afte the work is done.
    // Note that you cannot call this twice in a row on the same thread. Plus there's no
    // need to call it twice in row - you should just merge the work.
    BOOL r_join (gc_heap* gch, int join_id)
    {
#ifdef JOIN_STATS
        // parallel execution ends here
        end[gch->heap_number] = GetCycleCount32();
#endif //JOIN_STATS

        if (join_struct.n_threads == 1)
        {
            return TRUE;
        }

        if (Interlocked::Decrement(&join_struct.r_join_lock) != (join_struct.n_threads - 1))
        {
            // Not the first thread - wait until the first one signals wait_done
            // via r_restart(), then return FALSE (the work is already done).
            if (!join_struct.wait_done)
            {
                dprintf (JOIN_LOG, ("r_join() Waiting..."));

                fire_event (gch->heap_number, time_start, type_join, join_id);

                //busy wait around the color
                if (!join_struct.wait_done)
                {
        respin:
                    int spin_count = 2 * 4096 * g_SystemInfo.dwNumberOfProcessors;
                    for (int j = 0; j < spin_count; j++)
                    {
                        if (join_struct.wait_done)
                        {
                            break;
                        }
                        YieldProcessor(); // indicate to the processor that we are spinning
                    }

                    // we've spun, and if color still hasn't changed, fall into hard wait
                    if (!join_struct.wait_done)
                    {
                        dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived));
                        uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE);
                        if (dwJoinWait != WAIT_OBJECT_0)
                        {
                            STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait);
                            FATAL_GC_ERROR ();
                        }
                    }

                    // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
                    if (!join_struct.wait_done)
                    {
                        goto respin;
                    }

                    dprintf (JOIN_LOG, ("r_join() done"));
                }

                fire_event (gch->heap_number, time_end, type_join, join_id);

#ifdef JOIN_STATS
                // parallel execution starts here
                start[gch->heap_number] = GetCycleCount32();
                Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])/1000);
#endif //JOIN_STATS
            }

            return FALSE;
        }
        else
        {
            // First thread here: caller should do the work, then call r_restart().
            fire_event (gch->heap_number, time_start, type_first_r_join, join_id);
            return TRUE;
        }
    }

    // Called by the sequential thread after its work: flips lock_color and
    // sets the old color's event, releasing all threads blocked in join().
    void restart()
    {
#ifdef JOIN_STATS
        unsigned int elapsed_seq = GetCycleCount32() - start_seq;
        unsigned int max = 0, sum = 0;
        for (int i = 0; i < join_struct.n_threads; i++)
        {
            unsigned int elapsed = end[i] - start[i];
            if (max < elapsed)
                max = elapsed;
            sum += elapsed;
        }
        unsigned int seq_loss = (join_struct.n_threads - 1)*elapsed_seq;
        unsigned int par_loss = join_struct.n_threads*max - sum;
        double efficiency = 0.0;
        if (max > 0)
            efficiency = sum*100.0/(join_struct.n_threads*max);

        // enable this printf to get statistics on each individual join as it occurs
//      printf("join #%3d  seq_loss = %5d  par_loss = %5d  efficiency = %3.0f%%\n", join_id, seq_loss/1000, par_loss/1000, efficiency);

        elapsed_total[join_id] += sum/1000;
        seq_loss_total[join_id] += seq_loss/1000;
        par_loss_total[join_id] += par_loss/1000;

        // every 10 seconds, print a summary of the time spent in each type of join, in 1000's of clock cycles
        if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000)
        {
            printf("**** summary *****\n");
            for (int i = 0; i < 16; i++)
            {
                printf("join #%3d  seq_loss = %8u  par_loss = %8u  in_join_total = %8u\n", i, seq_loss_total[i], par_loss_total[i], in_join_total[i]);
                elapsed_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0;
            }
            start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
        }
#endif //JOIN_STATS

        fire_event (join_heap_restart, time_start, type_restart, -1);
        assert (join_struct.joined_p);
        join_struct.joined_p = FALSE;
        join_struct.join_lock = join_struct.n_threads;
        dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
//      printf("restart from join #%d at cycle %u from start of gc\n", join_id, GetCycleCount32() - gc_start);
        // Flip the color BEFORE setting the event so waiters observing the
        // new color know this round is over.
        int color = join_struct.lock_color;
        join_struct.lock_color = !color;
        join_struct.joined_event[color].Set();

//        printf("Set joined_event %d\n", !join_struct.lock_color);

        fire_event (join_heap_restart, time_end, type_restart, -1);

#ifdef JOIN_STATS
        start[thd] = GetCycleCount32();
#endif //JOIN_STATS
    }

    // TRUE while the sequential phase of a join is in progress.
    BOOL joined()
    {
        dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
        return join_struct.joined_p;
    }

    // Releases threads blocked in r_join() after the first thread's work.
    void r_restart()
    {
        if (join_struct.n_threads != 1)
        {
            fire_event (join_heap_r_restart, time_start, type_restart, -1);
            join_struct.wait_done = TRUE;
            join_struct.joined_event[first_thread_arrived].Set();
            fire_event (join_heap_r_restart, time_end, type_restart, -1);
        }
    }

    // Re-arms the reverse-join state for the next r_join round.
    void r_init()
    {
        if (join_struct.n_threads != 1)
        {
            join_struct.r_join_lock = join_struct.n_threads;
            join_struct.r_join_restart = join_struct.n_threads - 1;
            join_struct.wait_done = FALSE;
            join_struct.joined_event[first_thread_arrived].Reset();
        }
    }
};

t_join gc_t_join;

#ifdef BACKGROUND_GC
t_join bgc_t_join;
#endif //BACKGROUND_GC

#endif //MULTIPLE_HEAPS

// Spin up to count_to_spin iterations waiting for expr to become true; if it
// still isn't, yield the rest of this thread's timeslice once.
#define spin_and_switch(count_to_spin, expr) \
{ \
    for (int j = 0; j < count_to_spin; j++) \
    { \
        if (expr) \
        { \
            break;\
        } \
        YieldProcessor(); \
    } \
    if (!(expr)) \
    { \
        GCToOSInterface::YieldThread(0); \
    } \
}

#ifndef DACCESS_COMPILE
#ifdef BACKGROUND_GC

#define max_pending_allocs 64

// Lock-free coordination between the background-GC thread marking an object
// (bgc_mark_set/bgc_mark_done) and user threads allocating on the LOH
// (loh_alloc_set/loh_alloc_done*): an object may not be marked while it is
// being allocated, and vice versa. needs_checking is a one-bit spin lock
// guarding the rwp_object / alloc_objects registries.
class exclusive_sync
{
    // TODO - verify that this is the right syntax for Volatile.
    VOLATILE(uint8_t*) rwp_object;
    VOLATILE(int32_t) needs_checking;

    int spin_count;

    uint8_t cache_separator[HS_CACHE_LINE_SIZE - sizeof (int) - sizeof (int32_t)];

    // TODO - perhaps each object should be on its own cache line...
    VOLATILE(uint8_t*) alloc_objects[max_pending_allocs];

    // Linear scan for an empty slot; returns -1 if all slots are taken.
    int find_free_index ()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == (uint8_t*)0)
            {
                return i;
            }
        }

        return -1;
    }

public:
    void init()
    {
        spin_count = 32 * (g_SystemInfo.dwNumberOfProcessors - 1);
        rwp_object = 0;
        needs_checking = 0;
        for (int i = 0; i < max_pending_allocs; i++)
        {
            alloc_objects [i] = (uint8_t*)0;
        }
    }

    // Debug aid: breaks if any pending-allocation slot was left registered.
    void check()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] != (uint8_t*)0)
            {
                GCToOSInterface::DebugBreak();
            }
        }
    }

    // Registers obj as being marked by the BGC thread; spins while obj is in
    // the pending-allocation table.
    void bgc_mark_set (uint8_t* obj)
    {
        dprintf (3, ("cm: probing %Ix", obj));
retry:
        if (Interlocked::Exchange (&needs_checking, 1) == 0)
        {
            // If we spend too much time spending all the allocs,
            // consider adding a high water mark and scan up
            // to that; we'll need to interlock in done when
            // we update the high watermark.
            for (int i = 0; i < max_pending_allocs; i++)
            {
                if (obj == alloc_objects[i])
                {
                    needs_checking = 0;
                    dprintf (3, ("cm: will spin"));
                    spin_and_switch (spin_count, (obj != alloc_objects[i]));
                    goto retry;
                }
            }

            rwp_object = obj;
            needs_checking = 0;
            dprintf (3, ("cm: set %Ix", obj));
            return;
        }
        else
        {
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    // Registers obj as a pending LOH allocation; spins while obj is the one
    // currently being marked. Returns a slot cookie for
    // loh_alloc_done_with_index, or -1 when no BGC mark is in progress.
    int loh_alloc_set (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return -1;
        }

retry:
        dprintf (3, ("loh alloc: probing %Ix", obj));

        if (Interlocked::Exchange (&needs_checking, 1) == 0)
        {
            if (obj == rwp_object)
            {
                needs_checking = 0;
                spin_and_switch (spin_count, (obj != rwp_object));
                goto retry;
            }
            else
            {
                int cookie = find_free_index();

                if (cookie != -1)
                {
                    alloc_objects[cookie] = obj;
                    needs_checking = 0;
                    //if (cookie >= 4)
                    //{
                    //    GCToOSInterface::DebugBreak();
                    //}

                    dprintf (3, ("loh alloc: set %Ix at %d", obj, cookie));
                    return cookie;
                }
                else
                {
                    // Table full - release the check lock, wait for a slot.
                    needs_checking = 0;
                    dprintf (3, ("loh alloc: setting %Ix will spin to acquire a free index", obj));
                    spin_and_switch (spin_count, (find_free_index () != -1));
                    goto retry;
                }
            }
        }
        else
        {
            dprintf (3, ("loh alloc: will spin on checking %Ix", obj));
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    void bgc_mark_done ()
    {
        dprintf (3, ("cm: release lock on %Ix", (uint8_t *)rwp_object));
        rwp_object = 0;
    }

    void loh_alloc_done_with_index (int index)
    {
        dprintf (3, ("loh alloc: release lock on %Ix based on %d", (uint8_t *)alloc_objects[index], index));
        assert ((index >= 0) && (index < max_pending_allocs));
        alloc_objects[index] = (uint8_t*)0;
    }

    // Slower variant used when the slot cookie is not available: scans the
    // table for obj and clears its slot.
    void loh_alloc_done (uint8_t* obj)
    {
#ifdef BACKGROUND_GC
        if (!gc_heap::cm_in_progress)
        {
            return;
        }

        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == obj)
            {
                dprintf (3, ("loh alloc: release lock on %Ix at %d", (uint8_t *)alloc_objects[i], i));
                alloc_objects[i] = (uint8_t*)0;
                return;
            }
        }
#endif //BACKGROUND_GC
    }
};

// Note that this class was written assuming just synchronization between
// one background GC thread and multiple user threads that might request
// an FGC - it does not take into account what kind of locks the multiple
// user threads might be holding at the time (eg, there could only be one
// user thread requesting an FGC because it needs to take gc_lock first)
// so you'll see checks that may not be necessary if you take those conditions
// into consideration.
//
// With the introduction of Server Background GC we no longer use this
// class to do synchronization between FGCs and BGC.
// Handshake between a single background GC thread and user threads that want
// to run a foreground (ephemeral) GC while the BGC is in progress. The BGC
// thread periodically calls allow_foreground() at safe points; a requesting
// thread blocks in begin_foreground() until the gate opens.
class recursive_gc_sync
{
    static VOLATILE(int32_t) foreground_request_count;//initial state 0
    static VOLATILE(BOOL) gc_background_running; //initial state FALSE
    static VOLATILE(int32_t) foreground_count; // initial state 0;
    static VOLATILE(uint32_t) foreground_gate; // initial state FALSE;
    static CLREvent foreground_complete;//Auto Reset
    static CLREvent foreground_allowed;//Auto Reset
public:
    static void begin_background();
    static void end_background();
    static void begin_foreground();
    static void end_foreground();
    BOOL allow_foreground ();
    static BOOL init();
    static void shutdown();
    static BOOL background_running_p() {return gc_background_running;}
};

VOLATILE(int32_t) recursive_gc_sync::foreground_request_count = 0;//initial state 0
VOLATILE(int32_t) recursive_gc_sync::foreground_count = 0; // initial state 0;
VOLATILE(BOOL) recursive_gc_sync::gc_background_running = FALSE; //initial state FALSE
VOLATILE(uint32_t) recursive_gc_sync::foreground_gate = 0;
CLREvent recursive_gc_sync::foreground_complete;//Auto Reset
CLREvent recursive_gc_sync::foreground_allowed;//Manual Reset

// Creates the two handshake events; on failure tears down and returns FALSE.
BOOL recursive_gc_sync::init ()
{
    foreground_request_count = 0;
    foreground_count = 0;
    gc_background_running = FALSE;
    foreground_gate = 0;

    if (!foreground_complete.CreateOSAutoEventNoThrow(FALSE))
    {
        goto error;
    }
    if (!foreground_allowed.CreateManualEventNoThrow(FALSE))
    {
        goto error;
    }
    return TRUE;

error:
    shutdown();
    return FALSE;

}

void recursive_gc_sync::shutdown()
{
    if (foreground_complete.IsValid())
        foreground_complete.CloseEvent();
    if (foreground_allowed.IsValid())
        foreground_allowed.CloseEvent();
}

// Called by the BGC thread when a background GC starts. The counts start at 1
// so the BGC thread itself holds a reference until end_background().
void recursive_gc_sync::begin_background()
{
    dprintf (2, ("begin background"));
    foreground_request_count = 1;
    foreground_count = 1;
    foreground_allowed.Reset();
    gc_background_running = TRUE;
}
// Called by the BGC thread when the background GC finishes; opens the gate so
// any waiting foreground requests can proceed (no BGC to synchronize with).
void recursive_gc_sync::end_background()
{
    dprintf (2, ("end background"));
    gc_background_running = FALSE;
    foreground_gate = 1;
    foreground_allowed.Set();
}

// Called by a user thread that wants a foreground GC during a BGC. Blocks (in
// preemptive mode) until allow_foreground() opens the gate; re-checks the gate
// after registering, since the BGC may have closed it again meanwhile.
void recursive_gc_sync::begin_foreground()
{
    dprintf (2, ("begin_foreground"));

    BOOL cooperative_mode = FALSE;
    Thread* current_thread = 0;

    if (gc_background_running)
    {
        gc_heap::fire_alloc_wait_event_begin (awr_fgc_wait_for_bgc);
        gc_heap::alloc_wait_event_p = TRUE;

try_again_top:

        Interlocked::Increment (&foreground_request_count);

try_again_no_inc:
        dprintf(2, ("Waiting sync gc point"));
        assert (foreground_allowed.IsValid());
        assert (foreground_complete.IsValid());

        current_thread = GetThread();
        cooperative_mode = gc_heap::enable_preemptive (current_thread);

        foreground_allowed.Wait(INFINITE, FALSE);

        dprintf(2, ("Waiting sync gc point is done"));

        gc_heap::disable_preemptive (current_thread, cooperative_mode);

        if (foreground_gate)
        {
            Interlocked::Increment (&foreground_count);
            dprintf (2, ("foreground_count: %d", (int32_t)foreground_count));
            // Gate may have closed between the check above and the increment;
            // if so, undo and start over.
            if (foreground_gate)
            {
                gc_heap::settings.concurrent = FALSE;
                return;
            }
            else
            {
                end_foreground();
                goto try_again_top;
            }
        }
        else
        {
            goto try_again_no_inc;
        }
    }
}

// Releases a foreground-GC reference; the last one out closes the gate and
// signals the BGC thread (blocked in allow_foreground) that the FGC is done.
void recursive_gc_sync::end_foreground()
{
    dprintf (2, ("end_foreground"));
    if (gc_background_running)
    {
        Interlocked::Decrement (&foreground_request_count);
        dprintf (2, ("foreground_count before decrement: %d", (int32_t)foreground_count));
        if (Interlocked::Decrement (&foreground_count) == 0)
        {
            //c_write ((BOOL*)&foreground_gate, 0);
            // TODO - couldn't make the syntax work with Volatile<T>
            foreground_gate = 0;
            if (foreground_count == 0)
            {
                foreground_allowed.Reset ();
                dprintf(2, ("setting foreground complete event"));
                foreground_complete.Set();
            }
        }
    }
}

// Called by the BGC thread at safe points: if a foreground GC has been
// requested, opens the gate, waits for the FGC(s) to complete, and restores
// the BGC's settings. Returns TRUE if any foreground GC ran.
inline
BOOL recursive_gc_sync::allow_foreground()
{
    assert (gc_heap::settings.concurrent);
    dprintf (100, ("enter allow_foreground, f_req_count: %d, f_count: %d",
                   (int32_t)foreground_request_count, (int32_t)foreground_count));

    BOOL did_fgc = FALSE;

    //if we have suspended the EE, just return because
    //some thread could be waiting on this to proceed.
    if (!GCHeap::GcInProgress)
    {
        //TODO BACKGROUND_GC This is to stress the concurrency between
        //background and foreground
//        gc_heap::disallow_new_allocation (0);

        //GCToOSInterface::YieldThread(0);

        //END of TODO
        if (foreground_request_count != 0)
        {
            //foreground wants to run
            //save the important settings
            //TODO BACKGROUND_GC be more selective about the important settings.
            gc_mechanisms saved_settings = gc_heap::settings;
            do
            {
                did_fgc = TRUE;
                //c_write ((BOOL*)&foreground_gate, 1);
                // TODO - couldn't make the syntax work with Volatile<T>
                foreground_gate = 1;
                foreground_allowed.Set ();
                foreground_complete.Wait (INFINITE, FALSE);
            }while (/*foreground_request_count ||*/ foreground_gate);

            assert (!foreground_gate);

            //restore the important settings
            gc_heap::settings = saved_settings;
            GCHeap::GcCondemnedGeneration = gc_heap::settings.condemned_generation;
            //the background GC shouldn't be using gc_high and gc_low
            //gc_low = lowest_address;
            //gc_high = highest_address;
        }

        //TODO BACKGROUND_GC This is to stress the concurrency between
        //background and foreground
//        gc_heap::allow_new_allocation (0);
        //END of TODO
    }

    dprintf (100, ("leave allow_foreground"));
    assert (gc_heap::settings.concurrent);
    return did_fgc;
}

#endif //BACKGROUND_GC
#endif //DACCESS_COMPILE

#if defined(COUNT_CYCLES) || defined(JOIN_STATS) || defined(SYNCHRONIZATION_STATS)
#ifdef _MSC_VER
// warning C4035: no return value - the value is left in EDX:EAX by rdtsc.
#pragma warning(disable:4035)
#endif //_MSC_VER

// Reads the low 32 bits of the CPU time-stamp counter via inline rdtsc
// (x86/MSVC only - the _emit bytes are the rdtsc opcode 0F 31).
static
unsigned        GetCycleCount32()        // enough for about 40 seconds
{
__asm   push    EDX
__asm   _emit   0x0F
__asm   _emit   0x31
__asm   pop     EDX
};

#pragma warning(default:4035)

#endif //COUNT_CYCLES || JOIN_STATS || SYNCHRONIZATION_STATS

#ifdef TIME_GC
// Accumulated per-phase GC times (milliseconds per the uses elsewhere -
// TODO confirm units against the code that prints these).
int mark_time, plan_time, sweep_time, reloc_time, compact_time;
#endif //TIME_GC

#ifndef MULTIPLE_HEAPS

#define ephemeral_low           g_ephemeral_low
#define ephemeral_high          g_ephemeral_high

#endif // MULTIPLE_HEAPS

#ifdef TRACE_GC

int     print_level     = DEFAULT_GC_PRN_LVL;  //level of detail of the debug trace
BOOL    trace_gc        = FALSE;
int       gc_trace_fac = 0;
hlet* hlet::bindings = 0;

#endif //TRACE_GC

void reset_memory (uint8_t* o, size_t sizeo);

#ifdef WRITE_WATCH

#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
static bool virtual_alloc_hardware_write_watch = false;
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

// Cached result of the OS write-watch capability probe below.
static bool hardware_write_watch_capability = false;

#ifndef DACCESS_COMPILE

//check if the write watch APIs are supported.

// Probes the OS once for hardware write-watch support and caches the result
// in hardware_write_watch_capability.
void hardware_write_watch_api_supported()
{
    if (GCToOSInterface::SupportsWriteWatch())
    {
        hardware_write_watch_capability = true;
        dprintf (2, ("WriteWatch supported"));
    }
    else
    {
        dprintf (2,("WriteWatch not supported"));
    }
}

#endif //!DACCESS_COMPILE

inline bool can_use_hardware_write_watch()
{
    return hardware_write_watch_capability;
}

// Software write watch (when compiled in) always works for the GC heap;
// otherwise fall back to the hardware capability.
inline bool can_use_write_watch_for_gc_heap()
{
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    return true;
#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    return can_use_hardware_write_watch();
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
}

// The card table only supports hardware write watch.
inline bool can_use_write_watch_for_card_table()
{
    return can_use_hardware_write_watch();
}

#else
#define mem_reserve (MEM_RESERVE)
#endif //WRITE_WATCH

//check if the low memory notification is supported

#ifndef DACCESS_COMPILE

// Back-off used by enter_spin_lock_noinstru after the spin phase: toggles to
// preemptive mode (if this thread has one), yields or sleeps, and blocks for
// any in-progress GC so spinners cannot starve the GC thread.
void WaitLongerNoInstru (int i)
{
    // every 8th attempt:
    Thread *pCurThread = GetThread();
    bool bToggleGC = false;
    if (pCurThread)
    {
        bToggleGC = GCToEEInterface::IsPreemptiveGCDisabled(pCurThread);
        if (bToggleGC)
            GCToEEInterface::EnablePreemptiveGC(pCurThread);
    }

    // if we're waiting for gc to finish, we should block immediately
    if (!g_TrapReturningThreads)
    {
        if  (g_SystemInfo.dwNumberOfProcessors > 1)
        {
            YieldProcessor();           // indicate to the processor that we are spining
            // Sleep only every 32nd attempt; otherwise just yield the timeslice.
            if  (i & 0x01f)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        else
            GCToOSInterface::Sleep (5);
    }

    // If CLR is hosted, a thread may reach here while it is in preemptive GC mode,
    // or it has no Thread object, in order to force a task to yield, or to triger a GC.
    // It is important that the thread is going to wait for GC. Otherwise the thread
    // is in a tight loop. If the thread has high priority, the perf is going to be very BAD.
    if (pCurThread)
    {
        if (bToggleGC || g_TrapReturningThreads)
        {
#ifdef _DEBUG
            // In debug builds, all enter_spin_lock operations go through this code. If a GC has
            // started, it is important to block until the GC thread calls set_gc_done (since it is
            // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock
            // conditions which can otherwise occur if threads are allowed to spin in this function
            // (and therefore starve the GC thread) between the point when the GC thread sets the
            // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads.
            if (gc_heap::gc_started)
            {
                gc_heap::wait_for_gc_done();
            }
#endif // _DEBUG
            GCToEEInterface::DisablePreemptiveGC(pCurThread);
            if (!bToggleGC)
            {
                GCToEEInterface::EnablePreemptiveGC(pCurThread);
            }
        }
    }
    else if (g_TrapReturningThreads)
    {
        GCHeap::GetGCHeap()->WaitUntilGCComplete();
    }
}

// Yields this thread's timeslice in preemptive mode so a suspended GC can
// proceed, then restores the previous cooperative mode.
inline
static void safe_switch_to_thread()
{
    Thread* current_thread = GetThread();
    BOOL cooperative_mode = gc_heap::enable_preemptive(current_thread);

    GCToOSInterface::YieldThread(0);

    gc_heap::disable_preemptive(current_thread, cooperative_mode);
}

//
// We need the following methods to have volatile arguments, so that they can accept
// raw pointers in addition to the results of the & operator on Volatile<T>.
+// +inline +static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ +retry: + + if (Interlocked::Exchange (lock, 0) >= 0) + { + unsigned int i = 0; + while (VolatileLoad(lock) >= 0) + { + if ((++i & 7) && !GCHeap::IsGCInProgress()) + { + if (g_SystemInfo.dwNumberOfProcessors > 1) + { +#ifndef MULTIPLE_HEAPS + int spin_count = 1024 * g_SystemInfo.dwNumberOfProcessors; +#else //!MULTIPLE_HEAPS + int spin_count = 32 * g_SystemInfo.dwNumberOfProcessors; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) + { + if (VolatileLoad(lock) < 0 || GCHeap::IsGCInProgress()) + break; + YieldProcessor(); // indicate to the processor that we are spining + } + if (VolatileLoad(lock) >= 0 && !GCHeap::IsGCInProgress()) + { + safe_switch_to_thread(); + } + } + else + { + safe_switch_to_thread(); + } + } + else + { + WaitLongerNoInstru(i); + } + } + goto retry; + } +} + +inline +static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) +{ + return (Interlocked::Exchange (&*lock, 0) < 0); +} + +inline +static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ + VolatileStore<int32_t>((int32_t*)lock, -1); +} + +#ifdef _DEBUG + +inline +static void enter_spin_lock(GCSpinLock *pSpinLock) +{ + enter_spin_lock_noinstru(&pSpinLock->lock); + assert (pSpinLock->holding_thread == (Thread*)-1); + pSpinLock->holding_thread = GetThread(); +} + +inline +static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock) +{ + BOOL ret = try_enter_spin_lock_noinstru(&pSpinLock->lock); + if (ret) + pSpinLock->holding_thread = GetThread(); + return ret; +} + +inline +static void leave_spin_lock(GCSpinLock *pSpinLock) +{ + BOOL gc_thread_p = IsGCSpecialThread(); +// _ASSERTE((pSpinLock->holding_thread == GetThread()) || gc_thread_p || pSpinLock->released_by_gc_p); + pSpinLock->released_by_gc_p = gc_thread_p; + pSpinLock->holding_thread = (Thread*) -1; + if (pSpinLock->lock != -1) + leave_spin_lock_noinstru(&pSpinLock->lock); +} + +#define 
ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \ + _ASSERTE((pSpinLock)->holding_thread == GetThread()); + +#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ + _ASSERTE((pSpinLock)->holding_thread != GetThread()); + +#else //_DEBUG + +//In the concurrent version, the Enable/DisablePreemptiveGC is optional because +//the gc thread call WaitLonger. +void WaitLonger (int i +#ifdef SYNCHRONIZATION_STATS + , VOLATILE(GCSpinLock)* spin_lock +#endif //SYNCHRONIZATION_STATS + ) +{ +#ifdef SYNCHRONIZATION_STATS + (spin_lock->num_wait_longer)++; +#endif //SYNCHRONIZATION_STATS + + // every 8th attempt: + Thread *pCurThread = GetThread(); + bool bToggleGC = false; + if (pCurThread) + { + bToggleGC = GCToEEInterface::IsPreemptiveGCDisabled(pCurThread); + if (bToggleGC) + { + GCToEEInterface::EnablePreemptiveGC(pCurThread); + } + else + { + assert (!"bToggleGC == TRUE"); + } + } + + // if we're waiting for gc to finish, we should block immediately + if (!gc_heap::gc_started) + { +#ifdef SYNCHRONIZATION_STATS + (spin_lock->num_switch_thread_w)++; +#endif //SYNCHRONIZATION_STATS + if (g_SystemInfo.dwNumberOfProcessors > 1) + { + YieldProcessor(); // indicate to the processor that we are spining + if (i & 0x01f) + GCToOSInterface::YieldThread (0); + else + GCToOSInterface::Sleep (5); + } + else + GCToOSInterface::Sleep (5); + } + + // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, + // or it has no Thread object, in order to force a task to yield, or to triger a GC. + // It is important that the thread is going to wait for GC. Otherwise the thread + // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. 
+ if (pCurThread) + { + if (bToggleGC || gc_heap::gc_started) + { + if (gc_heap::gc_started) + { + gc_heap::wait_for_gc_done(); + } + +#ifdef SYNCHRONIZATION_STATS + (spin_lock->num_disable_preemptive_w)++; +#endif //SYNCHRONIZATION_STATS + GCToEEInterface::DisablePreemptiveGC(pCurThread); + } + } +} + +inline +static void enter_spin_lock (GCSpinLock* spin_lock) +{ +retry: + + if (Interlocked::Exchange (&spin_lock->lock, 0) >= 0) + { + unsigned int i = 0; + while (spin_lock->lock >= 0) + { + if ((++i & 7) && !gc_heap::gc_started) + { + if (g_SystemInfo.dwNumberOfProcessors > 1) + { +#ifndef MULTIPLE_HEAPS + int spin_count = 1024 * g_SystemInfo.dwNumberOfProcessors; +#else //!MULTIPLE_HEAPS + int spin_count = 32 * g_SystemInfo.dwNumberOfProcessors; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) + { + if (spin_lock->lock < 0 || gc_heap::gc_started) + break; + YieldProcessor(); // indicate to the processor that we are spining + } + if (spin_lock->lock >= 0 && !gc_heap::gc_started) + { +#ifdef SYNCHRONIZATION_STATS + (spin_lock->num_switch_thread)++; +#endif //SYNCHRONIZATION_STATS + Thread* current_thread = GetThread(); + BOOL cooperative_mode = gc_heap::enable_preemptive (current_thread); + + GCToOSInterface::YieldThread(0); + + gc_heap::disable_preemptive (current_thread, cooperative_mode); + } + } + else + GCToOSInterface::YieldThread(0); + } + else + { + WaitLonger(i +#ifdef SYNCHRONIZATION_STATS + , spin_lock +#endif //SYNCHRONIZATION_STATS + ); + } + } + goto retry; + } +} + +inline BOOL try_enter_spin_lock(GCSpinLock* spin_lock) +{ + return (Interlocked::Exchange (&spin_lock->lock, 0) < 0); +} + +inline +static void leave_spin_lock (GCSpinLock * spin_lock) +{ + spin_lock->lock = -1; +} + +#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) + +#endif //_DEBUG + +BOOL gc_heap::enable_preemptive (Thread* current_thread) +{ + bool cooperative_mode = false; + if (current_thread) + { + cooperative_mode = 
GCToEEInterface::IsPreemptiveGCDisabled(current_thread); + if (cooperative_mode) + { + GCToEEInterface::EnablePreemptiveGC(current_thread); + } + } + + return cooperative_mode; +} + +void gc_heap::disable_preemptive (Thread* current_thread, BOOL restore_cooperative) +{ + if (current_thread) + { + if (restore_cooperative) + { + GCToEEInterface::DisablePreemptiveGC(current_thread); + } + } +} + +#endif // !DACCESS_COMPILE + +typedef void ** PTR_PTR; +//This function clears a piece of memory +// size has to be Dword aligned + +inline +void memclr ( uint8_t* mem, size_t size) +{ + dprintf (3, ("MEMCLR: %Ix, %d", mem, size)); + assert ((size & (sizeof(PTR_PTR)-1)) == 0); + assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); + +#if 0 + // The compiler will recognize this pattern and replace it with memset call. We can as well just call + // memset directly to make it obvious what's going on. + PTR_PTR m = (PTR_PTR) mem; + for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) + *(m++) = 0; +#endif + + memset (mem, 0, size); +} + +void memcopy (uint8_t* dmem, uint8_t* smem, size_t size) +{ + const size_t sz4ptr = sizeof(PTR_PTR)*4; + const size_t sz2ptr = sizeof(PTR_PTR)*2; + const size_t sz1ptr = sizeof(PTR_PTR)*1; + + // size must be a multiple of the pointer size + assert ((size & (sizeof (PTR_PTR)-1)) == 0); + assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); + + // copy in groups of four pointer sized things at a time + if (size >= sz4ptr) + { + do + { + ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0]; + ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1]; + ((PTR_PTR)dmem)[2] = ((PTR_PTR)smem)[2]; + ((PTR_PTR)dmem)[3] = ((PTR_PTR)smem)[3]; + dmem += sz4ptr; + smem += sz4ptr; + } + while ((size -= sz4ptr) >= sz4ptr); + } + + // still two pointer sized things or more left to copy? + if (size & sz2ptr) + { + ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0]; + ((PTR_PTR)dmem)[1] = ((PTR_PTR)smem)[1]; + dmem += sz2ptr; + smem += sz2ptr; + } + + // still one pointer sized thing left to copy? 
    if (size & sz1ptr)
    {
        ((PTR_PTR)dmem)[0] = ((PTR_PTR)smem)[0];
        // dmem += sz1ptr;
        // smem += sz1ptr;
    }

}

// Round 'add' down to the nearest multiple of 'pitch'.
inline
ptrdiff_t round_down (ptrdiff_t add, int pitch)
{
    return ((add / pitch) * pitch);
}

#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT)
// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment,
// i.e, if a larger alignment matters or is beneficial, the compiler
// generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the
// converse - it's a heuristic for the GC to use a larger alignment.
#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT
#endif

#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION)
#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive
#endif

#if defined(GROWABLE_SEG_MAPPING_TABLE) && !defined(SEG_MAPPING_TABLE)
#error if GROWABLE_SEG_MAPPING_TABLE is defined, SEG_MAPPING_TABLE must be defined
#endif

// TRUE when p1 and p2 have the same offset within an 8-byte granule, so a plug
// moved from p1 to p2 keeps its large (8-byte) alignment. Always TRUE when
// RESPECT_LARGE_ALIGNMENT is off.
inline
BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2)
{
#ifdef RESPECT_LARGE_ALIGNMENT
    return ((((size_t)p1 ^ (size_t)p2) & 7) == 0);
#else
    UNREFERENCED_PARAMETER(p1);
    UNREFERENCED_PARAMETER(p2);
    return TRUE;
#endif //RESPECT_LARGE_ALIGNMENT
}

// Size of the filler needed to flip an allocation to the other data-alignment
// phase; a bare DATA_ALIGNMENT pad suffices only when padding already exists.
inline
size_t switch_alignment_size (BOOL already_padded_p)
{
    if (already_padded_p)
        return DATA_ALIGNMENT;
    else
        return (Align (min_obj_size) +((Align (min_obj_size)&DATA_ALIGNMENT)^DATA_ALIGNMENT));
}


#ifdef FEATURE_STRUCTALIGN
void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad);
void clear_node_aligninfo (uint8_t *node);
#else // FEATURE_STRUCTALIGN
// Low bit of the plug's reloc field records "this node was realigned".
#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1)
void set_node_realigned (uint8_t* node);
void clear_node_realigned(uint8_t* node);
#endif // FEATURE_STRUCTALIGN

inline
size_t AlignQword (size_t nbytes)
{
#ifdef FEATURE_STRUCTALIGN
    // This function is used to align everything on the large object
    // heap to an 8-byte boundary, to reduce the number of unaligned
    // accesses to (say) arrays of doubles. With FEATURE_STRUCTALIGN,
    // the compiler dictates the optimal alignment instead of having
    // a heuristic in the GC.
    return Align (nbytes);
#else // FEATURE_STRUCTALIGN
    return (nbytes + 7) & ~7;
#endif // FEATURE_STRUCTALIGN
}

// TRUE when n is aligned to the basic object alignment (ALIGNCONST mask).
inline
BOOL Aligned (size_t n)
{
    return (n & ALIGNCONST) == 0;
}

#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *))

#ifdef FEATURE_STRUCTALIGN
#define MAX_STRUCTALIGN OS_PAGE_SIZE
#else // FEATURE_STRUCTALIGN
#define MAX_STRUCTALIGN 0
#endif // FEATURE_STRUCTALIGN

#ifdef FEATURE_STRUCTALIGN
inline
ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment)
{
    // The resulting alignpad must be either 0 or at least min_obj_size.
    // Note that by computing the following difference on unsigned types,
    // we can do the range check 0 < alignpad < min_obj_size with a
    // single conditional branch.
    if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT)
    {
        return requiredAlignment;
    }
    return 0;
}

// Smallest pointer >= origPtr such that (ptr + alignmentOffset) is aligned to
// requiredAlignment and the resulting pad is either 0 or >= min_obj_size.
inline
uint8_t* StructAlign (uint8_t* origPtr, int requiredAlignment, ptrdiff_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET)
{
    // required alignment must be a power of two
    _ASSERTE(((size_t)origPtr & ALIGNCONST) == 0);
    _ASSERTE(((requiredAlignment - 1) & requiredAlignment) == 0);
    _ASSERTE(requiredAlignment >= sizeof(void *));
    _ASSERTE(requiredAlignment <= MAX_STRUCTALIGN);

    // When this method is invoked for individual objects (i.e., alignmentOffset
    // is just the size of the PostHeader), what needs to be aligned when
    // we're done is the pointer to the payload of the object (which means
    // the actual resulting object pointer is typically not aligned).

    uint8_t* result = (uint8_t*)Align ((size_t)origPtr + alignmentOffset, requiredAlignment-1) - alignmentOffset;
    ptrdiff_t alignpad = result - origPtr;

    return result + AdjustmentForMinPadSize (alignpad, requiredAlignment);
}

// Number of pad bytes StructAlign would insert in front of 'plug'.
inline
ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET)
{
    return StructAlign (plug, requiredAlignment, alignmentOffset) - plug;
}

BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment)
{
    return StructAlign (ptr, requiredAlignment) == ptr;
}

inline
ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment)
{
    if (requiredAlignment == DATA_ALIGNMENT)
        return 0;
    // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the
    // alignment padding object), the worst-case alignment padding is correspondingly larger
    // than the required alignment.
    return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT;
}

inline
ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment)
{
    if (requiredAlignment <= get_alignment_constant (TRUE)+1)
        return 0;
    // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space
    // for padding before the actual object, it also leaves space for filling a gap after the
    // actual object. This is needed on the large object heap, as the outer allocation functions
    // don't operate on an allocation context (which would have left space for the final gap).
    return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT;
}

// Align newAlloc for a small-heap allocation: turn any leading pad into a
// free (unused-array) object and advance the allocation context past 'size'.
uint8_t* gc_heap::pad_for_alignment (uint8_t* newAlloc, int requiredAlignment, size_t size, alloc_context* acontext)
{
    uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment);
    if (alignedPtr != newAlloc) {
        make_unused_array (newAlloc, alignedPtr - newAlloc);
    }
    acontext->alloc_ptr = alignedPtr + Align (size);
    return alignedPtr;
}

// LOH variant: also fills the trailing gap (up to the worst-case pad reserved
// by the caller) with a free object, since LOH has no allocation context.
uint8_t* gc_heap::pad_for_alignment_large (uint8_t* newAlloc, int requiredAlignment, size_t size)
{
    uint8_t* alignedPtr = StructAlign (newAlloc, requiredAlignment);
    if (alignedPtr != newAlloc) {
        make_unused_array (newAlloc, alignedPtr - newAlloc);
    }
    if (alignedPtr < newAlloc + ComputeMaxStructAlignPadLarge (requiredAlignment)) {
        make_unused_array (alignedPtr + AlignQword (size), newAlloc + ComputeMaxStructAlignPadLarge (requiredAlignment) - alignedPtr);
    }
    return alignedPtr;
}
#else // FEATURE_STRUCTALIGN
#define ComputeMaxStructAlignPad(requiredAlignment) 0
#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0
#endif // FEATURE_STRUCTALIGN

//CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk
#ifdef SERVER_GC
#define CLR_SIZE ((size_t)(8*1024))
#else //SERVER_GC
#define CLR_SIZE ((size_t)(8*1024))
#endif //SERVER_GC

#define END_SPACE_AFTER_GC (LARGE_OBJECT_SIZE + MAX_STRUCTALIGN)

#ifdef BACKGROUND_GC
#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE)
#else
#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE)
#endif //BACKGROUND_GC

// Default segment reservation sizes (small-object / large-object heap) per
// GC flavor and bitness.
#ifdef SERVER_GC

#ifdef BIT64

#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024))
#define LHEAP_ALLOC ((size_t)(1024*1024*256))

#else

#define INITIAL_ALLOC ((size_t)(1024*1024*64))
#define LHEAP_ALLOC ((size_t)(1024*1024*32))

#endif // BIT64

#else //SERVER_GC

#ifdef BIT64

#define INITIAL_ALLOC ((size_t)(1024*1024*256))
#define LHEAP_ALLOC ((size_t)(1024*1024*128))

#else

#define INITIAL_ALLOC ((size_t)(1024*1024*16))
#define LHEAP_ALLOC ((size_t)(1024*1024*16))

#endif // BIT64

#endif //SERVER_GC

//amount in bytes of the etw allocation tick
const size_t etw_allocation_tick = 100*1024;

const size_t low_latency_alloc = 256*1024;

const size_t fgn_check_quantum = 2*1024*1024;

#ifdef MH_SC_MARK
const int max_snoop_level = 128;
#endif //MH_SC_MARK


#ifdef CARD_BUNDLE
//threshold of heap size to turn on card bundles.
#define SH_TH_CARD_BUNDLE (40*1024*1024)
#define MH_TH_CARD_BUNDLE (180*1024*1024)
#endif //CARD_BUNDLE

#define page_size OS_PAGE_SIZE

#define GC_EPHEMERAL_DECOMMIT_TIMEOUT 5000

// Round 'add' up to the next OS page boundary.
inline
size_t align_on_page (size_t add)
{
    return ((add + page_size - 1) & ~(page_size - 1));
}

inline
uint8_t* align_on_page (uint8_t* add)
{
    return (uint8_t*)align_on_page ((size_t) add);
}

// Round 'add' down to the containing OS page boundary.
inline
size_t align_lower_page (size_t add)
{
    return (add & ~(page_size - 1));
}

inline
uint8_t* align_lower_page (uint8_t* add)
{
    return (uint8_t*)align_lower_page ((size_t)add);
}

// NOTE(review): returns TRUE for 0 as well as for powers of two.
inline
BOOL power_of_two_p (size_t integer)
{
    return !(integer & (integer-1));
}

inline
BOOL oddp (size_t integer)
{
    return (integer & 1) != 0;
}

// we only ever use this for WORDs.
size_t logcount (size_t word)
{
    //counts the number of high bits in a 16 bit word.
+ assert (word < 0x10000); + size_t count; + count = (word & 0x5555) + ( (word >> 1 ) & 0x5555); + count = (count & 0x3333) + ( (count >> 2) & 0x3333); + count = (count & 0x0F0F) + ( (count >> 4) & 0x0F0F); + count = (count & 0x00FF) + ( (count >> 8) & 0x00FF); + return count; +} + +//n!=0 +int log2(unsigned int n) +{ + int pos = 0; + if (n >= 1<<16) { n >>= 16; pos += 16; } + if (n >= 1<< 8) { n >>= 8; pos += 8; } + if (n >= 1<< 4) { n >>= 4; pos += 4; } + if (n >= 1<< 2) { n >>= 2; pos += 2; } + if (n >= 1<< 1) { pos += 1; } + return pos; +} + +//extract the low bits [0,low[ of a uint32_t +#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) +//extract the high bits [high, 32] of a uint32_t +#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) + + +class mark; +class generation; +class heap_segment; +class CObjectHeader; +class dynamic_data; +class l_heap; +class sorted_table; +class c_synchronize; + +#ifdef FEATURE_PREMORTEM_FINALIZATION +#ifndef DACCESS_COMPILE +static +HRESULT AllocateCFinalize(CFinalize **pCFinalize); +#endif //!DACCESS_COMPILE +#endif // FEATURE_PREMORTEM_FINALIZATION + +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); + + +#ifdef USE_INTROSORT +#define _sort introsort::sort +#else //USE_INTROSORT +#define _sort qsort1 +void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); +#endif //USE_INTROSORT + +void* virtual_alloc (size_t size); +void virtual_free (void* add, size_t size); + +/* per heap static initialization */ +#ifdef MARK_ARRAY +#ifndef MULTIPLE_HEAPS +SPTR_IMPL_NS(uint32_t, WKS, gc_heap, mark_array); +#endif //!MULTIPLE_HEAPS +#endif //MARK_ARRAY + +#ifdef MARK_LIST +uint8_t** gc_heap::g_mark_list; + +#ifdef PARALLEL_MARK_LIST_SORT +uint8_t** gc_heap::g_mark_list_copy; +#endif //PARALLEL_MARK_LIST_SORT + +size_t gc_heap::mark_list_size; +#endif //MARK_LIST + +#ifdef SEG_MAPPING_TABLE +seg_mapping* seg_mapping_table; +#endif //SEG_MAPPING_TABLE + +#if !defined(SEG_MAPPING_TABLE) || 
defined(FEATURE_BASICFREEZE) +sorted_table* gc_heap::seg_table; +#endif //!SEG_MAPPING_TABLE || FEATURE_BASICFREEZE + +#ifdef MULTIPLE_HEAPS +CLREvent gc_heap::ee_suspend_event; +size_t gc_heap::min_balance_threshold = 0; +#endif //MULTIPLE_HEAPS + +VOLATILE(BOOL) gc_heap::gc_started; + +#ifdef MULTIPLE_HEAPS + +CLREvent gc_heap::gc_start_event; + +bool gc_heap::gc_thread_no_affinitize_p = false; + +SVAL_IMPL_NS(int, SVR, gc_heap, n_heaps); +SPTR_IMPL_NS(PTR_gc_heap, SVR, gc_heap, g_heaps); + +size_t* gc_heap::g_promoted; + +#ifdef MH_SC_MARK +int* gc_heap::g_mark_stack_busy; +#endif //MH_SC_MARK + + +#ifdef BACKGROUND_GC +size_t* gc_heap::g_bpromoted; +#endif //BACKGROUND_GC + +#else //MULTIPLE_HEAPS + +size_t gc_heap::g_promoted; + +#ifdef BACKGROUND_GC +size_t gc_heap::g_bpromoted; +#endif //BACKGROUND_GC + +#endif //MULTIPLE_HEAPS + +size_t gc_heap::reserved_memory = 0; +size_t gc_heap::reserved_memory_limit = 0; +BOOL gc_heap::g_low_memory_status; + +#ifndef DACCESS_COMPILE +static gc_reason gc_trigger_reason = reason_empty; +#endif //DACCESS_COMPILE + +gc_mechanisms gc_heap::settings; + +gc_history_global gc_heap::gc_data_global; + +size_t gc_heap::gc_last_ephemeral_decommit_time = 0; + +size_t gc_heap::gc_gen0_desired_high; + +#ifdef SHORT_PLUGS +double gc_heap::short_plugs_pad_ratio = 0; +#endif //SHORT_PLUGS + +#if defined(BIT64) +#define MAX_ALLOWED_MEM_LOAD 85 + +// consider putting this in dynamic data - +// we may want different values for workstation +// and server GC. 
+#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024) + +size_t gc_heap::youngest_gen_desired_th; +#endif //BIT64 + +uint64_t gc_heap::mem_one_percent; + +uint32_t gc_heap::high_memory_load_th; + +uint64_t gc_heap::total_physical_mem; + +uint64_t gc_heap::entry_available_physical_mem; + +#ifdef BACKGROUND_GC +CLREvent gc_heap::bgc_start_event; + +gc_mechanisms gc_heap::saved_bgc_settings; + +CLREvent gc_heap::background_gc_done_event; + +CLREvent gc_heap::ee_proceed_event; + +bool gc_heap::gc_can_use_concurrent = false; + +bool gc_heap::temp_disable_concurrent_p = false; + +uint32_t gc_heap::cm_in_progress = FALSE; + +BOOL gc_heap::dont_restart_ee_p = FALSE; + +BOOL gc_heap::keep_bgc_threads_p = FALSE; + +CLREvent gc_heap::bgc_threads_sync_event; + +BOOL gc_heap::do_ephemeral_gc_p = FALSE; + +BOOL gc_heap::do_concurrent_p = FALSE; + +size_t gc_heap::ephemeral_fgc_counts[max_generation]; + +BOOL gc_heap::alloc_wait_event_p = FALSE; + +#if defined (DACCESS_COMPILE) && !defined (MULTIPLE_HEAPS) +SVAL_IMPL_NS_INIT(gc_heap::c_gc_state, WKS, gc_heap, current_c_gc_state, c_gc_state_free); +#else +VOLATILE(gc_heap::c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; +#endif //DACCESS_COMPILE && !MULTIPLE_HEAPS + +#endif //BACKGROUND_GC + +#ifndef MULTIPLE_HEAPS +#ifdef SPINLOCK_HISTORY +int gc_heap::spinlock_info_index = 0; +spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info + 8]; +#endif //SPINLOCK_HISTORY + +size_t gc_heap::fgn_last_alloc = 0; + +int gc_heap::generation_skip_ratio = 100; + +uint64_t gc_heap::loh_alloc_since_cg = 0; + +BOOL gc_heap::elevation_requested = FALSE; + +BOOL gc_heap::last_gc_before_oom = FALSE; + +#ifdef BACKGROUND_GC +SPTR_IMPL_NS_INIT(uint8_t, WKS, gc_heap, background_saved_lowest_address, 0); +SPTR_IMPL_NS_INIT(uint8_t, WKS, gc_heap, background_saved_highest_address, 0); +SPTR_IMPL_NS_INIT(uint8_t, WKS, gc_heap, next_sweep_obj, 0); +uint8_t* gc_heap::current_sweep_pos = 0; +exclusive_sync* gc_heap::bgc_alloc_lock; +#endif 
//BACKGROUND_GC + +SVAL_IMPL_NS(oom_history, WKS, gc_heap, oom_info); + +fgm_history gc_heap::fgm_result; + +BOOL gc_heap::ro_segments_in_range; + +size_t gc_heap::gen0_big_free_spaces = 0; + +uint8_t* gc_heap::lowest_address; + +uint8_t* gc_heap::highest_address; + +BOOL gc_heap::ephemeral_promotion; + +uint8_t* gc_heap::saved_ephemeral_plan_start[NUMBERGENERATIONS-1]; +size_t gc_heap::saved_ephemeral_plan_start_size[NUMBERGENERATIONS-1]; + +short* gc_heap::brick_table; + +uint32_t* gc_heap::card_table; + +#ifdef CARD_BUNDLE +uint32_t* gc_heap::card_bundle_table; +#endif //CARD_BUNDLE + +uint8_t* gc_heap::gc_low; + +uint8_t* gc_heap::gc_high; + +uint8_t* gc_heap::demotion_low; + +uint8_t* gc_heap::demotion_high; + +BOOL gc_heap::demote_gen1_p = TRUE; + +uint8_t* gc_heap::last_gen1_pin_end; + +gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; + +size_t gc_heap::etw_allocation_running_amount[2]; + +int gc_heap::gc_policy = 0; + +size_t gc_heap::allocation_running_time; + +size_t gc_heap::allocation_running_amount; + +SPTR_IMPL_NS_INIT(heap_segment, WKS, gc_heap, ephemeral_heap_segment, 0); + +BOOL gc_heap::blocking_collection = FALSE; + +heap_segment* gc_heap::freeable_large_heap_segment = 0; + +size_t gc_heap::time_bgc_last = 0; + +size_t gc_heap::mark_stack_tos = 0; + +size_t gc_heap::mark_stack_bos = 0; + +size_t gc_heap::mark_stack_array_length = 0; + +mark* gc_heap::mark_stack_array = 0; + +BOOL gc_heap::verify_pinned_queue_p = FALSE; + +uint8_t* gc_heap::oldest_pinned_plug = 0; + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) +size_t gc_heap::num_pinned_objects = 0; +#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef FEATURE_LOH_COMPACTION +size_t gc_heap::loh_pinned_queue_tos = 0; + +size_t gc_heap::loh_pinned_queue_bos = 0; + +size_t gc_heap::loh_pinned_queue_length = 0; + +mark* gc_heap::loh_pinned_queue = 0; + +BOOL gc_heap::loh_compacted_p = FALSE; +#endif //FEATURE_LOH_COMPACTION + +#ifdef BACKGROUND_GC + +EEThreadId 
gc_heap::bgc_thread_id; + +uint8_t* gc_heap::background_written_addresses [array_size+2]; + +heap_segment* gc_heap::freeable_small_heap_segment = 0; + +size_t gc_heap::bgc_overflow_count = 0; + +size_t gc_heap::bgc_begin_loh_size = 0; +size_t gc_heap::end_loh_size = 0; + +uint32_t gc_heap::bgc_alloc_spin_loh = 0; + +size_t gc_heap::bgc_loh_size_increased = 0; + +size_t gc_heap::bgc_loh_allocated_in_free = 0; + +size_t gc_heap::background_soh_alloc_count = 0; + +size_t gc_heap::background_loh_alloc_count = 0; + +uint8_t** gc_heap::background_mark_stack_tos = 0; + +uint8_t** gc_heap::background_mark_stack_array = 0; + +size_t gc_heap::background_mark_stack_array_length = 0; + +uint8_t* gc_heap::background_min_overflow_address =0; + +uint8_t* gc_heap::background_max_overflow_address =0; + +BOOL gc_heap::processed_soh_overflow_p = FALSE; + +uint8_t* gc_heap::background_min_soh_overflow_address =0; + +uint8_t* gc_heap::background_max_soh_overflow_address =0; + +SPTR_IMPL_NS_INIT(heap_segment, WKS, gc_heap, saved_sweep_ephemeral_seg, 0); +SPTR_IMPL_NS_INIT(uint8_t, WKS, gc_heap, saved_sweep_ephemeral_start, 0); + +heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; + +Thread* gc_heap::bgc_thread = 0; + +BOOL gc_heap::expanded_in_fgc = FALSE; + +uint8_t** gc_heap::c_mark_list = 0; + +size_t gc_heap::c_mark_list_length = 0; + +size_t gc_heap::c_mark_list_index = 0; + +gc_history_per_heap gc_heap::bgc_data_per_heap; + +BOOL gc_heap::bgc_thread_running; + +CLRCriticalSection gc_heap::bgc_threads_timeout_cs; + +CLREvent gc_heap::gc_lh_block_event; + +#endif //BACKGROUND_GC + +#ifdef MARK_LIST +uint8_t** gc_heap::mark_list; +uint8_t** gc_heap::mark_list_index; +uint8_t** gc_heap::mark_list_end; +#endif //MARK_LIST + +#ifdef SNOOP_STATS +snoop_stats_data gc_heap::snoop_stat; +#endif //SNOOP_STATS + +uint8_t* gc_heap::min_overflow_address = MAX_PTR; + +uint8_t* gc_heap::max_overflow_address = 0; + +uint8_t* gc_heap::shigh = 0; + +uint8_t* gc_heap::slow = MAX_PTR; + +size_t 
gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; + +size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; + +size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; + +size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; + +BOOL gc_heap::ordered_plug_indices_init = FALSE; + +BOOL gc_heap::use_bestfit = FALSE; + +uint8_t* gc_heap::bestfit_first_pin = 0; + +BOOL gc_heap::commit_end_of_seg = FALSE; + +size_t gc_heap::max_free_space_items = 0; + +size_t gc_heap::free_space_buckets = 0; + +size_t gc_heap::free_space_items = 0; + +int gc_heap::trimmed_free_space_index = 0; + +size_t gc_heap::total_ephemeral_plugs = 0; + +seg_free_spaces* gc_heap::bestfit_seg = 0; + +size_t gc_heap::total_ephemeral_size = 0; + +#ifdef HEAP_ANALYZE + +size_t gc_heap::internal_root_array_length = initial_internal_roots; + +SPTR_IMPL_NS_INIT(PTR_uint8_t, WKS, gc_heap, internal_root_array, 0); +SVAL_IMPL_NS_INIT(size_t, WKS, gc_heap, internal_root_array_index, 0); +SVAL_IMPL_NS_INIT(BOOL, WKS, gc_heap, heap_analyze_success, TRUE); + +uint8_t* gc_heap::current_obj = 0; +size_t gc_heap::current_obj_size = 0; + +#endif //HEAP_ANALYZE + +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::interesting_data_per_gc[max_idp_count]; +//size_t gc_heap::interesting_data_per_heap[max_idp_count]; +//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; +#endif //GC_CONFIG_DRIVEN +#endif //MULTIPLE_HEAPS + +no_gc_region_info gc_heap::current_no_gc_region_info; +BOOL gc_heap::proceed_with_gc_p = FALSE; +GCSpinLock gc_heap::gc_lock; + +size_t gc_heap::eph_gen_starts_size = 0; +heap_segment* gc_heap::segment_standby_list; +size_t gc_heap::last_gc_index = 0; +size_t gc_heap::min_segment_size = 0; + +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::time_init = 0; +size_t gc_heap::time_since_init = 0; +size_t gc_heap::compact_or_sweep_gcs[2]; +#endif //GC_CONFIG_DRIVEN + +#ifdef FEATURE_LOH_COMPACTION +BOOL gc_heap::loh_compaction_always_p = FALSE; +gc_loh_compaction_mode 
gc_heap::loh_compaction_mode = loh_compaction_default; +int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; + +#endif //FEATURE_LOH_COMPACTION + +CLREvent gc_heap::full_gc_approach_event; + +CLREvent gc_heap::full_gc_end_event; + +uint32_t gc_heap::fgn_maxgen_percent = 0; + +uint32_t gc_heap::fgn_loh_percent = 0; + +#ifdef BACKGROUND_GC +BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; +#endif //BACKGROUND_GC + +VOLATILE(bool) gc_heap::full_gc_approach_event_set; + +size_t gc_heap::full_gc_counts[gc_type_max]; + +BOOL gc_heap::should_expand_in_full_gc = FALSE; + +#ifdef HEAP_ANALYZE +BOOL gc_heap::heap_analyze_enabled = FALSE; +#endif //HEAP_ANALYZE + +#ifndef MULTIPLE_HEAPS + +#ifndef DACCESS_COMPILE +extern "C" { +#endif //!DACCESS_COMPILE +GARY_IMPL(generation, generation_table,NUMBERGENERATIONS+1); +#ifdef GC_CONFIG_DRIVEN +GARY_IMPL(size_t, interesting_data_per_heap, max_idp_count); +GARY_IMPL(size_t, compact_reasons_per_heap, max_compact_reasons_count); +GARY_IMPL(size_t, expand_mechanisms_per_heap, max_expand_mechanisms_count); +GARY_IMPL(size_t, interesting_mechanism_bits_per_heap, max_gc_mechanism_bits_count); +#endif //GC_CONFIG_DRIVEN +#ifndef DACCESS_COMPILE +} +#endif //!DACCESS_COMPILE + +#endif //MULTIPLE_HEAPS + +#ifndef MULTIPLE_HEAPS + +alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1]; +alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1]; + +dynamic_data gc_heap::dynamic_data_table [NUMBERGENERATIONS+1]; +gc_history_per_heap gc_heap::gc_data_per_heap; +size_t gc_heap::maxgen_pinned_compact_before_advance = 0; + +SPTR_IMPL_NS_INIT(uint8_t, WKS, gc_heap, alloc_allocated, 0); + +size_t gc_heap::allocation_quantum = CLR_SIZE; + +GCSpinLock gc_heap::more_space_lock; + +#ifdef SYNCHRONIZATION_STATS +unsigned int gc_heap::good_suspension = 0; +unsigned int gc_heap::bad_suspension = 0; +uint64_t gc_heap::total_msl_acquire = 0; +unsigned int gc_heap::num_msl_acquired = 0; +unsigned int gc_heap::num_high_msl_acquire = 0; +unsigned int 
gc_heap::num_low_msl_acquire = 0; +#endif //SYNCHRONIZATION_STATS + +size_t gc_heap::alloc_contexts_used = 0; +size_t gc_heap::soh_allocation_no_gc = 0; +size_t gc_heap::loh_allocation_no_gc = 0; +heap_segment* gc_heap::saved_loh_segment_no_gc = 0; + +#endif //MULTIPLE_HEAPS + +#ifndef MULTIPLE_HEAPS + +BOOL gc_heap::gen0_bricks_cleared = FALSE; + +#ifdef FFIND_OBJECT +int gc_heap::gen0_must_clear_bricks = 0; +#endif //FFIND_OBJECT + +#ifdef FEATURE_PREMORTEM_FINALIZATION +SPTR_IMPL_NS_INIT(CFinalize, WKS, gc_heap, finalize_queue, 0); +#endif // FEATURE_PREMORTEM_FINALIZATION + +#endif // MULTIPLE_HEAPS + +/* end of per heap static initialization */ + +/* end of static initialization */ + +#ifndef DACCESS_COMPILE + +void gen_to_condemn_tuning::print (int heap_num) +{ +#ifdef DT_LOG + dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); + gc_condemn_reason_gen r_gen; + for (int i = 0; i < gcrg_max; i++) + { + r_gen = (gc_condemn_reason_gen)(i); + str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); + } + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); + + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); + gc_condemn_reason_condition r_condition; + for (int i = 0; i < gcrc_max; i++) + { + r_condition = (gc_condemn_reason_condition)(i); + str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); + } + + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); +#else + UNREFERENCED_PARAMETER(heap_num); +#endif //DT_LOG +} + +void gc_generation_data::print (int heap_num, int gen_num) +{ +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id p %Id np %Id alloc %Id", + heap_num, gen_num, + size_before, + free_list_space_before, free_obj_space_before, + size_after, + free_list_space_after, free_obj_space_after, + in, 
        pinned_surv, npinned_surv,
        new_allocation));
#else
    UNREFERENCED_PARAMETER(heap_num);
    UNREFERENCED_PARAMETER(gen_num);
#endif //SIMPLE_DPRINTF && DT_LOG
}

// Record which option of 'mechanism_per_heap' was used for this GC: the slot
// is reset, tagged with mechanism_mask, and bit 'value' is set.
void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value)
{
    uint32_t* mechanism = &mechanisms[mechanism_per_heap];
    *mechanism = 0;
    *mechanism |= mechanism_mask;
    *mechanism |= (1 << value);

#ifdef DT_LOG
    gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap];
    dprintf (DT_LOG_0, ("setting %s: %s",
             descr->name,
             (descr->descr)[value]));
#endif //DT_LOG
}

// Dump this heap's per-generation data, maxgen size info and the mechanisms
// used, to the DT log (no-op unless SIMPLE_DPRINTF && DT_LOG).
void gc_history_per_heap::print()
{
#if defined(SIMPLE_DPRINTF) && defined(DT_LOG)
    for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++)
    {
        gen_data[i].print (heap_index, i);
    }

    dprintf (DT_LOG_0, ("fla %Id flr %Id esa %Id ca %Id pa %Id paa %Id, rfle %d, ec %Id",
             maxgen_size_info.free_list_allocated,
             maxgen_size_info.free_list_rejected,
             maxgen_size_info.end_seg_allocated,
             maxgen_size_info.condemned_allocated,
             maxgen_size_info.pinned_allocated,
             maxgen_size_info.pinned_allocated_advance,
             maxgen_size_info.running_free_list_efficiency,
             extra_gen0_committed));

    int mechanism = 0;
    gc_mechanism_descr* descr = 0;

    for (int i = 0; i < max_mechanism_per_heap; i++)
    {
        mechanism = get_mechanism ((gc_mechanism_per_heap)i);

        // get_mechanism returns a negative value when the mechanism was unused.
        if (mechanism >= 0)
        {
            descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i];
            dprintf (DT_LOG_0, ("[%2d]%s%s",
                     heap_index,
                     descr->name,
                     (descr->descr)[mechanism]));
        }
    }
#endif //SIMPLE_DPRINTF && DT_LOG
}

// Dump the global GC settings/mechanisms for this GC to the DT log.
void gc_history_global::print()
{
#ifdef DT_LOG
    // 'Y'/'N' per global mechanism, separated by '|' characters.
    char str_settings[64];
    memset (str_settings, '|', sizeof (char) * 64);
    str_settings[max_global_mechanisms_count*2] = 0;

    for (int i = 0; i < max_global_mechanisms_count; i++)
    {
        str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N');
    }

    dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|"));

    dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings));
    dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %Id(%d), memload %d",
                        condemned_generation,
                        str_gc_reasons[reason],
                        str_gc_pause_modes[pause_mode],
                        final_youngest_desired,
                        gen0_reduction_count,
                        mem_pressure));
#endif //DT_LOG
}

// Fire the ETW GCPerHeapHistory_V3 event for one heap and echo the same data
// to the DT log. Note the size fields are cast to uint8_t* because the event
// payload declares them as pointer-sized values.
void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num)
{
    maxgen_size_increase* maxgen_size_info = &(current_gc_data_per_heap->maxgen_size_info);
    FireEtwGCPerHeapHistory_V3(GetClrInstanceId(),
                               (uint8_t*)(maxgen_size_info->free_list_allocated),
                               (uint8_t*)(maxgen_size_info->free_list_rejected),
                               (uint8_t*)(maxgen_size_info->end_seg_allocated),
                               (uint8_t*)(maxgen_size_info->condemned_allocated),
                               (uint8_t*)(maxgen_size_info->pinned_allocated),
                               (uint8_t*)(maxgen_size_info->pinned_allocated_advance),
                               maxgen_size_info->running_free_list_efficiency,
                               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons0(),
                               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons1(),
                               current_gc_data_per_heap->mechanisms[gc_heap_compact],
                               current_gc_data_per_heap->mechanisms[gc_heap_expand],
                               current_gc_data_per_heap->heap_index,
                               (uint8_t*)(current_gc_data_per_heap->extra_gen0_committed),
                               (max_generation + 2),
                               sizeof (gc_generation_data),
                               &(current_gc_data_per_heap->gen_data[0]));

    current_gc_data_per_heap->print();
    current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_num);
}

// Fire the global + per-heap GC history ETW events for the GC that just
// completed. Compiled out entirely on CoreCLR (#ifndef CORECLR).
void gc_heap::fire_pevents()
{
#ifndef CORECLR
    settings.record (&gc_data_global);
    gc_data_global.print();

    FireEtwGCGlobalHeapHistory_V2(gc_data_global.final_youngest_desired,
                                  gc_data_global.num_heaps,
                                  gc_data_global.condemned_generation,
                                  gc_data_global.gen0_reduction_count,
                                  gc_data_global.reason,
                                  gc_data_global.global_mechanims_p,
                                  GetClrInstanceId(),
                                  gc_data_global.pause_mode,
                                  gc_data_global.mem_pressure);

#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap();
        fire_per_heap_hist_event (current_gc_data_per_heap, hp->heap_number);
    }
#else
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
    fire_per_heap_hist_event (current_gc_data_per_heap, heap_number);
#endif
#endif //!CORECLR
}

// Tuning predicate: is ephemeral space low for the given decision point?
// For the promote-ephemeral decision it compares the planned ephemeral size
// plus the projected new gen0 budget against the segment's reserved space.
inline BOOL
gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp)
{
    BOOL ret = FALSE;

    switch (tp)
    {
        case tuning_deciding_condemned_gen:
        case tuning_deciding_compaction:
        case tuning_deciding_expansion:
        case tuning_deciding_full_gc:
        {
            ret = (!ephemeral_gen_fit_p (tp));
            break;
        }
        case tuning_deciding_promote_ephemeral:
        {
            size_t new_gen0size = approximate_new_allocation();
            ptrdiff_t plan_ephemeral_size = total_ephemeral_size;

            dprintf (GTC_LOG, ("h%d: plan eph size is %Id, new gen0 is %Id",
                     heap_number, plan_ephemeral_size, new_gen0size));
            ret = ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - (heap_segment_mem (ephemeral_heap_segment))) <
                   (plan_ephemeral_size + new_gen0size));
            break;
        }
        default:
            break;
    }

    return ret;
}

// Tuning predicate: is generation 'gen_number' fragmented enough to justify
// condemning it? With elevate_p, compares maxgen fragmentation against the
// generation's max size; otherwise uses unusable fragmentation and the
// fragmentation-burden ratio against per-generation dynamic-data limits
// (plus a 65% gen2 shortcut in workstation builds).
BOOL
gc_heap::dt_high_frag_p (gc_tuning_point tp,
                         int gen_number,
                         BOOL elevate_p)
{
    BOOL ret = FALSE;

    switch (tp)
    {
        case tuning_deciding_condemned_gen:
        {
            dynamic_data* dd = dynamic_data_of (gen_number);
            float fragmentation_burden = 0;

            if (elevate_p)
            {
                ret = (dd_fragmentation (dynamic_data_of (max_generation)) >= dd_max_size(dd));
                dprintf (GTC_LOG, ("h%d: frag is %Id, max size is %Id",
                         heap_number, dd_fragmentation (dd), dd_max_size(dd)));
            }
            else
            {
#ifndef MULTIPLE_HEAPS
                if (gen_number == max_generation)
                {
                    // Workstation shortcut: gen2 more than 65% fragmented is
                    // unconditionally "high frag".
                    float frag_ratio = (float)(dd_fragmentation (dynamic_data_of (max_generation))) / (float)generation_size (max_generation);
                    if (frag_ratio > 0.65)
                    {
                        dprintf (GTC_LOG, ("g2 FR: %d%%", (int)(frag_ratio*100)));
                        return TRUE;
                    }
                }
#endif //!MULTIPLE_HEAPS
                size_t fr = generation_unusable_fragmentation (generation_of (gen_number));
                ret = (fr > dd_fragmentation_limit(dd));
                if (ret)
                {
                    fragmentation_burden = (float)fr / generation_size (gen_number);
                    ret = (fragmentation_burden > dd_v_fragmentation_burden_limit (dd));
                }
                dprintf (GTC_LOG, ("h%d: gen%d, frag is %Id, alloc effi: %d%%, unusable frag is %Id, ratio is %d",
                         heap_number, gen_number, dd_fragmentation (dd),
                         (int)(100*generation_allocator_efficiency (generation_of (gen_number))),
                         fr, (int)(fragmentation_burden*100)));
            }
            break;
        }
        default:
            break;
    }

    return ret;
}

// Tuning predicate: would condemning gen2 likely reclaim enough space?
// Estimates gen2 dead space from the survival rate plus fragmentation and
// compares it against the per-heap minimum reclaim threshold. Only valid for
// gen_number == max_generation (asserts otherwise).
inline BOOL
gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number)
{
    BOOL ret = FALSE;

    switch (tp)
    {
        case tuning_deciding_condemned_gen:
        {
            if (gen_number == max_generation)
            {
                dynamic_data* dd = dynamic_data_of (gen_number);
                size_t maxgen_allocated = (dd_desired_allocation (dd) - dd_new_allocation (dd));
                size_t maxgen_total_size = maxgen_allocated + dd_current_size (dd);
                size_t est_maxgen_surv = (size_t)((float) (maxgen_total_size) * dd_surv (dd));
                size_t est_maxgen_free = maxgen_total_size - est_maxgen_surv + dd_fragmentation (dd);

                dprintf (GTC_LOG, ("h%d: Total gen2 size: %Id, est gen2 dead space: %Id (s: %d, allocated: %Id), frag: %Id",
                         heap_number,
                         maxgen_total_size,
                         est_maxgen_free,
                         (int)(dd_surv (dd) * 100),
                         maxgen_allocated,
                         dd_fragmentation (dd)));

                uint32_t num_heaps = 1;

#ifdef MULTIPLE_HEAPS
                num_heaps = gc_heap::n_heaps;
#endif //MULTIPLE_HEAPS

                size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps);
                dprintf (GTC_LOG, ("h%d, min frag is %Id", heap_number, min_frag_th));
                ret = (est_maxgen_free >= min_frag_th);
            }
            else
            {
                assert (0);
            }
            break;
        }

        default:
            break;
    }

    return ret;
}

// DTREVIEW: Right now we only estimate gen2 fragmentation.
+// on 64-bit though we should consider gen1 or even gen0 fragmentatioin as +// well +inline BOOL +gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) +{ + BOOL ret = FALSE; + + switch (tp) + { + case tuning_deciding_condemned_gen: + { + if (gen_number == max_generation) + { + dynamic_data* dd = dynamic_data_of (gen_number); + float est_frag_ratio = 0; + if (dd_current_size (dd) == 0) + { + est_frag_ratio = 1; + } + else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) + { + est_frag_ratio = 0; + } + else + { + est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); + } + + size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); + dprintf (GTC_LOG, ("h%d: gen%d: current_size is %Id, frag is %Id, est_frag_ratio is %d%%, estimated frag is %Id", + heap_number, + gen_number, + dd_current_size (dd), + dd_fragmentation (dd), + (int)(est_frag_ratio*100), + est_frag)); + + uint32_t num_heaps = 1; + +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); + //dprintf (GTC_LOG, ("h%d, min frag is %I64d", heap_number, min_frag_th)); + ret = (est_frag >= min_frag_th); + } + else + { + assert (0); + } + break; + } + + default: + break; + } + + return ret; +} + +inline BOOL +gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) +{ + BOOL ret = FALSE; + + switch (tp) + { + case tuning_deciding_condemned_gen: + { + /* promote into max-generation if the card table has too many + * generation faults besides the n -> 0 + */ + ret = (generation_skip_ratio < 30); + break; + } + + default: + break; + } + + return ret; +} + +inline BOOL +in_range_for_segment(uint8_t* add, heap_segment* seg) +{ + return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg))); +} + 
#if !defined(SEG_MAPPING_TABLE) || defined(FEATURE_BASICFREEZE)
// The array we allocate is organized as follows:
// 0th element is the address of the last array we allocated.
// starting from the 1st element are the segment addresses, that's
// what buckets() returns.
struct bk
{
    uint8_t* add;
    size_t val;
};

// Sorted address -> value map used for segment lookup, implemented as a
// binary-searched array of bk entries. Grown arrays cannot be freed while
// lookups may be in flight, so superseded arrays are chained onto old_slots
// (via their 0th element) and reclaimed later by delete_old_slots().
class sorted_table
{
private:
    ptrdiff_t size;      // capacity of the current slots array
    ptrdiff_t count;     // entries in use (includes the MAX_PTR sentinel)
    bk* slots;           // raw allocation; element 0 links old arrays
    bk* buckets() { return (slots + 1); }
    uint8_t*& last_slot (bk* arr) { return arr[0].add; }
    bk* old_slots;       // chain of superseded arrays awaiting deletion
public:
    static sorted_table* make_sorted_table ();
    BOOL insert (uint8_t* add, size_t val);;
    size_t lookup (uint8_t*& add);
    void remove (uint8_t* add);
    void clear ();
    void delete_sorted_table();
    void delete_old_slots();
    void enqueue_old_slot(bk* sl);
    BOOL ensure_space_for_insert();
};

// Allocates a sorted_table with an initial capacity of 400 entries; the
// slots array is carved out of the same allocation, right after the object.
// Returns 0 on allocation failure.
sorted_table*
sorted_table::make_sorted_table ()
{
    size_t size = 400;

    // allocate one more bk to store the older slot address.
    sorted_table* res = (sorted_table*)new char [sizeof (sorted_table) + (size + 1) * sizeof (bk)];
    if (!res)
        return 0;
    res->size = size;
    res->slots = (bk*)(res + 1);
    res->old_slots = 0;
    res->clear();
    return res;
}

// Frees the table, any separately grown slots array, and all queued old arrays.
void
sorted_table::delete_sorted_table()
{
    // slots points into our own allocation unless ensure_space_for_insert grew it.
    if (slots != (bk*)(this+1))
    {
        delete slots;
    }
    delete_old_slots();
    delete this;
}
// Walks the old_slots chain (linked through each array's 0th element) and
// frees every superseded array.
void
sorted_table::delete_old_slots()
{
    uint8_t* sl = (uint8_t*)old_slots;
    while (sl)
    {
        uint8_t* dsl = sl;
        sl = last_slot ((bk*)sl);
        delete dsl;
    }
    old_slots = 0;
}
// Pushes a superseded slots array onto the old_slots chain.
void
sorted_table::enqueue_old_slot(bk* sl)
{
    last_slot (sl) = (uint8_t*)old_slots;
    old_slots = sl;
}

// Binary search for the entry covering `add`. On success, `add` is updated
// to the entry's key and its value is returned; on failure `add` is set to 0
// and 0 is returned. Relies on the MAX_PTR sentinel installed by clear() so
// the buck[ti+1] probe is safe.
inline
size_t
sorted_table::lookup (uint8_t*& add)
{
    ptrdiff_t high = (count-1);
    ptrdiff_t low = 0;
    ptrdiff_t ti;
    ptrdiff_t mid;
    bk* buck = buckets();
    while (low <= high)
    {
        mid = ((low + high)/2);
        ti = mid;
        if (buck[ti].add > add)
        {
            if ((ti > 0) && (buck[ti-1].add <= add))
            {
                add = buck[ti-1].add;
                return buck[ti - 1].val;
            }
            high = mid - 1;
        }
        else
        {
            if (buck[ti+1].add > add)
            {
                add = buck[ti].add;
                return buck[ti].val;
            }
            low = mid + 1;
        }
    }
    add = 0;
    return 0;
}

// Grows the slots array by 1.5x when full; the previous array is queued on
// old_slots rather than freed (lookups may still be reading it).
// Returns FALSE on allocation failure.
BOOL
sorted_table::ensure_space_for_insert()
{
    if (count == size)
    {
        size = (size * 3)/2;
        assert((size * sizeof (bk)) > 0);
        bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)];
        assert (res);
        if (!res)
            return FALSE;

        last_slot (res) = 0;
        memcpy (((bk*)res + 1), buckets(), count * sizeof (bk));
        bk* last_old_slots = slots;
        slots = res;
        // The initial array lives inside this object's allocation and must
        // not be deleted.
        if (last_old_slots != (bk*)(this + 1))
            enqueue_old_slot (last_old_slots);
    }
    return TRUE;
}

// Inserts (add, val) at its sorted position, shifting larger entries up.
// Caller must have called ensure_space_for_insert first (asserted).
BOOL
sorted_table::insert (uint8_t* add, size_t val)
{
    //grow if no more room
    assert (count < size);

    //insert sorted
    ptrdiff_t high = (count-1);
    ptrdiff_t low = 0;
    ptrdiff_t ti;
    ptrdiff_t mid;
    bk* buck = buckets();
    while (low <= high)
    {
        mid = ((low + high)/2);
        ti = mid;
        if (buck[ti].add > add)
        {
            if ((ti == 0) || (buck[ti-1].add <= add))
            {
                // found insertion point
                for (ptrdiff_t k = count; k > ti;k--)
                {
                    buck [k] = buck [k-1];
                }
                buck[ti].add = add;
                buck[ti].val = val;
                count++;
                return TRUE;
            }
            high = mid - 1;
        }
        else
        {
            if (buck[ti+1].add > add)
            {
                //found the insertion point
                for (ptrdiff_t k = count; k > ti+1;k--)
                {
                    buck [k] = buck [k-1];
                }
                buck[ti+1].add = add;
                buck[ti+1].val = val;
                count++;
                return TRUE;
            }
            low = mid + 1;
        }
    }
    assert (0);
    return TRUE;
}

// Removes the entry covering `add` (binary search, then shift down).
// The entry is expected to exist (asserts if not found).
void
sorted_table::remove (uint8_t* add)
{
    ptrdiff_t high = (count-1);
    ptrdiff_t low = 0;
    ptrdiff_t ti;
    ptrdiff_t mid;
    bk* buck = buckets();
    while (low <= high)
    {
        mid = ((low + high)/2);
        ti = mid;
        if (buck[ti].add > add)
        {
            if (buck[ti-1].add <= add)
            {
                // found the guy to remove
                for (ptrdiff_t k = ti; k < count; k++)
                    buck[k-1] = buck[k];
                count--;
                return;
            }
            high = mid - 1;
        }
        else
        {
            if (buck[ti+1].add > add)
            {
                // found the guy to remove
                for (ptrdiff_t k = ti+1; k < count; k++)
                    buck[k-1] = buck[k];
                count--;
                return;
            }
            low = mid + 1;
        }
    }
    assert (0);
}

// Resets the table to a single MAX_PTR sentinel entry; the sentinel keeps
// the binary search's upper-neighbor probes in bounds.
void
sorted_table::clear()
{
    count = 1;
    buckets()[0].add = MAX_PTR;
}
#endif //!SEG_MAPPING_TABLE || FEATURE_BASICFREEZE

#ifdef SEG_MAPPING_TABLE
#ifdef GROWABLE_SEG_MAPPING_TABLE
// Rounds `add` up to the next min_segment_size boundary.
inline
uint8_t* align_on_segment (uint8_t* add)
{
    return (uint8_t*)((size_t)(add + (gc_heap::min_segment_size - 1)) & ~(gc_heap::min_segment_size - 1));
}

// Rounds `add` down to the previous min_segment_size boundary.
inline
uint8_t* align_lower_segment (uint8_t* add)
{
    return (uint8_t*)((size_t)(add) & ~(gc_heap::min_segment_size - 1));
}

// Bytes needed for seg_mapping_table entries covering [from, end), after
// aligning the range out to segment boundaries.
size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end)
{
    from = align_lower_segment (from);
    end = align_on_segment (end);
    dprintf (1, ("from: %Ix, end: %Ix, size: %Ix", from, end, sizeof (seg_mapping)*(((end - from) / gc_heap::min_segment_size))));
    return sizeof (seg_mapping)*((end - from) / gc_heap::min_segment_size);
}

// for seg_mapping_table we want it to start from a pointer sized address.
// Rounds a byte size up to pointer-size alignment.
inline
size_t align_for_seg_mapping_table (size_t size)
{
    return ((size + (sizeof (uint8_t*) - 1)) &~ (sizeof (uint8_t*) - 1));
}

// Index into seg_mapping_table for address `add`.
inline
size_t seg_mapping_word_of (uint8_t* add)
{
    return (size_t)add / gc_heap::min_segment_size;
}
#else //GROWABLE_SEG_MAPPING_TABLE
// Non-growable variant: allocates one seg_mapping entry per min_segment_size
// chunk of the whole address space (8TB on 64-bit, 4GB on 32-bit).
// Returns FALSE on allocation failure.
BOOL seg_mapping_table_init()
{
#ifdef BIT64
    uint64_t total_address_space = (uint64_t)8*1024*1024*1024*1024;
#else
    uint64_t total_address_space = (uint64_t)4*1024*1024*1024;
#endif // BIT64

    size_t num_entries = (size_t)(total_address_space / gc_heap::min_segment_size);
    seg_mapping_table = new seg_mapping[num_entries];

    if (seg_mapping_table)
    {
        memset (seg_mapping_table, 0, num_entries * sizeof (seg_mapping));
        dprintf (1, ("created %d entries for heap mapping (%Id bytes)",
                     num_entries, (num_entries * sizeof (seg_mapping))));
        return TRUE;
    }
    else
    {
        dprintf (1, ("failed to create %d entries for heap mapping (%Id bytes)",
                     num_entries, (num_entries * sizeof (seg_mapping))));
        return FALSE;
    }
}
#endif //GROWABLE_SEG_MAPPING_TABLE

#ifdef FEATURE_BASICFREEZE
// First table index covered by a read-only segment, clamped to g_lowest_address.
inline
size_t ro_seg_begin_index (heap_segment* seg)
{
    size_t begin_index = (size_t)seg / gc_heap::min_segment_size;
    begin_index = max (begin_index, (size_t)g_lowest_address / gc_heap::min_segment_size);
    return begin_index;
}

// Last table index covered by a read-only segment, clamped to g_highest_address.
inline
size_t ro_seg_end_index (heap_segment* seg)
{
    size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) / gc_heap::min_segment_size;
    end_index = min (end_index, (size_t)g_highest_address / gc_heap::min_segment_size);
    return end_index;
}

// Marks every table entry covered by a read-only (frozen) segment by OR-ing
// the ro_in_entry tag bit into the entry's seg1 pointer.
void seg_mapping_table_add_ro_segment (heap_segment* seg)
{
#ifdef GROWABLE_SEG_MAPPING_TABLE
    // Segments entirely outside [g_lowest_address, g_highest_address) have
    // no table entries to mark.
    if ((heap_segment_reserved (seg) <= g_lowest_address) || (heap_segment_mem (seg) >= g_highest_address))
        return;
#endif //GROWABLE_SEG_MAPPING_TABLE

    for (size_t entry_index = ro_seg_begin_index (seg); entry_index <= ro_seg_end_index (seg); entry_index++)
        seg_mapping_table[entry_index].seg1 = (heap_segment*)((size_t)seg_mapping_table[entry_index].seg1 | ro_in_entry);
}

// Intentionally a no-op: the ro_in_entry flag can't simply be cleared because
// other ro segments may share the same entries (see the comment below).
void seg_mapping_table_remove_ro_segment (heap_segment* seg)
{
    UNREFERENCED_PARAMETER(seg);
#if 0
// POSSIBLE PERF TODO: right now we are not doing anything because we can't simply remove the flag. If it proves
// to be a perf problem, we can search in the current ro segs and see if any lands in this range and only
// remove the flag if none lands in this range.
#endif //0
}

// Looks up `o` in the sorted ro-segment table; returns the segment only when
// `o` actually falls within it, else 0.
heap_segment* ro_segment_lookup (uint8_t* o)
{
    uint8_t* ro_seg_start = o;
    heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start);

    // lookup() zeroes ro_seg_start when nothing was found.
    if (ro_seg_start && in_range_for_segment (o, seg))
        return seg;
    else
        return 0;
}

#endif //FEATURE_BASICFREEZE

// Registers `seg` (owned by `hp`) in seg_mapping_table: the end entry gets
// boundary/seg0/h0, the begin entry gets seg1/h1, and all entries strictly
// in between get seg1 (and h1) pointing at this segment.
void gc_heap::seg_mapping_table_add_segment (heap_segment* seg, gc_heap* hp)
{
    size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1);
    size_t begin_index = (size_t)seg / gc_heap::min_segment_size;
    seg_mapping* begin_entry = &seg_mapping_table[begin_index];
    size_t end_index = seg_end / gc_heap::min_segment_size;
    seg_mapping* end_entry = &seg_mapping_table[end_index];
    dprintf (1, ("adding seg %Ix(%d)-%Ix(%d)",
        seg, begin_index, heap_segment_reserved (seg), end_index));

    dprintf (1, ("before add: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));

#ifdef MULTIPLE_HEAPS
#ifdef SIMPLE_DPRINTF
    dprintf (1, ("begin %d: h0: %Ix(%d), h1: %Ix(%d); end %d: h0: %Ix(%d), h1: %Ix(%d)",
        begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? begin_entry->h0->heap_number : -1),
        (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1),
        end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1),
        (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1)));
#endif //SIMPLE_DPRINTF
    assert (end_entry->boundary == 0);
    assert (end_entry->h0 == 0);
    end_entry->h0 = hp;
    assert (begin_entry->h1 == 0);
    begin_entry->h1 = hp;
#else
    UNREFERENCED_PARAMETER(hp);
#endif //MULTIPLE_HEAPS

    end_entry->boundary = (uint8_t*)seg_end;

    dprintf (1, ("set entry %d seg1 and %d seg0 to %Ix", begin_index, end_index, seg));
    // seg1 may already carry the ro_in_entry tag bit; keep it while storing seg.
    assert ((begin_entry->seg1 == 0) || ((size_t)(begin_entry->seg1) == ro_in_entry));
    begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) | (size_t)seg);
    end_entry->seg0 = seg;

    // for every entry inbetween we need to set its heap too.
    for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++)
    {
        assert (seg_mapping_table[entry_index].boundary == 0);
#ifdef MULTIPLE_HEAPS
        assert (seg_mapping_table[entry_index].h0 == 0);
        seg_mapping_table[entry_index].h1 = hp;
#endif //MULTIPLE_HEAPS
        seg_mapping_table[entry_index].seg1 = seg;
    }

    dprintf (1, ("after add: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));
#if defined(MULTIPLE_HEAPS) && defined(SIMPLE_DPRINTF)
    dprintf (1, ("begin %d: h0: %Ix(%d), h1: %Ix(%d); end: %d h0: %Ix(%d), h1: %Ix(%d)",
        begin_index, (uint8_t*)(begin_entry->h0), (begin_entry->h0 ? begin_entry->h0->heap_number : -1),
        (uint8_t*)(begin_entry->h1), (begin_entry->h1 ? begin_entry->h1->heap_number : -1),
        end_index, (uint8_t*)(end_entry->h0), (end_entry->h0 ? end_entry->h0->heap_number : -1),
        (uint8_t*)(end_entry->h1), (end_entry->h1 ? end_entry->h1->heap_number : -1)));
#endif //MULTIPLE_HEAPS && SIMPLE_DPRINTF
}

// Undoes seg_mapping_table_add_segment for `seg`, clearing boundary, heap
// pointers and segment pointers while preserving any ro_in_entry tag on the
// begin entry's seg1.
void gc_heap::seg_mapping_table_remove_segment (heap_segment* seg)
{
    size_t seg_end = (size_t)(heap_segment_reserved (seg) - 1);
    size_t begin_index = (size_t)seg / gc_heap::min_segment_size;
    seg_mapping* begin_entry = &seg_mapping_table[begin_index];
    size_t end_index = seg_end / gc_heap::min_segment_size;
    seg_mapping* end_entry = &seg_mapping_table[end_index];
    dprintf (1, ("removing seg %Ix(%d)-%Ix(%d)",
        seg, begin_index, heap_segment_reserved (seg), end_index));

    assert (end_entry->boundary == (uint8_t*)seg_end);
    end_entry->boundary = 0;

#ifdef MULTIPLE_HEAPS
    gc_heap* hp = heap_segment_heap (seg);
    assert (end_entry->h0 == hp);
    end_entry->h0 = 0;
    assert (begin_entry->h1 == hp);
    begin_entry->h1 = 0;
#endif //MULTIPLE_HEAPS

    assert (begin_entry->seg1 != 0);
    // Keep only the ro_in_entry tag bit (if present).
    begin_entry->seg1 = (heap_segment*)((size_t)(begin_entry->seg1) & ro_in_entry);
    end_entry->seg0 = 0;

    // for every entry inbetween we need to reset its heap too.
    for (size_t entry_index = (begin_index + 1); entry_index <= (end_index - 1); entry_index++)
    {
        assert (seg_mapping_table[entry_index].boundary == 0);
#ifdef MULTIPLE_HEAPS
        assert (seg_mapping_table[entry_index].h0 == 0);
        assert (seg_mapping_table[entry_index].h1 == hp);
        seg_mapping_table[entry_index].h1 = 0;
#endif //MULTIPLE_HEAPS
        seg_mapping_table[entry_index].seg1 = 0;
    }

    dprintf (1, ("after remove: begin entry%d: boundary: %Ix; end entry: %d: boundary: %Ix",
        begin_index, (seg_mapping_table[begin_index].boundary + 1),
        end_index, (seg_mapping_table[end_index].boundary + 1)));
#ifdef MULTIPLE_HEAPS
    dprintf (1, ("begin %d: h0: %Ix, h1: %Ix; end: %d h0: %Ix, h1: %Ix",
        begin_index, (uint8_t*)(begin_entry->h0), (uint8_t*)(begin_entry->h1),
        end_index, (uint8_t*)(end_entry->h0), (uint8_t*)(end_entry->h1)));
#endif //MULTIPLE_HEAPS
}

#ifdef MULTIPLE_HEAPS
// Maps an object address to its owning gc_heap using the entry's boundary:
// addresses above the boundary belong to h1, at/below to h0. The _DEBUG
// section only logs whether the matching segment really contains `o`.
inline
gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o)
{
    size_t index = (size_t)o / gc_heap::min_segment_size;
    seg_mapping* entry = &seg_mapping_table[index];

    gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0);

    dprintf (2, ("checking obj %Ix, index is %Id, entry: boundry: %Ix, h0: %Ix, seg0: %Ix, h1: %Ix, seg1: %Ix",
        o, index, (entry->boundary + 1),
        (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0),
        (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1)));

#ifdef _DEBUG
    heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0);
#ifdef FEATURE_BASICFREEZE
    // Strip the ro tag bit before using seg as a pointer.
    if ((size_t)seg & ro_in_entry)
        seg = (heap_segment*)((size_t)seg & ~ro_in_entry);
#endif //FEATURE_BASICFREEZE

    if (seg)
    {
        if (in_range_for_segment (o, seg))
        {
            dprintf (2, ("obj %Ix belongs to segment %Ix(-%Ix)", o, seg, (uint8_t*)heap_segment_allocated (seg)));
        }
        else
        {
            dprintf (2, ("found seg %Ix(-%Ix) for obj %Ix, but it's not on the seg",
                seg, (uint8_t*)heap_segment_allocated (seg), o));
        }
    }
    else
    {
        dprintf (2, ("could not find obj %Ix in any existing segments", o));
    }
#endif //_DEBUG

    return hp;
}

// Heap lookup with range guard for addresses outside the GC address range.
gc_heap* seg_mapping_table_heap_of (uint8_t* o)
{
#ifdef GROWABLE_SEG_MAPPING_TABLE
    if ((o < g_lowest_address) || (o >= g_highest_address))
        return 0;
#endif //GROWABLE_SEG_MAPPING_TABLE

    return seg_mapping_table_heap_of_worker (o);
}

// Same as seg_mapping_table_heap_of, but the range guard is only needed when
// ro segments may exist outside the table's growable range.
gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o)
{
#if defined(FEATURE_BASICFREEZE) && defined(GROWABLE_SEG_MAPPING_TABLE)
    if ((o < g_lowest_address) || (o >= g_highest_address))
        return 0;
#endif //FEATURE_BASICFREEZE || GROWABLE_SEG_MAPPING_TABLE

    return seg_mapping_table_heap_of_worker (o);
}
#endif //MULTIPLE_HEAPS

// Only returns a valid seg if we can actually find o on the seg.
heap_segment* seg_mapping_table_segment_of (uint8_t* o)
{
#if defined(FEATURE_BASICFREEZE) && defined(GROWABLE_SEG_MAPPING_TABLE)
    // Out-of-range addresses can only be on ro (frozen) segments.
    if ((o < g_lowest_address) || (o >= g_highest_address))
#ifdef FEATURE_BASICFREEZE
        return ro_segment_lookup (o);
#else
        return 0;
#endif //FEATURE_BASICFREEZE
#endif //FEATURE_BASICFREEZE || GROWABLE_SEG_MAPPING_TABLE

    size_t index = (size_t)o / gc_heap::min_segment_size;
    seg_mapping* entry = &seg_mapping_table[index];

    dprintf (2, ("checking obj %Ix, index is %Id, entry: boundry: %Ix, seg0: %Ix, seg1: %Ix",
        o, index, (entry->boundary + 1),
        (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1)));

    heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0);
#ifdef FEATURE_BASICFREEZE
    // Strip the ro tag bit before using seg as a pointer.
    if ((size_t)seg & ro_in_entry)
        seg = (heap_segment*)((size_t)seg & ~ro_in_entry);
#endif //FEATURE_BASICFREEZE

    if (seg)
    {
        // Can't assert this when it's callled by everyone (it's true when it's called by mark cards).
        //assert (in_range_for_segment (o, seg));
        if (in_range_for_segment (o, seg))
        {
            dprintf (2, ("obj %Ix belongs to segment %Ix(-%Ix)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg)));
        }
        else
        {
            dprintf (2, ("found seg %Ix(-%Ix) for obj %Ix, but it's not on the seg, setting it to 0",
                (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o));
            seg = 0;
        }
    }
    else
    {
        dprintf (2, ("could not find obj %Ix in any existing segments", o));
    }

#ifdef FEATURE_BASICFREEZE
    // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro
    // segments whenever the ro segment falls into the [g_lowest_address,g_highest_address) range. I.e., it had an
    // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. However, at the moment, grow_brick_card_table does
    // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest)
    // range changes. We should probably go ahead and modify grow_brick_card_table and put back the
    // "&& (size_t)(entry->seg1) & ro_in_entry" here.
    if (!seg)
    {
        seg = ro_segment_lookup (o);
        if (seg && !in_range_for_segment (o, seg))
            seg = 0;
    }
#endif //FEATURE_BASICFREEZE

    return seg;
}
#endif //SEG_MAPPING_TABLE

size_t gcard_of ( uint8_t*);
void gset_card (size_t card);

#define memref(i) *(uint8_t**)(i)

//GC Flags
#define GC_MARKED (size_t)0x1
#define slot(i, j) ((uint8_t**)(i))[j+1]

#define free_object_base_size (plug_skew + sizeof(ArrayBase))

// GC-internal view of a heap object. Adds mark/pin bookkeeping on top of
// Object: the mark bit is stored in the low bit of the method table pointer,
// the pin bit lives in the sync block header.
class CObjectHeader : public Object
{
public:

#ifdef FEATURE_REDHAWK
    // The GC expects the following methods that are provided by the Object class in the CLR but not provided
    // by Redhawk's version of Object.
    uint32_t GetNumComponents()
    {
        return ((ArrayBase *)this)->GetNumComponents();
    }

    // Sanity-checks the object: method table, heap range membership,
    // alignment, and (optionally) its members. Debug/verify-heap only.
    void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = TRUE)
    {
        UNREFERENCED_PARAMETER(bVerifyNextHeader);

        if (this == NULL)
            return;

        MethodTable * pMT = GetMethodTable();

        _ASSERTE(pMT->SanityCheck());

        bool noRangeChecks =
            (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_NO_RANGE_CHECKS) == EEConfig::HEAPVERIFY_NO_RANGE_CHECKS;

        BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE;
        if (!noRangeChecks)
        {
            fSmallObjectHeapPtr = GCHeap::GetGCHeap()->IsHeapPointer(this, TRUE);
            if (!fSmallObjectHeapPtr)
                fLargeObjectHeapPtr = GCHeap::GetGCHeap()->IsHeapPointer(this);

            _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr);
        }

#ifdef FEATURE_STRUCTALIGN
        _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment()));
#endif // FEATURE_STRUCTALIGN

#ifdef FEATURE_64BIT_ALIGNMENT
        if (pMT->RequiresAlign8())
        {
            _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 4U : 0U));
        }
#endif // FEATURE_64BIT_ALIGNMENT

#ifdef VERIFY_HEAP
        if (bDeep && (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC))
            GCHeap::GetGCHeap()->ValidateObjectMember(this);
#endif
        if (fSmallObjectHeapPtr)
        {
#ifdef FEATURE_BASICFREEZE
            _ASSERTE(!GCHeap::GetGCHeap()->IsLargeObject(pMT) || GCHeap::GetGCHeap()->IsInFrozenSegment(this));
#else
            _ASSERTE(!GCHeap::GetGCHeap()->IsLargeObject(pMT));
#endif
        }
    }

    void ValidatePromote(ScanContext *sc, uint32_t flags)
    {
        UNREFERENCED_PARAMETER(sc);
        UNREFERENCED_PARAMETER(flags);

        Validate();
    }

    void ValidateHeap(Object *from, BOOL bDeep)
    {
        UNREFERENCED_PARAMETER(from);

        Validate(bDeep, FALSE);
    }

    ADIndex GetAppDomainIndex()
    {
        return (ADIndex)RH_DEFAULT_DOMAIN_ID;
    }
#endif //FEATURE_REDHAWK

    /////
    //
    // Header Status Information
    //

    // Method table pointer with the GC mark bit stripped.
    MethodTable *GetMethodTable() const
    {
        return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~(GC_MARKED))));
    }

    // Sets the mark bit (low bit of the method table pointer).
    void SetMarked()
    {
        RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED));
    }

    BOOL IsMarked() const
    {
        return !!(((size_t)RawGetMethodTable()) & GC_MARKED);
    }

    // Pin bit lives in the sync block header; not usable during concurrent GC.
    void SetPinned()
    {
        assert (!(gc_heap::settings.concurrent));
        GetHeader()->SetGCBit();
    }

    BOOL IsPinned() const
    {
        return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE);
    }

    void ClearMarked()
    {
        RawSetMethodTable( GetMethodTable() );
    }

    CGCDesc *GetSlotMap ()
    {
        assert (GetMethodTable()->ContainsPointers());
        return CGCDesc::GetCGCDescFromMT(GetMethodTable());
    }

    // Turns this object into a free-list filler of `size` bytes by giving it
    // the free-object method table and encoding the size in NumComponents.
    void SetFree(size_t size)
    {
        assert (size >= free_object_base_size);

        assert (g_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size);
        assert (g_pFreeObjectMethodTable->RawGetComponentSize() == 1);

        RawSetMethodTable( g_pFreeObjectMethodTable );

        size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()];
        *numComponentsPtr = size - free_object_base_size;
#ifdef VERIFY_HEAP
        //This introduces a bug in the free list management.
        //((void**) this)[-1] = 0; // clear the sync block,
        assert (*numComponentsPtr >= 0);
        if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC)
            memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr);
#endif //VERIFY_HEAP
    }

    // Clears the free-object header so the space can be reused for a real object.
    void UnsetFree()
    {
        size_t size = free_object_base_size - plug_skew;

        // since we only need to clear 2 ptr size, we do it manually
        PTR_PTR m = (PTR_PTR) this;
        for (size_t i = 0; i < size / sizeof(PTR_PTR); i++)
            *(m++) = 0;
    }

    BOOL IsFree () const
    {
        return (GetMethodTable() == g_pFreeObjectMethodTable);
    }

#ifdef FEATURE_STRUCTALIGN
    int GetRequiredAlignment () const
    {
        return GetMethodTable()->GetRequiredAlignment();
    }
#endif // FEATURE_STRUCTALIGN

    BOOL ContainsPointers() const
    {
        return GetMethodTable()->ContainsPointers();
    }

#ifdef COLLECTIBLE_CLASS
    BOOL Collectible() const
    {
        return GetMethodTable()->Collectible();
    }

    FORCEINLINE BOOL ContainsPointersOrCollectible() const
    {
        MethodTable *pMethodTable = GetMethodTable();
        return (pMethodTable->ContainsPointers() || pMethodTable->Collectible());
    }
#endif //COLLECTIBLE_CLASS

    Object* GetObjectBase() const
    {
        return (Object*) this;
    }
};

#define header(i) ((CObjectHeader*)(i))

#define free_list_slot(x) ((uint8_t**)(x))[2]
#define free_list_undo(x) ((uint8_t**)(x))[-1]
#define UNDO_EMPTY ((uint8_t*)1)

#ifdef SHORT_PLUGS
// With SHORT_PLUGS, "plug padded" reuses the object's mark bit.
inline
void set_plug_padded (uint8_t* node)
{
    header(node)->SetMarked();
}
inline
void clear_plug_padded (uint8_t* node)
{
    header(node)->ClearMarked();
}
inline
BOOL is_plug_padded (uint8_t* node)
{
    return header(node)->IsMarked();
}
#else //SHORT_PLUGS
inline void set_plug_padded (uint8_t* node){}
inline void clear_plug_padded (uint8_t* node){}
inline
BOOL is_plug_padded (uint8_t* node){return FALSE;}
#endif //SHORT_PLUGS

+ +inline size_t unused_array_size(uint8_t * p) +{ + assert(((CObjectHeader*)p)->IsFree()); + + size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); + return free_object_base_size + *numComponentsPtr; +} + +heap_segment* heap_segment_rw (heap_segment* ns) +{ + if ((ns == 0) || !heap_segment_read_only_p (ns)) + { + return ns; + } + else + { + do + { + ns = heap_segment_next (ns); + } while ((ns != 0) && heap_segment_read_only_p (ns)); + return ns; + } +} + +//returns the next non ro segment. +heap_segment* heap_segment_next_rw (heap_segment* seg) +{ + heap_segment* ns = heap_segment_next (seg); + return heap_segment_rw (ns); +} + +// returns the segment before seg. +heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) +{ + assert (begin != 0); + heap_segment* prev = begin; + heap_segment* current = heap_segment_next_rw (begin); + + while (current && current != seg) + { + prev = current; + current = heap_segment_next_rw (current); + } + + if (current == seg) + { + return prev; + } + else + { + return 0; + } +} + +// returns the segment before seg. 
+heap_segment* heap_segment_prev (heap_segment* begin, heap_segment* seg) +{ + assert (begin != 0); + heap_segment* prev = begin; + heap_segment* current = heap_segment_next (begin); + + while (current && current != seg) + { + prev = current; + current = heap_segment_next (current); + } + + if (current == seg) + { + return prev; + } + else + { + return 0; + } +} + +heap_segment* heap_segment_in_range (heap_segment* ns) +{ + if ((ns == 0) || heap_segment_in_range_p (ns)) + { + return ns; + } + else + { + do + { + ns = heap_segment_next (ns); + } while ((ns != 0) && !heap_segment_in_range_p (ns)); + return ns; + } +} + +heap_segment* heap_segment_next_in_range (heap_segment* seg) +{ + heap_segment* ns = heap_segment_next (seg); + return heap_segment_in_range (ns); +} + +typedef struct +{ + uint8_t* memory_base; +} imemory_data; + +typedef struct +{ + imemory_data *initial_memory; + imemory_data *initial_normal_heap; // points into initial_memory_array + imemory_data *initial_large_heap; // points into initial_memory_array + + size_t block_size_normal; + size_t block_size_large; + + size_t block_count; // # of blocks in each + size_t current_block_normal; + size_t current_block_large; + + enum + { + ALLATONCE = 1, + TWO_STAGE, + EACH_BLOCK + }; + + size_t allocation_pattern; +} initial_memory_details; + +initial_memory_details memory_details; + +BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t num_heaps) +{ + BOOL reserve_success = FALSE; + + // should only be called once + assert (memory_details.initial_memory == 0); + + memory_details.initial_memory = new (nothrow) imemory_data[num_heaps*2]; + if (memory_details.initial_memory == 0) + { + dprintf (2, ("failed to reserve %Id bytes for imemory_data", num_heaps*2*sizeof(imemory_data))); + return FALSE; + } + + memory_details.initial_normal_heap = memory_details.initial_memory; + memory_details.initial_large_heap = memory_details.initial_memory + num_heaps; + memory_details.block_size_normal = 
normal_size; + memory_details.block_size_large = large_size; + memory_details.block_count = num_heaps; + + memory_details.current_block_normal = 0; + memory_details.current_block_large = 0; + + g_lowest_address = MAX_PTR; + g_highest_address = 0; + + if (((size_t)MAX_PTR - large_size) < normal_size) + { + // we are already overflowing with just one heap. + dprintf (2, ("0x%Ix + 0x%Ix already overflow", normal_size, large_size)); + return FALSE; + } + + if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size)) + { + dprintf (2, ("(0x%Ix + 0x%Ix)*0x%Ix overflow", normal_size, large_size, memory_details.block_count)); + return FALSE; + } + + size_t requestedMemory = memory_details.block_count * (normal_size + large_size); + + uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory); + if (allatonce_block) + { + g_lowest_address = allatonce_block; + g_highest_address = allatonce_block + (memory_details.block_count * (large_size + normal_size)); + memory_details.allocation_pattern = initial_memory_details::ALLATONCE; + + for(size_t i = 0; i < memory_details.block_count; i++) + { + memory_details.initial_normal_heap[i].memory_base = allatonce_block + (i*normal_size); + memory_details.initial_large_heap[i].memory_base = allatonce_block + + (memory_details.block_count*normal_size) + (i*large_size); + reserve_success = TRUE; + } + } + else + { + // try to allocate 2 blocks + uint8_t* b1 = 0; + uint8_t* b2 = 0; + b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size); + if (b1) + { + b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size); + if (b2) + { + memory_details.allocation_pattern = initial_memory_details::TWO_STAGE; + g_lowest_address = min(b1,b2); + g_highest_address = max(b1 + memory_details.block_count*normal_size, + b2 + memory_details.block_count*large_size); + for(size_t i = 0; i < memory_details.block_count; i++) + { + memory_details.initial_normal_heap[i].memory_base = b1 + (i*normal_size); + 
memory_details.initial_large_heap[i].memory_base = b2 + (i*large_size); + reserve_success = TRUE; + } + } + else + { + // b2 allocation failed, we'll go on to try allocating each block. + // We could preserve the b1 alloc, but code complexity increases + virtual_free (b1, memory_details.block_count * normal_size); + } + } + + if ((b2==NULL) && ( memory_details.block_count > 1)) + { + memory_details.allocation_pattern = initial_memory_details::EACH_BLOCK; + + imemory_data *current_block = memory_details.initial_memory; + for(size_t i = 0; i < (memory_details.block_count*2); i++, current_block++) + { + size_t block_size = ((i < memory_details.block_count) ? + memory_details.block_size_normal : + memory_details.block_size_large); + current_block->memory_base = + (uint8_t*)virtual_alloc (block_size); + if (current_block->memory_base == 0) + { + // Free the blocks that we've allocated so far + current_block = memory_details.initial_memory; + for(size_t j = 0; j < i; j++, current_block++){ + if (current_block->memory_base != 0){ + block_size = ((j < memory_details.block_count) ? 
                            memory_details.block_size_normal :
                            memory_details.block_size_large);
                        virtual_free (current_block->memory_base , block_size);
                    }
                }
                // One block failed to reserve; everything reserved so far has
                // just been freed above, so the whole reservation fails.
                reserve_success = FALSE;
                break;
            }
            else
            {
                // This block reserved fine: widen the global address range the
                // GC tracks so [g_lowest_address, g_highest_address) covers it.
                if (current_block->memory_base < g_lowest_address)
                    g_lowest_address = current_block->memory_base;
                if (((uint8_t *) current_block->memory_base + block_size) > g_highest_address)
                    g_highest_address = (current_block->memory_base + block_size);
            }
            reserve_success = TRUE;
        }
    }
}

return reserve_success;
}

// Releases the initial per-heap memory reserved at startup, using the same
// pattern (ALLATONCE / TWO_STAGE / EACH_BLOCK) that was recorded in
// memory_details.allocation_pattern when it was reserved, then frees the
// imemory_data bookkeeping array itself and NULLs the pointers into it.
void destroy_initial_memory()
{
    if (memory_details.initial_memory != NULL)
    {
        if (memory_details.allocation_pattern == initial_memory_details::ALLATONCE)
        {
            // Single contiguous reservation: one release covers the normal and
            // large blocks for every heap.
            virtual_free(memory_details.initial_memory[0].memory_base,
                memory_details.block_count*(memory_details.block_size_normal +
                memory_details.block_size_large));
        }
        else if (memory_details.allocation_pattern == initial_memory_details::TWO_STAGE)
        {
            // Two reservations: one run of normal blocks, one run of large blocks.
            virtual_free (memory_details.initial_normal_heap[0].memory_base,
                memory_details.block_count*memory_details.block_size_normal);

            virtual_free (memory_details.initial_large_heap[0].memory_base,
                memory_details.block_count*memory_details.block_size_large);
        }
        else
        {
            // EACH_BLOCK: every block was reserved individually, so release
            // them individually. The first block_count entries are the normal
            // blocks, the second block_count entries the large blocks.
            assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK);
            imemory_data *current_block = memory_details.initial_memory;
            for(size_t i = 0; i < (memory_details.block_count*2); i++, current_block++)
            {
                size_t block_size = (i < memory_details.block_count) ?
                    memory_details.block_size_normal :
                    memory_details.block_size_large;
                // A partially-failed reservation can leave NULL entries.
                if (current_block->memory_base != NULL)
                {
                    virtual_free (current_block->memory_base, block_size);
                }
            }
        }

        delete [] memory_details.initial_memory;
        memory_details.initial_memory = NULL;
        memory_details.initial_normal_heap = NULL;
        memory_details.initial_large_heap = NULL;
    }
}

// Hands out the next unused initial-memory block of the requested size.
// size must be exactly block_size_normal or block_size_large (asserted);
// a cursor per block kind (current_block_normal / current_block_large)
// tracks consumption.
void* next_initial_memory (size_t size)
{
    assert ((size == memory_details.block_size_normal) || (size == memory_details.block_size_large));
    void *res = NULL;

    if ((size != memory_details.block_size_normal) ||
        ((memory_details.current_block_normal == memory_details.block_count) &&
         (memory_details.block_size_normal == memory_details.block_size_large)))
    {
        // If the block sizes are the same, flow block requests from normal to large
        assert (memory_details.current_block_large < memory_details.block_count);
        assert (memory_details.initial_large_heap != 0);

        res = memory_details.initial_large_heap[memory_details.current_block_large].memory_base;
        memory_details.current_block_large++;
    }
    else
    {
        assert (memory_details.current_block_normal < memory_details.block_count);
        assert (memory_details.initial_normal_heap != NULL);

        res = memory_details.initial_normal_heap[memory_details.current_block_normal].memory_base;
        memory_details.current_block_normal++;
    }

    return res;
}

// Builds a heap_segment for heap h_number on top of the next initial-memory
// block of the given size.
heap_segment* get_initial_segment (size_t size, int h_number)
{
    void* mem = next_initial_memory (size);
    heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size , h_number);

    return res;
}

// Reserves `size` bytes of address space for the GC via the OS interface.
// Returns 0 on failure. Accounts the reservation in gc_heap::reserved_memory,
// growing gc_heap::reserved_memory_limit through GCScan::AskForMoreReservedMemory
// first if the current limit would be exceeded. A reservation that ends too
// close to the top of the address space (within END_SPACE_AFTER_GC of MAX_PTR)
// is released and treated as a failure — see the comment in the body.
void* virtual_alloc (size_t size)
{
    size_t requested_size = size;

    if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size)
    {
        gc_heap::reserved_memory_limit =
            GCScan::AskForMoreReservedMemory (gc_heap::reserved_memory_limit, requested_size);
        if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size)
        {
            return 0;
        }
    }

    uint32_t flags = VirtualReserveFlags::None;
#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    // Only ask the OS for hardware write watch when the software write watch
    // (used for concurrent GC) is not compiled in.
    if (virtual_alloc_hardware_write_watch)
    {
        flags = VirtualReserveFlags::WriteWatch;
    }
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    void* prgmem = GCToOSInterface::VirtualReserve (0, requested_size, card_size * card_word_width, flags);
    void *aligned_mem = prgmem;

    // We don't want (prgmem + size) to be right at the end of the address space
    // because we'd have to worry about that everytime we do (address + size).
    // We also want to make sure that we leave LARGE_OBJECT_SIZE at the end
    // so we allocate a small object we don't need to worry about overflow there
    // when we do alloc_ptr+size.
    if (prgmem)
    {
        uint8_t* end_mem = (uint8_t*)prgmem + requested_size;

        if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC))
        {
            GCToOSInterface::VirtualRelease (prgmem, requested_size);
            dprintf (2, ("Virtual Alloc size %Id returned memory right against 4GB [%Ix, %Ix[ - discarding",
                    requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));
            prgmem = 0;
            aligned_mem = 0;
        }
    }

    if (prgmem)
    {
        // Only count the reservation once we know we are keeping it.
        gc_heap::reserved_memory += requested_size;
    }

    dprintf (2, ("Virtual Alloc size %Id: [%Ix, %Ix[",
                 requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size)));

    return aligned_mem;
}

// Releases a reservation made by virtual_alloc and deducts it from the
// gc_heap::reserved_memory accounting.
void virtual_free (void* add, size_t size)
{
    GCToOSInterface::VirtualRelease (add, size);
    gc_heap::reserved_memory -= size;
    dprintf (2, ("Virtual Free size %Id: [%Ix, %Ix[",
                 size, (size_t)add, (size_t)((uint8_t*)add+size)));
}

// We have a few places that call this but the seg size doesn't change so call it
// once and save the result.
// TODO: move back after we do this.
// Returns the segment size to use: the configured size (g_pConfig->GetSegmentSize(),
// halved for large-object segments) when it passes GCHeap::IsValidSegmentSize,
// otherwise a fallback — 4MB when the configured value is non-zero but under 4MB,
// else the built-in default (INITIAL_ALLOC / LHEAP_ALLOC, halved once per
// processor-count threshold of 4 and 8 on server GC; on 64-bit only the
// small-object default is scaled down that way).
static size_t get_valid_segment_size (BOOL large_seg=FALSE)
{
    size_t seg_size, initial_seg_size;

    if (!large_seg)
    {
        initial_seg_size = INITIAL_ALLOC;
        seg_size = g_pConfig->GetSegmentSize();
    }
    else
    {
        initial_seg_size = LHEAP_ALLOC;
        seg_size = g_pConfig->GetSegmentSize() / 2;
    }

#ifdef MULTIPLE_HEAPS
#ifdef BIT64
    if (!large_seg)
#endif // BIT64
    {
        // With many heaps, shrink the per-heap default so total reservation
        // stays bounded: halve above 4 procs, halve again above 8.
        if (g_SystemInfo.dwNumberOfProcessors > 4)
            initial_seg_size /= 2;
        if (g_SystemInfo.dwNumberOfProcessors > 8)
            initial_seg_size /= 2;
    }
#endif //MULTIPLE_HEAPS

    // if seg_size is small but not 0 (0 is default if config not set)
    // then set the segment to the minimum size
    if (!GCHeap::IsValidSegmentSize(seg_size))
    {
        // if requested size is between 1 byte and 4MB, use min
        // (seg_size >> 1 is nonzero for >= 2 bytes; seg_size >> 22 is zero below 4MB)
        if ((seg_size >> 1) && !(seg_size >> 22))
            seg_size = 1024*1024*4;
        else
            seg_size = initial_seg_size;
    }

    return (seg_size);
}

// Accumulates into the heap member total_ephemeral_size the space the
// ephemeral generations are expected to need after the current GC:
// per-generation survived-minus-pinned sizes (plus alignment/padding
// reserves under the various alignment #ifdefs) plus eph_gen_starts_size.
// The generations summed run from 0 up to max_generation-1, excluding one
// more generation when settings.promotion is on.
// NOTE(review): total_ephemeral_size is accumulated with +=, so this relies
// on the caller having reset it beforehand — confirm at call sites.
void
gc_heap::compute_new_ephemeral_size()
{
    int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0);
    size_t padding_size = 0;

    for (int i = 0; i <= eph_gen_max; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd));
#ifdef RESPECT_LARGE_ALIGNMENT
        // Each unpinned plug may need an alignment switch's worth of slack.
        total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE);
#endif //RESPECT_LARGE_ALIGNMENT
#ifdef FEATURE_STRUCTALIGN
        total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN;
#endif //FEATURE_STRUCTALIGN

#ifdef SHORT_PLUGS
        padding_size += dd_padding_size (dd);
#endif //SHORT_PLUGS
    }

    total_ephemeral_size += eph_gen_starts_size;

#ifdef RESPECT_LARGE_ALIGNMENT
    // Never plan for more than what the current plan already allocates.
    size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) -
                                    generation_plan_allocation_start (generation_of (max_generation-1));
    total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size);
#endif //RESPECT_LARGE_ALIGNMENT

#ifdef SHORT_PLUGS
    // Inflate by the short-plug padding ratio, then reserve one more
    // desired-length plug.
    total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1);
    total_ephemeral_size += Align (DESIRED_PLUG_LENGTH);
#endif //SHORT_PLUGS

    dprintf (3, ("total ephemeral size is %Ix, padding %Ix(%Ix)",
        total_ephemeral_size,
        padding_size, (total_ephemeral_size - padding_size)));
}

#ifdef _MSC_VER
#pragma warning(disable:4706) // "assignment within conditional expression" is intentional in this function.
#endif // _MSC_VER

// Picks (or acquires) the segment the small-object heap will expand into.
// First computes the required ephemeral size, then — unless pause mode or a
// running background GC forbids reuse — scans existing gen2 segments
// backwards for one that can absorb the ephemeral generations; continues
// below with a fresh get_segment() call when reuse is not possible.
heap_segment*
gc_heap::soh_get_segment_to_expand()
{
    size_t size = get_valid_segment_size();

    ordered_plug_indices_init = FALSE;
    use_bestfit = FALSE;

    //compute the size of the new ephemeral heap segment.
    compute_new_ephemeral_size();

    if ((settings.pause_mode != pause_low_latency) &&
        (settings.pause_mode != pause_no_gc)
#ifdef BACKGROUND_GC
        && (!recursive_gc_sync::background_running_p())
#endif //BACKGROUND_GC
        )
    {
        allocator* gen_alloc = ((settings.condemned_generation == max_generation) ?
0 : + generation_allocator (generation_of (max_generation))); + dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); + + // try to find one in the gen 2 segment list, search backwards because the first segments + // tend to be more compact than the later ones. + heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); + + PREFIX_ASSUME(fseg != NULL); + +#ifdef SEG_REUSE_STATS + int try_reuse = 0; +#endif //SEG_REUSE_STATS + + heap_segment* seg = ephemeral_heap_segment; + while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) + { +#ifdef SEG_REUSE_STATS + try_reuse++; +#endif //SEG_REUSE_STATS + + if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) + { + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, + (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal)); + if (settings.condemned_generation == max_generation) + { + if (use_bestfit) + { + build_ordered_free_spaces (seg); + dprintf (GTC_LOG, ("can use best fit")); + } + +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %Ix", (size_t)seg)); + return seg; + } + else + { +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %Ix", (size_t)seg)); + + // If we return 0 here, the allocator will think since we are short on end + // of seg we neeed to trigger a full compacting GC. So if sustained low latency + // is set we should acquire a new seg instead, that way we wouldn't be short. + // The real solution, of course, is to actually implement seg reuse in gen1. 
+ if (settings.pause_mode != pause_sustained_low_latency) + { + dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); + return 0; + } + } + } + } + } + + heap_segment* result = get_segment (size, FALSE); + + if(result) + { +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_planning) + { + // When we expand heap during bgc sweep, we set the seg to be swept so + // we'll always look at cards for objects on the new segment. + result->flags |= heap_segment_flags_swept; + } +#endif //BACKGROUND_GC + + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(result), + (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), + ETW::GCLog::ETW_GC_INFO::SMALL_OBJECT_HEAP, + GetClrInstanceId()); + } + + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? expand_new_seg : expand_no_memory)); + + if (result == 0) + { + dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); + } + else + { +#ifdef MULTIPLE_HEAPS + heap_segment_heap (result) = this; +#endif //MULTIPLE_HEAPS + } + + dprintf (GTC_LOG, ("(gen%d)creating new segment %Ix", settings.condemned_generation, result)); + return result; +} + +#ifdef _MSC_VER +#pragma warning(default:4706) +#endif // _MSC_VER + +//returns 0 in case of allocation failure +heap_segment* +gc_heap::get_segment (size_t size, BOOL loh_p) +{ + heap_segment* result = 0; + + if (segment_standby_list != 0) + { + result = segment_standby_list; + heap_segment* last = 0; + while (result) + { + size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); + if ((hs >= size) && ((hs / 2) < size)) + { + dprintf (2, ("Hoarded segment %Ix found", (size_t) result)); + if (last) + { + heap_segment_next (last) = heap_segment_next (result); + } + else + { + segment_standby_list = heap_segment_next (result); + } + break; + } + else + { + last = result; + result = heap_segment_next (result); + } + } + } + + 
if (result) + { + init_heap_segment (result); +#ifdef BACKGROUND_GC + if (should_commit_mark_array()) + { + dprintf (GC_TABLE_LOG, ("hoarded seg %Ix, mark_array is %Ix", result, mark_array)); + if (!commit_mark_array_new_seg (__this, result)) + { + dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); + // If we can't use it we need to thread it back. + if (segment_standby_list != 0) + { + heap_segment_next (result) = segment_standby_list; + segment_standby_list = result; + } + else + { + segment_standby_list = result; + } + + result = 0; + } + } +#endif //BACKGROUND_GC + +#ifdef SEG_MAPPING_TABLE + if (result) + seg_mapping_table_add_segment (result, __this); +#endif //SEG_MAPPING_TABLE + } + + if (!result) + { +#ifndef SEG_MAPPING_TABLE + if (!seg_table->ensure_space_for_insert ()) + return 0; +#endif //SEG_MAPPING_TABLE + void* mem = virtual_alloc (size); + if (!mem) + { + fgm_result.set_fgm (fgm_reserve_segment, size, loh_p); + return 0; + } + + result = gc_heap::make_heap_segment ((uint8_t*)mem, size, heap_number); + + if (result) + { + uint8_t* start; + uint8_t* end; + if (mem < g_lowest_address) + { + start = (uint8_t*)mem; + } + else + { + start = (uint8_t*)g_lowest_address; + } + + if (((uint8_t*)mem + size) > g_highest_address) + { + end = (uint8_t*)mem + size; + } + else + { + end = (uint8_t*)g_highest_address; + } + + if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, loh_p) != 0) + { + virtual_free (mem, size); + return 0; + } + } + else + { + fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, loh_p); + virtual_free (mem, size); + } + + if (result) + { +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_add_segment (result, __this); +#else //SEG_MAPPING_TABLE + gc_heap::seg_table->insert ((uint8_t*)result, delta); +#endif //SEG_MAPPING_TABLE + } + } + +#ifdef BACKGROUND_GC + if (result) + { + ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), + settings.gc_index, current_bgc_state, 
+ seg_added); + bgc_verify_mark_array_cleared (result); + } +#endif //BACKGROUND_GC + + dprintf (GC_TABLE_LOG, ("h%d: new seg: %Ix-%Ix (%Id)", heap_number, result, ((uint8_t*)result + size), size)); + return result; +} + +void release_segment (heap_segment* sg) +{ + ptrdiff_t delta = 0; + FireEtwGCFreeSegment_V1((size_t)heap_segment_mem(sg), GetClrInstanceId()); + virtual_free (sg, (uint8_t*)heap_segment_reserved (sg)-(uint8_t*)sg); +} + +heap_segment* gc_heap::get_segment_for_loh (size_t size +#ifdef MULTIPLE_HEAPS + , gc_heap* hp +#endif //MULTIPLE_HEAPS + ) +{ +#ifndef MULTIPLE_HEAPS + gc_heap* hp = 0; +#endif //MULTIPLE_HEAPS + heap_segment* res = hp->get_segment (size, TRUE); + if (res != 0) + { +#ifdef MULTIPLE_HEAPS + heap_segment_heap (res) = hp; +#endif //MULTIPLE_HEAPS + res->flags |= heap_segment_flags_loh; + + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(res), (size_t)(heap_segment_reserved (res) - heap_segment_mem(res)), ETW::GCLog::ETW_GC_INFO::LARGE_OBJECT_HEAP, GetClrInstanceId()); + +#ifdef GC_PROFILING + if (CORProfilerTrackGC()) + UpdateGenerationBounds(); +#endif // GC_PROFILING + +#ifdef MULTIPLE_HEAPS + hp->thread_loh_segment (res); +#else + thread_loh_segment (res); +#endif //MULTIPLE_HEAPS + } + + return res; +} + +void gc_heap::thread_loh_segment (heap_segment* new_seg) +{ + heap_segment* seg = generation_allocation_segment (generation_of (max_generation + 1)); + + while (heap_segment_next_rw (seg)) + seg = heap_segment_next_rw (seg); + heap_segment_next (seg) = new_seg; +} + +heap_segment* +gc_heap::get_large_segment (size_t size, BOOL* did_full_compact_gc) +{ + *did_full_compact_gc = FALSE; + size_t last_full_compact_gc_count = get_full_compact_gc_count(); + + //access to get_segment needs to be serialized + add_saved_spinlock_info (me_release, mt_get_large_seg); + + dprintf (SPINLOCK_LOG, ("[%d]Seg: Lmsl", heap_number)); + leave_spin_lock (&more_space_lock); + enter_spin_lock (&gc_heap::gc_lock); + dprintf (SPINLOCK_LOG, ("[%d]Seg: 
Egc", heap_number)); + // if a GC happened between here and before we ask for a segment in + // get_large_segment, we need to count that GC. + size_t current_full_compact_gc_count = get_full_compact_gc_count(); + + if (current_full_compact_gc_count > last_full_compact_gc_count) + { + *did_full_compact_gc = TRUE; + } + +#ifdef BACKGROUND_GC + while (current_c_gc_state == c_gc_state_planning) + { + dprintf (3, ("lh state planning, waiting to get a large seg")); + + dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Lgc", heap_number)); + leave_spin_lock (&gc_lock); + background_gc_wait_lh (awr_get_loh_seg); + enter_spin_lock (&gc_lock); + dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Egc", heap_number)); + } + assert ((current_c_gc_state == c_gc_state_free) || + (current_c_gc_state == c_gc_state_marking)); +#endif //BACKGROUND_GC + + heap_segment* res = get_segment_for_loh (size +#ifdef MULTIPLE_HEAPS + , this +#endif //MULTIPLE_HEAPS + ); + + dprintf (SPINLOCK_LOG, ("[%d]Seg: A Lgc", heap_number)); + leave_spin_lock (&gc_heap::gc_lock); + enter_spin_lock (&more_space_lock); + dprintf (SPINLOCK_LOG, ("[%d]Seg: A Emsl", heap_number)); + add_saved_spinlock_info (me_acquire, mt_get_large_seg); + +#ifdef BACKGROUND_GC + wait_for_background_planning (awr_get_loh_seg); +#endif //BACKGROUND_GC + + return res; +} + +#if 0 +BOOL gc_heap::unprotect_segment (heap_segment* seg) +{ + uint8_t* start = align_lower_page (heap_segment_mem (seg)); + ptrdiff_t region_size = heap_segment_allocated (seg) - start; + + if (region_size != 0 ) + { + dprintf (3, ("unprotecting segment %Ix:", (size_t)seg)); + + BOOL status = GCToOSInterface::VirtualUnprotect (start, region_size); + assert (status); + return status; + } + return FALSE; +} +#endif + +#ifdef MULTIPLE_HEAPS +#ifdef _X86_ +#ifdef _MSC_VER +#pragma warning(disable:4035) + static ptrdiff_t get_cycle_count() + { + __asm rdtsc + } +#pragma warning(default:4035) +#elif defined(__GNUC__) + static ptrdiff_t get_cycle_count() + { + ptrdiff_t cycles; + ptrdiff_t 
cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return cycles; + } +#else //_MSC_VER +#error Unknown compiler +#endif //_MSC_VER +#elif defined(_TARGET_AMD64_) +#ifdef _MSC_VER +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) + static ptrdiff_t get_cycle_count() + { + return (ptrdiff_t)__rdtsc(); + } +#elif defined(__clang__) + static ptrdiff_t get_cycle_count() + { + ptrdiff_t cycles; + ptrdiff_t cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return (cyclesHi << 32) | cycles; + } +#else // _MSC_VER + extern "C" ptrdiff_t get_cycle_count(void); +#endif // _MSC_VER +#elif defined(_TARGET_ARM_) + static ptrdiff_t get_cycle_count() + { + // @ARMTODO: cycle counter is not exposed to user mode by CoreARM. For now (until we can show this + // makes a difference on the ARM configurations on which we'll run) just return 0. This will result in + // all buffer access times being reported as equal in access_time(). + return 0; + } +#elif defined(_TARGET_ARM64_) + static ptrdiff_t get_cycle_count() + { + // @ARM64TODO: cycle counter is not exposed to user mode by CoreARM. For now (until we can show this + // makes a difference on the ARM configurations on which we'll run) just return 0. This will result in + // all buffer access times being reported as equal in access_time(). 
+ return 0; + } +#else +#error NYI platform: get_cycle_count +#endif //_TARGET_X86_ + +class heap_select +{ + heap_select() {} + static uint8_t* sniff_buffer; + static unsigned n_sniff_buffers; + static unsigned cur_sniff_index; + + static uint8_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; + static uint8_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; + static uint8_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; + static uint8_t heap_no_to_cpu_group[MAX_SUPPORTED_CPUS]; + static uint8_t heap_no_to_group_proc[MAX_SUPPORTED_CPUS]; + static uint8_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; + + static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers) + { + ptrdiff_t start_cycles = get_cycle_count(); + uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE]; + assert (sniff == 0); + ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles; + // add sniff here just to defeat the optimizer + elapsed_cycles += sniff; + return (int) elapsed_cycles; + } + +public: + static BOOL init(int n_heaps) + { + assert (sniff_buffer == NULL && n_sniff_buffers == 0); + if (!GCToOSInterface::CanGetCurrentProcessorNumber()) + { + n_sniff_buffers = n_heaps*2+1; + size_t sniff_buf_size = 0; +#ifdef FEATURE_REDHAWK + size_t n_cache_lines = 1 + n_heaps*n_sniff_buffers + 1; + sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE; +#else + S_SIZE_T safe_sniff_buf_size = S_SIZE_T(1 + n_heaps*n_sniff_buffers + 1); + safe_sniff_buf_size *= HS_CACHE_LINE_SIZE; + if (safe_sniff_buf_size.IsOverflow()) + { + return FALSE; + } + sniff_buf_size = safe_sniff_buf_size.Value(); +#endif //FEATURE_REDHAWK + sniff_buffer = new (nothrow) uint8_t[sniff_buf_size]; + if (sniff_buffer == 0) + return FALSE; + memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); + } + + //can not enable gc numa aware, force all heaps to be in + //one numa node by filling the array with all 0s + if (!NumaNodeInfo::CanEnableGCNumaAware()) + 
            // NUMA disabled (or unavailable): put every heap in node 0 so the
            // numa_node_to_heap_map built later collapses to a single range.
            memset(heap_no_to_numa_node, 0, MAX_SUPPORTED_CPUS);

        return TRUE;
    }

    // Records which heap serves the processor the calling GC thread runs on.
    // Only meaningful when the OS can report the current processor number;
    // otherwise the sniff-buffer timing scheme below is used instead.
    static void init_cpu_mapping(gc_heap * /*heap*/, int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber() % gc_heap::n_heaps;
            // We can safely cast heap_number to a BYTE 'cause GetCurrentProcessCpuCount
            // only returns up to MAX_SUPPORTED_CPUS procs right now. We only ever create at most
            // MAX_SUPPORTED_CPUS GC threads.
            proc_no_to_heap_no[proc_no] = (uint8_t)heap_number;
        }
    }

    // Touches (writes) every sniff cache line belonging to heap_number so the
    // lines become resident in the caches near this heap's thread; select_heap
    // later times reads of these lines to guess proximity. No-op when the OS
    // can report processor numbers (the fast path makes sniffing unnecessary).
    static void mark_heap(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
            return;

        for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++)
            sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;
    }

    // Chooses a heap for the allocating thread. Fast path: map the current
    // processor number to a heap. Slow path: time one cache-line read per heap
    // from the sniff buffer and pick the fastest (= most likely cache-near)
    // heap; when the winner is decisive (2x faster than the runner-up), touch
    // its line to reinforce the association for the next sniff round.
    static int select_heap(alloc_context* acontext, int /*hint*/)
    {
        UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf

        if (GCToOSInterface::CanGetCurrentProcessorNumber())
            return proc_no_to_heap_no[GCToOSInterface::GetCurrentProcessorNumber() % gc_heap::n_heaps];

        // Rotate through the per-heap buffer copies so concurrent selectors
        // don't all hammer the same cache lines.
        unsigned sniff_index = Interlocked::Increment(&cur_sniff_index);
        sniff_index %= n_sniff_buffers;

        int best_heap = 0;
        int best_access_time = 1000*1000*1000;
        int second_best_access_time = best_access_time;

        uint8_t *l_sniff_buffer = sniff_buffer;
        unsigned l_n_sniff_buffers = n_sniff_buffers;
        for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++)
        {
            int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers);
            if (this_access_time < best_access_time)
            {
                second_best_access_time = best_access_time;
                best_access_time = this_access_time;
                best_heap = heap_number;
            }
            else if (this_access_time < second_best_access_time)
            {
                second_best_access_time = this_access_time;
            }
        }

        if (best_access_time*2 < second_best_access_time)
        {
            // Decisive winner: write its sniff line so the association sticks.
            sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;

            dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext));
        }
        else
        {
            dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext ));
        }

        return best_heap;
    }

    // True when heap selection is O(1) (current-processor lookup available).
    static bool can_find_heap_fast()
    {
        return GCToOSInterface::CanGetCurrentProcessorNumber();
    }

    // --- heap_number <-> processor / NUMA node / CPU group lookup tables ---

    static uint8_t find_proc_no_from_heap_no(int heap_number)
    {
        return heap_no_to_proc_no[heap_number];
    }

    static void set_proc_no_for_heap(int heap_number, uint8_t proc_no)
    {
        heap_no_to_proc_no[heap_number] = proc_no;
    }

    static uint8_t find_numa_node_from_heap_no(int heap_number)
    {
        return heap_no_to_numa_node[heap_number];
    }

    static void set_numa_node_for_heap(int heap_number, uint8_t numa_node)
    {
        heap_no_to_numa_node[heap_number] = numa_node;
    }

    static uint8_t find_cpu_group_from_heap_no(int heap_number)
    {
        return heap_no_to_cpu_group[heap_number];
    }

    static void set_cpu_group_for_heap(int heap_number, uint8_t group_number)
    {
        heap_no_to_cpu_group[heap_number] = group_number;
    }

    static uint8_t find_group_proc_from_heap_no(int heap_number)
    {
        return heap_no_to_group_proc[heap_number];
    }

    static void set_group_proc_for_heap(int heap_number, uint8_t group_proc)
    {
        heap_no_to_group_proc[heap_number] = group_proc;
    }

    // Builds numa_node_to_heap_map: entry k is the first heap number on NUMA
    // node k, with a terminating entry equal to nheaps. Assumes heaps were
    // assigned to nodes in non-decreasing node order (it only records
    // boundaries where consecutive heaps differ in node).
    static void init_numa_node_to_heap_map(int nheaps)
    {   // called right after GCHeap::Init() for each heap is finished
        // when numa is not enabled, heap_no_to_numa_node[] are all filled
        // with 0s during initialization, and will be treated as one node
        numa_node_to_heap_map[0] = 0;
        int node_index = 1;

        for (int i=1; i < nheaps; i++)
        {
            if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1])
                numa_node_to_heap_map[node_index++] = (uint8_t)i;
        }
        numa_node_to_heap_map[node_index] = (uint8_t)nheaps; //mark the end with nheaps
    }

    // Returns in [*start, *end) the heap-number range that shares heap hn's
    // NUMA node, using the map built by init_numa_node_to_heap_map.
    static void get_heap_range_for_heap(int hn, int* start, int* end)
    {
        // 1-tier/no numa case: heap_no_to_numa_node[] all zeros,
        // and treated as in one node. thus: start=0, end=n_heaps
        uint8_t numa_node = heap_no_to_numa_node[hn];
        *start = (int)numa_node_to_heap_map[numa_node];
        *end = (int)(numa_node_to_heap_map[numa_node+1]);
    }
};

// Definitions for heap_select's static members (zero-initialized).
uint8_t* heap_select::sniff_buffer;
unsigned heap_select::n_sniff_buffers;
unsigned heap_select::cur_sniff_index;
uint8_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
uint8_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
uint8_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
uint8_t heap_select::heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
uint8_t heap_select::heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
uint8_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];

// Creates the events and the join structure server GC threads synchronize on.
// Returns FALSE (after tearing down whatever was created) if any step fails.
BOOL gc_heap::create_thread_support (unsigned number_of_heaps)
{
    BOOL ret = FALSE;
    if (!gc_start_event.CreateOSManualEventNoThrow (FALSE))
    {
        goto cleanup;
    }
    if (!ee_suspend_event.CreateOSAutoEventNoThrow (FALSE))
    {
        goto cleanup;
    }
    if (!gc_t_join.init (number_of_heaps, join_flavor_server_gc))
    {
        goto cleanup;
    }

    ret = TRUE;

cleanup:

    if (!ret)
    {
        // Partial failure: release anything already created.
        destroy_thread_support();
    }

    return ret;
}

// Closes the thread-coordination events if they were created; safe to call
// after a partially-failed create_thread_support.
void gc_heap::destroy_thread_support ()
{
    if (ee_suspend_event.IsValid())
    {
        ee_suspend_event.CloseEvent();
    }
    if (gc_start_event.IsValid())
    {
        gc_start_event.CloseEvent();
    }
}

#if !defined(FEATURE_PAL)
// Computes the CPU-group affinity for a server GC heap thread: maps
// heap_number to a (group, group-relative processor) pair, records the
// mapping in heap_select, and records the heap's NUMA node (the processor's
// node when NUMA is available, otherwise the group number stands in for it).
void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity)
{
    affinity->Group = GCThreadAffinity::None;
    affinity->Processor = GCThreadAffinity::None;

    uint16_t gn, gpn;
    CPUGroupInfo::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);

    // Walk the bit positions until we reach gpn's bit.
    // NOTE(review): `mask` is not used to set affinity below, only gpn/gn are —
    // the loop effectively just iterates bit_number up to gpn.
    int bit_number = 0;
    for (uintptr_t mask = 1; mask !=0; mask <<=1)
    {
        if (bit_number == gpn)
        {
            dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, mask, heap_number));
            affinity->Processor = gpn;
            affinity->Group = gn;
heap_select::set_cpu_group_for_heap(heap_number, (uint8_t)gn); + heap_select::set_group_proc_for_heap(heap_number, (uint8_t)gpn); + if (NumaNodeInfo::CanEnableGCNumaAware()) + { + PROCESSOR_NUMBER proc_no; + proc_no.Group = gn; + proc_no.Number = (uint8_t)gpn; + proc_no.Reserved = 0; + + uint16_t node_no = 0; + if (NumaNodeInfo::GetNumaProcessorNodeEx(&proc_no, &node_no)) + heap_select::set_numa_node_for_heap(heap_number, (uint8_t)node_no); + } + else + { // no numa setting, each cpu group is treated as a node + heap_select::set_numa_node_for_heap(heap_number, (uint8_t)gn); + } + return; + } + bit_number++; + } +} + +void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affinity) +{ + affinity->Group = GCThreadAffinity::None; + affinity->Processor = GCThreadAffinity::None; + + uintptr_t pmask, smask; + if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask)) + { + pmask &= smask; + int bit_number = 0; + uint8_t proc_number = 0; + for (uintptr_t mask = 1; mask != 0; mask <<= 1) + { + if ((mask & pmask) != 0) + { + if (bit_number == heap_number) + { + dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number)); + affinity->Processor = proc_number; + heap_select::set_proc_no_for_heap(heap_number, proc_number); + if (NumaNodeInfo::CanEnableGCNumaAware()) + { + uint16_t node_no = 0; + PROCESSOR_NUMBER proc_no; + proc_no.Group = 0; + proc_no.Number = (uint8_t)proc_number; + proc_no.Reserved = 0; + if (NumaNodeInfo::GetNumaProcessorNodeEx(&proc_no, &node_no)) + { + heap_select::set_numa_node_for_heap(heap_number, (uint8_t)node_no); + } + } + return; + } + bit_number++; + } + proc_number++; + } + } +} +#endif // !FEATURE_PAL + +bool gc_heap::create_gc_thread () +{ + dprintf (3, ("Creating gc thread\n")); + + GCThreadAffinity affinity; + affinity.Group = GCThreadAffinity::None; + affinity.Processor = GCThreadAffinity::None; + +#if !defined(FEATURE_PAL) + if (!gc_thread_no_affinitize_p) + { + //We are about to set affinity for GC 
threads, it is a good place to setup NUMA and + //CPU groups, because the process mask, processor number, group number are all + //readyly available. + if (CPUGroupInfo::CanEnableGCCPUGroups()) + set_thread_group_affinity_for_heap(heap_number, &affinity); + else + set_thread_affinity_mask_for_heap(heap_number, &affinity); + } +#endif // !FEATURE_PAL + + return GCToOSInterface::CreateThread(gc_thread_stub, this, &affinity); +} + +#ifdef _MSC_VER +#pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path +#endif //_MSC_VER +void gc_heap::gc_thread_function () +{ + assert (gc_done_event.IsValid()); + assert (gc_start_event.IsValid()); + dprintf (3, ("gc thread started")); + + heap_select::init_cpu_mapping(this, heap_number); + + while (1) + { + assert (!gc_t_join.joined()); + + if (heap_number == 0) + { + gc_heap::ee_suspend_event.Wait(INFINITE, FALSE); + + BEGIN_TIMING(suspend_ee_during_log); + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC); + END_TIMING(suspend_ee_during_log); + + proceed_with_gc_p = TRUE; + + if (!should_proceed_with_gc()) + { + update_collection_counts_for_no_gc(); + proceed_with_gc_p = FALSE; + } + else + settings.init_mechanisms(); + dprintf (3, ("%d gc thread waiting...", heap_number)); + gc_start_event.Set(); + } + else + { + gc_start_event.Wait(INFINITE, FALSE); + dprintf (3, ("%d gc thread waiting... 
Done", heap_number)); + } + + if (proceed_with_gc_p) + garbage_collect (GCHeap::GcCondemnedGeneration); + + if (heap_number == 0) + { + if (proceed_with_gc_p && (!settings.concurrent)) + { + do_post_gc(); + } + +#ifdef BACKGROUND_GC + recover_bgc_settings(); +#endif //BACKGROUND_GC + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + hp->add_saved_spinlock_info (me_release, mt_block_gc); + dprintf (SPINLOCK_LOG, ("[%d]GC Lmsl", i)); + leave_spin_lock(&hp->more_space_lock); + } +#endif //MULTIPLE_HEAPS + + gc_heap::gc_started = FALSE; + + BEGIN_TIMING(restart_ee_during_log); + GCToEEInterface::RestartEE(TRUE); + END_TIMING(restart_ee_during_log); + process_sync_log_stats(); + + dprintf (SPINLOCK_LOG, ("GC Lgc")); + leave_spin_lock (&gc_heap::gc_lock); + + gc_heap::internal_gc_done = true; + + set_gc_done(); + } + else + { + int spin_count = 32 * (g_SystemInfo.dwNumberOfProcessors - 1); + + // wait until RestartEE has progressed to a stage where we can restart user threads + while (!gc_heap::internal_gc_done && !GCHeap::SafeToRestartManagedThreads()) + { + spin_and_switch (spin_count, (gc_heap::internal_gc_done || GCHeap::SafeToRestartManagedThreads())); + } + set_gc_done(); + } + } +} +#ifdef _MSC_VER +#pragma warning(default:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path +#endif //_MSC_VER + +#endif //MULTIPLE_HEAPS + +bool virtual_alloc_commit_for_heap(void* addr, size_t size, int h_number) +{ +#if defined(MULTIPLE_HEAPS) && !defined(FEATURE_REDHAWK) && !defined(FEATURE_PAL) + // Currently there is no way for us to specific the numa node to allocate on via hosting interfaces to + // a host. This will need to be added later. 
+ if (!CLRMemoryHosted()) + { + if (NumaNodeInfo::CanEnableGCNumaAware()) + { + uint32_t numa_node = heap_select::find_numa_node_from_heap_no(h_number); + void * ret = NumaNodeInfo::VirtualAllocExNuma(GetCurrentProcess(), addr, size, + MEM_COMMIT, PAGE_READWRITE, numa_node); + if (ret != NULL) + return true; + } + } +#else + UNREFERENCED_PARAMETER(h_number); +#endif + + //numa aware not enabled, or call failed --> fallback to VirtualCommit() + return GCToOSInterface::VirtualCommit(addr, size); +} + +#ifndef SEG_MAPPING_TABLE +inline +heap_segment* gc_heap::segment_of (uint8_t* add, ptrdiff_t& delta, BOOL verify_p) +{ + uint8_t* sadd = add; + heap_segment* hs = 0; + heap_segment* hs1 = 0; + if (!((add >= g_lowest_address) && (add < g_highest_address))) + { + delta = 0; + return 0; + } + //repeat in case there is a concurrent insertion in the table. + do + { + hs = hs1; + sadd = add; + seg_table->lookup (sadd); + hs1 = (heap_segment*)sadd; + } while (hs1 && !in_range_for_segment (add, hs1) && (hs != hs1)); + + hs = hs1; + + if ((hs == 0) || + (verify_p && (add > heap_segment_reserved ((heap_segment*)(sadd + delta))))) + delta = 0; + return hs; +} +#endif //SEG_MAPPING_TABLE + +class mark +{ +public: + uint8_t* first; + size_t len; + + // If we want to save space we can have a pool of plug_and_gap's instead of + // always having 2 allocated for each pinned plug. + gap_reloc_pair saved_pre_plug; + // If we decide to not compact, we need to restore the original values. + gap_reloc_pair saved_pre_plug_reloc; + + gap_reloc_pair saved_post_plug; + + // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke + // frames. Also if it's an artificially pinned plug created by us, it can certainly + // have references. + // We know these cases will be rare so we can optimize this to be only allocated on decommand. 
+ gap_reloc_pair saved_post_plug_reloc; + + // We need to calculate this after we are done with plan phase and before compact + // phase because compact phase will change the bricks so relocate_address will no + // longer work. + uint8_t* saved_pre_plug_info_reloc_start; + + // We need to save this because we will have no way to calculate it, unlike the + // pre plug info start which is right before this plug. + uint8_t* saved_post_plug_info_start; + +#ifdef SHORT_PLUGS + uint8_t* allocation_context_start_region; +#endif //SHORT_PLUGS + + // How the bits in these bytes are organized: + // MSB --> LSB + // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit + // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0. + BOOL saved_pre_p; + BOOL saved_post_p; + +#ifdef _DEBUG + // We are seeing this is getting corrupted for a PP with a NP after. + // Save it when we first set it and make sure it doesn't change. 
+ gap_reloc_pair saved_post_plug_debug; +#endif //_DEBUG + + size_t get_max_short_bits() + { + return (sizeof (gap_reloc_pair) / sizeof (uint8_t*)); + } + + // pre bits + size_t get_pre_short_start_bit () + { + return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); + } + + BOOL pre_short_p() + { + return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1))); + } + + void set_pre_short() + { + saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1)); + } + + void set_pre_short_bit (size_t bit) + { + saved_pre_p |= 1 << (get_pre_short_start_bit() + bit); + } + + BOOL pre_short_bit_p (size_t bit) + { + return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit))); + } + +#ifdef COLLECTIBLE_CLASS + void set_pre_short_collectible() + { + saved_pre_p |= 2; + } + + BOOL pre_short_collectible_p() + { + return (saved_pre_p & 2); + } +#endif //COLLECTIBLE_CLASS + + // post bits + size_t get_post_short_start_bit () + { + return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); + } + + BOOL post_short_p() + { + return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1))); + } + + void set_post_short() + { + saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1)); + } + + void set_post_short_bit (size_t bit) + { + saved_post_p |= 1 << (get_post_short_start_bit() + bit); + } + + BOOL post_short_bit_p (size_t bit) + { + return (saved_post_p & (1 << (get_post_short_start_bit() + bit))); + } + +#ifdef COLLECTIBLE_CLASS + void set_post_short_collectible() + { + saved_post_p |= 2; + } + + BOOL post_short_collectible_p() + { + return (saved_post_p & 2); + } +#endif //COLLECTIBLE_CLASS + + uint8_t* get_plug_address() { return first; } + + BOOL has_pre_plug_info() { return saved_pre_p; } + BOOL has_post_plug_info() { return saved_post_p; } + + gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; } + gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; } + void 
set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; } + uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; } + + // We need to temporarily recover the shortened plugs for compact phase so we can + // copy over the whole plug and their related info (mark bits/cards). But we will + // need to set the artificial gap back so compact phase can keep reading the plug info. + // We also need to recover the saved info because we'll need to recover it later. + // + // So we would call swap_p*_plug_and_saved once to recover the object info; then call + // it again to recover the artifical gap. + void swap_pre_plug_and_saved() + { + gap_reloc_pair temp; + memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); + memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); + saved_pre_plug_reloc = temp; + } + + void swap_post_plug_and_saved() + { + gap_reloc_pair temp; + memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); + memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); + saved_post_plug_reloc = temp; + } + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + void swap_pre_plug_and_saved_for_profiler() + { + gap_reloc_pair temp; + memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); + memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); + saved_pre_plug = temp; + } + + void swap_post_plug_and_saved_for_profiler() + { + gap_reloc_pair temp; + memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); + memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); + saved_post_plug = temp; + } +#endif //GC_PROFILING || //FEATURE_EVENT_TRACE + + // We should think about whether it's really necessary to have to copy back the pre plug + // info since it was already copied during compacting plugs. 
But if a plug doesn't move + // by < 3 ptr size, it means we'd have to recover pre plug info. + void recover_plug_info() + { + if (saved_pre_p) + { + if (gc_heap::settings.compaction) + { + dprintf (3, ("%Ix: REC Pre: %Ix-%Ix", + first, + &saved_pre_plug_reloc, + saved_pre_plug_info_reloc_start)); + memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); + } + else + { + dprintf (3, ("%Ix: REC Pre: %Ix-%Ix", + first, + &saved_pre_plug, + (first - sizeof (plug_and_gap)))); + memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); + } + } + + if (saved_post_p) + { + if (gc_heap::settings.compaction) + { + dprintf (3, ("%Ix: REC Post: %Ix-%Ix", + first, + &saved_post_plug_reloc, + saved_post_plug_info_start)); + memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); + } + else + { + dprintf (3, ("%Ix: REC Post: %Ix-%Ix", + first, + &saved_post_plug, + saved_post_plug_info_start)); + memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); + } + } + } +}; + + +void gc_mechanisms::init_mechanisms() +{ + condemned_generation = 0; + promotion = FALSE;//TRUE; + compaction = TRUE; +#ifdef FEATURE_LOH_COMPACTION + loh_compaction = gc_heap::should_compact_loh(); +#else + loh_compaction = FALSE; +#endif //FEATURE_LOH_COMPACTION + heap_expansion = FALSE; + concurrent = FALSE; + demotion = FALSE; + elevation_reduced = FALSE; + found_finalizers = FALSE; +#ifdef BACKGROUND_GC + background_p = recursive_gc_sync::background_running_p() != FALSE; + allocations_allowed = TRUE; +#endif //BACKGROUND_GC + +#ifdef BIT64 + entry_memory_load = 0; +#endif // BIT64 + +#ifdef STRESS_HEAP + stress_induced = FALSE; +#endif // STRESS_HEAP +} + +void gc_mechanisms::first_init() +{ + gc_index = 0; + gen0_reduction_count = 0; + should_lock_elevation = FALSE; + elevation_locked_count = 0; + reason = reason_empty; +#ifdef BACKGROUND_GC + pause_mode = 
gc_heap::gc_can_use_concurrent ? pause_interactive : pause_batch; +#ifdef _DEBUG + int debug_pause_mode = g_pConfig->GetGCLatencyMode(); + if (debug_pause_mode >= 0) + { + assert (debug_pause_mode <= pause_sustained_low_latency); + pause_mode = (gc_pause_mode)debug_pause_mode; + } +#endif //_DEBUG +#else //BACKGROUND_GC + pause_mode = pause_batch; +#endif //BACKGROUND_GC + + init_mechanisms(); +} + +void gc_mechanisms::record (gc_history_global* history) +{ +#ifdef MULTIPLE_HEAPS + history->num_heaps = gc_heap::n_heaps; +#else + history->num_heaps = 1; +#endif //MULTIPLE_HEAPS + + history->condemned_generation = condemned_generation; + history->gen0_reduction_count = gen0_reduction_count; + history->reason = reason; + history->pause_mode = (int)pause_mode; + history->mem_pressure = entry_memory_load; + history->global_mechanims_p = 0; + + // start setting the boolean values. + if (concurrent) + history->set_mechanism_p (global_concurrent); + + if (compaction) + history->set_mechanism_p (global_compaction); + + if (promotion) + history->set_mechanism_p (global_promotion); + + if (demotion) + history->set_mechanism_p (global_demotion); + + if (card_bundles) + history->set_mechanism_p (global_card_bundles); + + if (elevation_reduced) + history->set_mechanism_p (global_elevation); +} + +/********************************** + called at the beginning of GC to fix the allocated size to + what is really allocated, or to turn the free area into an unused object + It needs to be called after all of the other allocation contexts have been + fixed since it relies on alloc_allocated. 
+ ********************************/ + +//for_gc_p indicates that the work is being done for GC, +//as opposed to concurrent heap verification +void gc_heap::fix_youngest_allocation_area (BOOL for_gc_p) +{ + assert (alloc_allocated); + alloc_context* acontext = generation_alloc_context (youngest_generation); + dprintf (3, ("generation 0 alloc context: ptr: %Ix, limit %Ix", + (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit)); + fix_allocation_context (acontext, for_gc_p, get_alignment_constant (TRUE)); + if (for_gc_p) + { + acontext->alloc_ptr = alloc_allocated; + acontext->alloc_limit = acontext->alloc_ptr; + } + heap_segment_allocated (ephemeral_heap_segment) = + alloc_allocated; +} + +void gc_heap::fix_large_allocation_area (BOOL for_gc_p) +{ + UNREFERENCED_PARAMETER(for_gc_p); + +#ifdef _DEBUG + alloc_context* acontext = +#endif // _DEBUG + generation_alloc_context (large_object_generation); + assert (acontext->alloc_ptr == 0); + assert (acontext->alloc_limit == 0); +#if 0 + dprintf (3, ("Large object alloc context: ptr: %Ix, limit %Ix", + (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit)); + fix_allocation_context (acontext, FALSE, get_alignment_constant (FALSE)); + if (for_gc_p) + { + acontext->alloc_ptr = 0; + acontext->alloc_limit = acontext->alloc_ptr; + } +#endif //0 +} + +//for_gc_p indicates that the work is being done for GC, +//as opposed to concurrent heap verification +void gc_heap::fix_allocation_context (alloc_context* acontext, BOOL for_gc_p, + int align_const) +{ + dprintf (3, ("Fixing allocation context %Ix: ptr: %Ix, limit: %Ix", + (size_t)acontext, + (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit)); + + if (((size_t)(alloc_allocated - acontext->alloc_limit) > Align (min_obj_size, align_const)) || + !for_gc_p) + { + uint8_t* point = acontext->alloc_ptr; + if (point != 0) + { + size_t size = (acontext->alloc_limit - acontext->alloc_ptr); + // the allocation area was from the free list + // it was shortened by 
Align (min_obj_size) to make room for + // at least the shortest unused object + size += Align (min_obj_size, align_const); + assert ((size >= Align (min_obj_size))); + + dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, + (size_t)point + size )); + make_unused_array (point, size); + + if (for_gc_p) + { + generation_free_obj_space (generation_of (0)) += size; + alloc_contexts_used ++; + } + } + } + else if (for_gc_p) + { + alloc_allocated = acontext->alloc_ptr; + assert (heap_segment_allocated (ephemeral_heap_segment) <= + heap_segment_committed (ephemeral_heap_segment)); + alloc_contexts_used ++; + } + + + if (for_gc_p) + { + acontext->alloc_ptr = 0; + acontext->alloc_limit = acontext->alloc_ptr; + } +} + +//used by the heap verification for concurrent gc. +//it nulls out the words set by fix_allocation_context for heap_verification +void repair_allocation (alloc_context* acontext, void*) +{ + uint8_t* point = acontext->alloc_ptr; + + if (point != 0) + { + dprintf (3, ("Clearing [%Ix, %Ix[", (size_t)acontext->alloc_ptr, + (size_t)acontext->alloc_limit+Align(min_obj_size))); + memclr (acontext->alloc_ptr - plug_skew, + (acontext->alloc_limit - acontext->alloc_ptr)+Align (min_obj_size)); + } +} + +void void_allocation (alloc_context* acontext, void*) +{ + uint8_t* point = acontext->alloc_ptr; + + if (point != 0) + { + dprintf (3, ("Void [%Ix, %Ix[", (size_t)acontext->alloc_ptr, + (size_t)acontext->alloc_limit+Align(min_obj_size))); + acontext->alloc_ptr = 0; + acontext->alloc_limit = acontext->alloc_ptr; + } +} + +void gc_heap::repair_allocation_contexts (BOOL repair_p) +{ + GCToEEInterface::GcEnumAllocContexts (repair_p ? 
repair_allocation : void_allocation, NULL); + + alloc_context* acontext = generation_alloc_context (youngest_generation); + if (repair_p) + repair_allocation (acontext, NULL); + else + void_allocation (acontext, NULL); +} + +struct fix_alloc_context_args +{ + BOOL for_gc_p; + void* heap; +}; + +void fix_alloc_context(alloc_context* acontext, void* param) +{ + fix_alloc_context_args* args = (fix_alloc_context_args*)param; + GCHeap::GetGCHeap()->FixAllocContext(acontext, FALSE, (void*)(size_t)(args->for_gc_p), args->heap); +} + +void gc_heap::fix_allocation_contexts(BOOL for_gc_p) +{ + fix_alloc_context_args args; + args.for_gc_p = for_gc_p; + args.heap = __this; + GCToEEInterface::GcEnumAllocContexts(fix_alloc_context, &args); + + fix_youngest_allocation_area(for_gc_p); + fix_large_allocation_area(for_gc_p); +} + +void gc_heap::fix_older_allocation_area (generation* older_gen) +{ + heap_segment* older_gen_seg = generation_allocation_segment (older_gen); + if (generation_allocation_limit (older_gen) != + heap_segment_plan_allocated (older_gen_seg)) + { + uint8_t* point = generation_allocation_pointer (older_gen); + + size_t size = (generation_allocation_limit (older_gen) - + generation_allocation_pointer (older_gen)); + if (size != 0) + { + assert ((size >= Align (min_obj_size))); + dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, (size_t)point+size)); + make_unused_array (point, size); + } + } + else + { + assert (older_gen_seg != ephemeral_heap_segment); + heap_segment_plan_allocated (older_gen_seg) = + generation_allocation_pointer (older_gen); + generation_allocation_limit (older_gen) = + generation_allocation_pointer (older_gen); + } +} + +void gc_heap::set_allocation_heap_segment (generation* gen) +{ + uint8_t* p = generation_allocation_start (gen); + assert (p); + heap_segment* seg = generation_allocation_segment (gen); + if (in_range_for_segment (p, seg)) + return; + + // try ephemeral heap segment in case of heap expansion + seg = 
ephemeral_heap_segment; + if (!in_range_for_segment (p, seg)) + { + seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while (!in_range_for_segment (p, seg)) + { + seg = heap_segment_next_rw (seg); + PREFIX_ASSUME(seg != NULL); + } + } + + generation_allocation_segment (gen) = seg; +} + +void gc_heap::reset_allocation_pointers (generation* gen, uint8_t* start) +{ + assert (start); + assert (Align ((size_t)start) == (size_t)start); + generation_allocation_start (gen) = start; + generation_allocation_pointer (gen) = 0;//start + Align (min_obj_size); + generation_allocation_limit (gen) = 0;//generation_allocation_pointer (gen); + set_allocation_heap_segment (gen); +} + +#ifdef BACKGROUND_GC +//TODO BACKGROUND_GC this is for test only +void +gc_heap::disallow_new_allocation (int gen_number) +{ + UNREFERENCED_PARAMETER(gen_number); + settings.allocations_allowed = FALSE; +} +void +gc_heap::allow_new_allocation (int gen_number) +{ + UNREFERENCED_PARAMETER(gen_number); + settings.allocations_allowed = TRUE; +} + +#endif //BACKGROUND_GC + +bool gc_heap::new_allocation_allowed (int gen_number) +{ +#ifdef BACKGROUND_GC + //TODO BACKGROUND_GC this is for test only + if (!settings.allocations_allowed) + { + dprintf (2, ("new allocation not allowed")); + return FALSE; + } +#endif //BACKGROUND_GC + + if (dd_new_allocation (dynamic_data_of (gen_number)) < 0) + { + if (gen_number != 0) + { + // For LOH we will give it more budget before we try a GC. 
+ if (settings.concurrent) + { + dynamic_data* dd2 = dynamic_data_of (max_generation + 1 ); + + if (dd_new_allocation (dd2) <= (ptrdiff_t)(-2 * dd_desired_allocation (dd2))) + { + return TRUE; + } + } + } + return FALSE; + } +#ifndef MULTIPLE_HEAPS + else if ((settings.pause_mode != pause_no_gc) && (gen_number == 0)) + { + dprintf (3, ("evaluating allocation rate")); + dynamic_data* dd0 = dynamic_data_of (0); + if ((allocation_running_amount - dd_new_allocation (dd0)) > + dd_min_gc_size (dd0)) + { + uint32_t ctime = GCToOSInterface::GetLowPrecisionTimeStamp(); + if ((ctime - allocation_running_time) > 1000) + { + dprintf (2, (">1s since last gen0 gc")); + return FALSE; + } + else + { + allocation_running_amount = dd_new_allocation (dd0); + } + } + } +#endif //MULTIPLE_HEAPS + return TRUE; +} + +inline +ptrdiff_t gc_heap::get_desired_allocation (int gen_number) +{ + return dd_desired_allocation (dynamic_data_of (gen_number)); +} + +inline +ptrdiff_t gc_heap::get_new_allocation (int gen_number) +{ + return dd_new_allocation (dynamic_data_of (gen_number)); +} + +//return the amount allocated so far in gen_number +inline +ptrdiff_t gc_heap::get_allocation (int gen_number) +{ + dynamic_data* dd = dynamic_data_of (gen_number); + + return dd_desired_allocation (dd) - dd_new_allocation (dd); +} + +inline +BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len) +{ + size_t new_size = max (init_len, 2*len); + mark* tmp = new (nothrow) mark [new_size]; + if (tmp) + { + memcpy (tmp, m, len * sizeof (mark)); + delete m; + m = tmp; + len = new_size; + return TRUE; + } + else + { + dprintf (1, ("Failed to allocate %Id bytes for mark stack", (len * sizeof (mark)))); + return FALSE; + } +} + +inline +uint8_t* pinned_plug (mark* m) +{ + return m->first; +} + +inline +size_t& pinned_len (mark* m) +{ + return m->len; +} + +inline +void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) +{ + m->len = pinned_plug (m) - pin_free_space_start; +#ifdef SHORT_PLUGS + 
m->allocation_context_start_region = pin_free_space_start; +#endif //SHORT_PLUGS +} + +#ifdef SHORT_PLUGS +inline +uint8_t*& pin_allocation_context_start_region (mark* m) +{ + return m->allocation_context_start_region; +} + +uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) +{ + uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); + uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); + //dprintf (1, ("detected a very short plug: %Ix before PP %Ix, pad %Ix", + // old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); + dprintf (1, ("EP: %Ix(%Ix), %Ix", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); + return plug_start_in_saved; +} + +inline +void set_padding_in_expand (uint8_t* old_loc, + BOOL set_padding_on_saved_p, + mark* pinned_plug_entry) +{ + if (set_padding_on_saved_p) + { + set_plug_padded (get_plug_start_in_saved (old_loc, pinned_plug_entry)); + } + else + { + set_plug_padded (old_loc); + } +} + +inline +void clear_padding_in_expand (uint8_t* old_loc, + BOOL set_padding_on_saved_p, + mark* pinned_plug_entry) +{ + if (set_padding_on_saved_p) + { + clear_plug_padded (get_plug_start_in_saved (old_loc, pinned_plug_entry)); + } + else + { + clear_plug_padded (old_loc); + } +} +#endif //SHORT_PLUGS + +void gc_heap::reset_pinned_queue() +{ + mark_stack_tos = 0; + mark_stack_bos = 0; +} + +void gc_heap::reset_pinned_queue_bos() +{ + mark_stack_bos = 0; +} + +// last_pinned_plug is only for asserting purpose. 
+void gc_heap::merge_with_last_pinned_plug (uint8_t* last_pinned_plug, size_t plug_size) +{ + if (last_pinned_plug) + { + mark& last_m = mark_stack_array[mark_stack_tos - 1]; + assert (last_pinned_plug == last_m.first); + if (last_m.saved_post_p) + { + last_m.saved_post_p = FALSE; + dprintf (3, ("setting last plug %Ix post to false", last_m.first)); + // We need to recover what the gap has overwritten. + memcpy ((last_m.first + last_m.len - sizeof (plug_and_gap)), &(last_m.saved_post_plug), sizeof (gap_reloc_pair)); + } + last_m.len += plug_size; + dprintf (3, ("recovered the last part of plug %Ix, setting its plug size to %Ix", last_m.first, last_m.len)); + } +} + +void gc_heap::set_allocator_next_pin (uint8_t* alloc_pointer, uint8_t*& alloc_limit) +{ + dprintf (3, ("sanp: ptr: %Ix, limit: %Ix", alloc_pointer, alloc_limit)); + dprintf (3, ("oldest %Id: %Ix", mark_stack_bos, pinned_plug (oldest_pin()))); + if (!(pinned_plug_que_empty_p())) + { + mark* oldest_entry = oldest_pin(); + uint8_t* plug = pinned_plug (oldest_entry); + if ((plug >= alloc_pointer) && (plug < alloc_limit)) + { + alloc_limit = pinned_plug (oldest_entry); + dprintf (3, ("now setting alloc context: %Ix->%Ix(%Id)", + alloc_pointer, alloc_limit, (alloc_limit - alloc_pointer))); + } + } +} + +void gc_heap::set_allocator_next_pin (generation* gen) +{ + dprintf (3, ("SANP: gen%d, ptr; %Ix, limit: %Ix", gen->gen_num, generation_allocation_pointer (gen), generation_allocation_limit (gen))); + if (!(pinned_plug_que_empty_p())) + { + mark* oldest_entry = oldest_pin(); + uint8_t* plug = pinned_plug (oldest_entry); + if ((plug >= generation_allocation_pointer (gen)) && + (plug < generation_allocation_limit (gen))) + { + generation_allocation_limit (gen) = pinned_plug (oldest_entry); + dprintf (3, ("SANP: get next pin free space in gen%d for alloc: %Ix->%Ix(%Id)", + gen->gen_num, + generation_allocation_pointer (gen), generation_allocation_limit (gen), + (generation_allocation_limit (gen) - 
generation_allocation_pointer (gen)))); + } + else + assert (!((plug < generation_allocation_pointer (gen)) && + (plug >= heap_segment_mem (generation_allocation_segment (gen))))); + } +} + +// After we set the info, we increase tos. +void gc_heap::set_pinned_info (uint8_t* last_pinned_plug, size_t plug_len, uint8_t* alloc_pointer, uint8_t*& alloc_limit) +{ + UNREFERENCED_PARAMETER(last_pinned_plug); + + mark& m = mark_stack_array[mark_stack_tos]; + assert (m.first == last_pinned_plug); + + m.len = plug_len; + mark_stack_tos++; + set_allocator_next_pin (alloc_pointer, alloc_limit); +} + +// After we set the info, we increase tos. +void gc_heap::set_pinned_info (uint8_t* last_pinned_plug, size_t plug_len, generation* gen) +{ + UNREFERENCED_PARAMETER(last_pinned_plug); + + mark& m = mark_stack_array[mark_stack_tos]; + assert (m.first == last_pinned_plug); + + m.len = plug_len; + mark_stack_tos++; + assert (gen != 0); + // Why are we checking here? gen is never 0. + if (gen != 0) + { + set_allocator_next_pin (gen); + } +} + +size_t gc_heap::deque_pinned_plug () +{ + dprintf (3, ("dequed: %Id", mark_stack_bos)); + size_t m = mark_stack_bos; + mark_stack_bos++; + return m; +} + +inline +mark* gc_heap::pinned_plug_of (size_t bos) +{ + return &mark_stack_array [ bos ]; +} + +inline +mark* gc_heap::oldest_pin () +{ + return pinned_plug_of (mark_stack_bos); +} + +inline +BOOL gc_heap::pinned_plug_que_empty_p () +{ + return (mark_stack_bos == mark_stack_tos); +} + +inline +mark* gc_heap::before_oldest_pin() +{ + if (mark_stack_bos >= 1) + return pinned_plug_of (mark_stack_bos-1); + else + return 0; +} + +inline +BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) +{ + return ((o >= ephemeral_low) && (o < ephemeral_high)); +} + +#ifdef MH_SC_MARK +inline +int& gc_heap::mark_stack_busy() +{ + return g_mark_stack_busy [(heap_number+2)*HS_CACHE_LINE_SIZE/sizeof(int)]; +} +#endif //MH_SC_MARK + +void gc_heap::make_mark_stack (mark* arr) +{ + reset_pinned_queue(); + mark_stack_array = 
arr; + mark_stack_array_length = MARK_STACK_INITIAL_LENGTH; +#ifdef MH_SC_MARK + mark_stack_busy() = 0; +#endif //MH_SC_MARK +} + +#ifdef BACKGROUND_GC +inline +size_t& gc_heap::bpromoted_bytes(int thread) +{ +#ifdef MULTIPLE_HEAPS + return g_bpromoted [thread*16]; +#else //MULTIPLE_HEAPS + UNREFERENCED_PARAMETER(thread); + return g_bpromoted; +#endif //MULTIPLE_HEAPS +} + +void gc_heap::make_background_mark_stack (uint8_t** arr) +{ + background_mark_stack_array = arr; + background_mark_stack_array_length = MARK_STACK_INITIAL_LENGTH; + background_mark_stack_tos = arr; +} + +void gc_heap::make_c_mark_list (uint8_t** arr) +{ + c_mark_list = arr; + c_mark_list_index = 0; + c_mark_list_length = 1 + (page_size / MIN_OBJECT_SIZE); +} +#endif //BACKGROUND_GC + +#if defined (_TARGET_AMD64_) +#define brick_size ((size_t)4096) +#else +#define brick_size ((size_t)2048) +#endif //_TARGET_AMD64_ + +inline +size_t gc_heap::brick_of (uint8_t* add) +{ + return (size_t)(add - lowest_address) / brick_size; +} + +inline +uint8_t* gc_heap::brick_address (size_t brick) +{ + return lowest_address + (brick_size * brick); +} + + +void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) +{ + for (size_t i = brick_of (from);i < brick_of (end); i++) + brick_table[i] = 0; +} + +//codes for the brick entries: +//entry == 0 -> not assigned +//entry >0 offset is entry-1 +//entry <0 jump back entry bricks + + +inline +void gc_heap::set_brick (size_t index, ptrdiff_t val) +{ + if (val < -32767) + { + val = -32767; + } + assert (val < 32767); + if (val >= 0) + brick_table [index] = (short)val+1; + else + brick_table [index] = (short)val; +} + +inline +int gc_heap::brick_entry (size_t index) +{ + int val = brick_table [index]; + if (val == 0) + { + return -32768; + } + else if (val < 0) + { + return val; + } + else + return val-1; +} + + +inline +uint8_t* align_on_brick (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1)); +} + +inline +uint8_t* 
align_lower_brick (uint8_t* add) +{ + return (uint8_t*)(((size_t)add) & ~(brick_size - 1)); +} + +size_t size_brick_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & (brick_size-1)) == 0); + assert (((size_t)end & (brick_size-1)) == 0); + + return ((end - from) / brick_size) * sizeof (short); +} + +inline +uint8_t* gc_heap::card_address (size_t card) +{ + return (uint8_t*) (card_size * card); +} + +inline +size_t gc_heap::card_of ( uint8_t* object) +{ + return (size_t)(object) / card_size; +} + +inline +size_t gc_heap::card_to_brick (size_t card) +{ + return brick_of (card_address (card)); +} + +inline +uint8_t* align_on_card (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 )); +} +inline +uint8_t* align_on_card_word (uint8_t* add) +{ + return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1)); +} + +inline +uint8_t* align_lower_card (uint8_t* add) +{ + return (uint8_t*)((size_t)add & ~(card_size-1)); +} + +inline +void gc_heap::clear_card (size_t card) +{ + card_table [card_word (card)] = + (card_table [card_word (card)] & ~(1 << card_bit (card))); + dprintf (3,("Cleared card %Ix [%Ix, %Ix[", card, (size_t)card_address (card), + (size_t)card_address (card+1))); +} + +inline +void gc_heap::set_card (size_t card) +{ + card_table [card_word (card)] = + (card_table [card_word (card)] | (1 << card_bit (card))); +} + +inline +void gset_card (size_t card) +{ + g_card_table [card_word (card)] |= (1 << card_bit (card)); +} + +inline +BOOL gc_heap::card_set_p (size_t card) +{ + return ( card_table [ card_word (card) ] & (1 << card_bit (card))); +} + +// Returns the number of DWORDs in the card table that cover the +// range of addresses [from, end[. 
+size_t count_card_of (uint8_t* from, uint8_t* end) +{ + return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; +} + +// Returns the number of bytes to allocate for a card table +// that covers the range of addresses [from, end[. +size_t size_card_of (uint8_t* from, uint8_t* end) +{ + return count_card_of (from, end) * sizeof(uint32_t); +} + +#ifdef CARD_BUNDLE + +//The card bundle keeps track of groups of card words +#define card_bundle_word_width ((size_t)32) +//how do we express the fact that 32 bits (card_word_width) is one uint32_t? +#define card_bundle_size ((size_t)(OS_PAGE_SIZE/(sizeof (uint32_t)*card_bundle_word_width))) + +inline +size_t card_bundle_word (size_t cardb) +{ + return cardb / card_bundle_word_width; +} + +inline +uint32_t card_bundle_bit (size_t cardb) +{ + return (uint32_t)(cardb % card_bundle_word_width); +} + +size_t align_cardw_on_bundle (size_t cardw) +{ + return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); +} + +size_t cardw_card_bundle (size_t cardw) +{ + return cardw/card_bundle_size; +} + +size_t card_bundle_cardw (size_t cardb) +{ + return cardb*card_bundle_size; +} + +void gc_heap::card_bundle_clear(size_t cardb) +{ + card_bundle_table [card_bundle_word (cardb)] &= ~(1 << card_bundle_bit (cardb)); + dprintf (3,("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb), + (size_t)card_bundle_cardw (cardb+1))); +// printf ("Cleared card bundle %Ix\n", cardb); +} + +void gc_heap::card_bundles_set (size_t start_cardb, size_t end_cardb) +{ + size_t start_word = card_bundle_word (start_cardb); + size_t end_word = card_bundle_word (end_cardb); + if (start_word < end_word) + { + //set the partial words + card_bundle_table [start_word] |= highbits (~0u, card_bundle_bit (start_cardb)); + + if (card_bundle_bit (end_cardb)) + card_bundle_table [end_word] |= lowbits (~0u, card_bundle_bit (end_cardb)); + + for (size_t i = start_word+1; i < end_word; i++) + card_bundle_table [i] = 
~0u; + + } + else + { + card_bundle_table [start_word] |= (highbits (~0u, card_bundle_bit (start_cardb)) & + lowbits (~0u, card_bundle_bit (end_cardb))); + + } + +} + +BOOL gc_heap::card_bundle_set_p (size_t cardb) +{ + return ( card_bundle_table [ card_bundle_word (cardb) ] & (1 << card_bundle_bit (cardb))); +} + +size_t size_card_bundle_of (uint8_t* from, uint8_t* end) +{ + //align from to lower + from = (uint8_t*)((size_t)from & ~(card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1)); + //align to to upper + end = (uint8_t*)((size_t)(end + (card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1)) & + ~(card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1)); + + assert (((size_t)from & ((card_size*card_word_width*card_bundle_size*card_bundle_word_width)-1)) == 0); + assert (((size_t)end & ((card_size*card_word_width*card_bundle_size*card_bundle_word_width)-1)) == 0); + + return ((end - from) / (card_size*card_word_width*card_bundle_size*card_bundle_word_width)) * sizeof (uint32_t); +} + +uint32_t* translate_card_bundle_table (uint32_t* cb) +{ + return (uint32_t*)((uint8_t*)cb - ((((size_t)g_lowest_address) / (card_size*card_word_width*card_bundle_size*card_bundle_word_width)) * sizeof (uint32_t))); +} + +void gc_heap::enable_card_bundles () +{ + if (can_use_write_watch_for_card_table() && (!card_bundles_enabled())) + { + dprintf (3, ("Enabling card bundles")); + //set all of the card bundles + card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))), + cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address))))); + settings.card_bundles = TRUE; + } +} + +BOOL gc_heap::card_bundles_enabled () +{ + return settings.card_bundles; +} + +#endif //CARD_BUNDLE + +// We don't store seg_mapping_table in card_table_info because there's only always one view. 
+class card_table_info +{ +public: + unsigned recount; + uint8_t* lowest_address; + uint8_t* highest_address; + short* brick_table; + +#ifdef CARD_BUNDLE + uint32_t* card_bundle_table; +#endif //CARD_BUNDLE + + // mark_array is always at the end of the data structure because we + // want to be able to make one commit call for everything before it. +#ifdef MARK_ARRAY + uint32_t* mark_array; +#endif //MARK_ARRAY + + size_t size; + uint32_t* next_card_table; +}; + +//These are accessors on untranslated cardtable +inline +unsigned& card_table_refcount (uint32_t* c_table) +{ + return *(unsigned*)((char*)c_table - sizeof (card_table_info)); +} + +inline +uint8_t*& card_table_lowest_address (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address; +} + +uint32_t* translate_card_table (uint32_t* ct) +{ + return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); +} + +inline +uint8_t*& card_table_highest_address (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address; +} + +inline +short*& card_table_brick_table (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table; +} + +#ifdef CARD_BUNDLE +inline +uint32_t*& card_table_card_bundle_table (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table; +} +#endif //CARD_BUNDLE + +#ifdef MARK_ARRAY +/* Support for mark_array */ + +inline +uint32_t*& card_table_mark_array (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array; +} + +#ifdef BIT64 +#define mark_bit_pitch ((size_t)16) +#else +#define mark_bit_pitch ((size_t)8) +#endif // BIT64 +#define mark_word_width ((size_t)32) +#define mark_word_size (mark_word_width * mark_bit_pitch) + +inline +uint8_t* align_on_mark_bit (uint8_t* add) +{ + 
return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); +} + +inline +uint8_t* align_lower_mark_bit (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); +} + +inline +BOOL is_aligned_on_mark_word (uint8_t* add) +{ + return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); +} + +inline +uint8_t* align_on_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); +} + +inline +uint8_t* align_lower_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); +} + +inline +size_t mark_bit_of (uint8_t* add) +{ + return ((size_t)add / mark_bit_pitch); +} + +inline +unsigned int mark_bit_bit (size_t mark_bit) +{ + return (unsigned int)(mark_bit % mark_word_width); +} + +inline +size_t mark_bit_word (size_t mark_bit) +{ + return (mark_bit / mark_word_width); +} + +inline +size_t mark_word_of (uint8_t* add) +{ + return ((size_t)add) / mark_word_size; +} + +uint8_t* mark_word_address (size_t wd) +{ + return (uint8_t*)(wd*mark_word_size); +} + +uint8_t* mark_bit_address (size_t mark_bit) +{ + return (uint8_t*)(mark_bit*mark_bit_pitch); +} + +inline +size_t mark_bit_bit_of (uint8_t* add) +{ + return (((size_t)add / mark_bit_pitch) % mark_word_width); +} + +inline +unsigned int gc_heap::mark_array_marked(uint8_t* add) +{ + return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add)); +} + +inline +BOOL gc_heap::is_mark_bit_set (uint8_t* add) +{ + return (mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add))); +} + +inline +void gc_heap::mark_array_set_marked (uint8_t* add) +{ + size_t index = mark_word_of (add); + uint32_t val = (1 << mark_bit_bit_of (add)); +#ifdef MULTIPLE_HEAPS + Interlocked::Or (&(mark_array [index]), val); +#else + mark_array [index] |= val; +#endif +} + +inline +void gc_heap::mark_array_clear_marked (uint8_t* add) +{ + mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add)); +} + +size_t 
size_mark_array_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & ((mark_word_size)-1)) == 0); + assert (((size_t)end & ((mark_word_size)-1)) == 0); + return sizeof (uint32_t)*(((end - from) / mark_word_size)); +} + +//In order to eliminate the lowest_address in the mark array +//computations (mark_word_of, etc) mark_array is offset +// according to the lowest_address. +uint32_t* translate_mark_array (uint32_t* ma) +{ + return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_lowest_address)); +} + +// from and end must be page aligned addresses. +void gc_heap::clear_mark_array (uint8_t* from, uint8_t* end, BOOL check_only/*=TRUE*/ +#ifdef FEATURE_BASICFREEZE + , BOOL read_only/*=FALSE*/ +#endif // FEATURE_BASICFREEZE + ) +{ + if(!gc_can_use_concurrent) + return; + +#ifdef FEATURE_BASICFREEZE + if (!read_only) +#endif // FEATURE_BASICFREEZE + { + assert (from == align_on_mark_word (from)); + } + assert (end == align_on_mark_word (end)); + +#ifdef BACKGROUND_GC + uint8_t* current_lowest_address = background_saved_lowest_address; + uint8_t* current_highest_address = background_saved_highest_address; +#else + uint8_t* current_lowest_address = lowest_address; + uint8_t* current_highest_address = highest_address; +#endif //BACKGROUND_GC + + //there is a possibility of the addresses to be + //outside of the covered range because of a newly allocated + //large object segment + if ((end <= current_highest_address) && (from >= current_lowest_address)) + { + size_t beg_word = mark_word_of (align_on_mark_word (from)); + MAYBE_UNUSED_VAR(beg_word); + //align end word to make sure to cover the address + size_t end_word = mark_word_of (align_on_mark_word (end)); + MAYBE_UNUSED_VAR(end_word); + dprintf (3, ("Calling clearing mark array [%Ix, %Ix[ for addresses [%Ix, %Ix[(%s)", + (size_t)mark_word_address (beg_word), + (size_t)mark_word_address (end_word), + (size_t)from, (size_t)end, + (check_only ? 
"check_only" : "clear"))); + if (!check_only) + { + uint8_t* op = from; + while (op < mark_word_address (beg_word)) + { + mark_array_clear_marked (op); + op += mark_bit_pitch; + } + + memset (&mark_array[beg_word], 0, (end_word - beg_word)*sizeof (uint32_t)); + } +#ifdef _DEBUG + else + { + //Beware, it is assumed that the mark array word straddling + //start has been cleared before + //verify that the array is empty. + size_t markw = mark_word_of (align_on_mark_word (from)); + size_t markw_end = mark_word_of (align_on_mark_word (end)); + while (markw < markw_end) + { + assert (!(mark_array [markw])); + markw++; + } + uint8_t* p = mark_word_address (markw_end); + while (p < end) + { + assert (!(mark_array_marked (p))); + p++; + } + } +#endif //_DEBUG + } +} +#endif //MARK_ARRAY + +//These work on untranslated card tables +inline +uint32_t*& card_table_next (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table; +} + +inline +size_t& card_table_size (uint32_t* c_table) +{ + return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size; +} + +void own_card_table (uint32_t* c_table) +{ + card_table_refcount (c_table) += 1; +} + +void destroy_card_table (uint32_t* c_table); + +void delete_next_card_table (uint32_t* c_table) +{ + uint32_t* n_table = card_table_next (c_table); + if (n_table) + { + if (card_table_next (n_table)) + { + delete_next_card_table (n_table); + } + if (card_table_refcount (n_table) == 0) + { + destroy_card_table (n_table); + card_table_next (c_table) = 0; + } + } +} + +void release_card_table (uint32_t* c_table) +{ + assert (card_table_refcount (c_table) >0); + card_table_refcount (c_table) -= 1; + if (card_table_refcount (c_table) == 0) + { + delete_next_card_table (c_table); + if (card_table_next (c_table) == 0) + { + destroy_card_table (c_table); + // sever the link from the parent + if (&g_card_table[card_word (gcard_of(g_lowest_address))] == c_table) + { + 
g_card_table = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + } + else + { + uint32_t* p_table = &g_card_table[card_word (gcard_of(g_lowest_address))]; + if (p_table) + { + while (p_table && (card_table_next (p_table) != c_table)) + p_table = card_table_next (p_table); + card_table_next (p_table) = 0; + } + } + } + } +} + +void destroy_card_table (uint32_t* c_table) +{ +// delete (uint32_t*)&card_table_refcount(c_table); + + GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), card_table_size(c_table)); + dprintf (2, ("Table Virtual Free : %Ix", (size_t)&card_table_refcount(c_table))); +} + +uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) +{ + assert (g_lowest_address == start); + assert (g_highest_address == end); + + uint32_t virtual_reserve_flags = VirtualReserveFlags::None; + + size_t bs = size_brick_of (start, end); + size_t cs = size_card_of (start, end); +#ifdef MARK_ARRAY + size_t ms = (gc_can_use_concurrent ? 
+ size_mark_array_of (start, end) : + 0); +#else + size_t ms = 0; +#endif //MARK_ARRAY + + size_t cb = 0; + +#ifdef CARD_BUNDLE + if (can_use_write_watch_for_card_table()) + { + virtual_reserve_flags |= VirtualReserveFlags::WriteWatch; + cb = size_card_bundle_of (g_lowest_address, g_highest_address); + } +#endif //CARD_BUNDLE + + size_t wws = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + size_t sw_ww_table_offset = 0; + if (gc_can_use_concurrent) + { + size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; + sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); + wws = sw_ww_table_offset - sw_ww_size_before_table + SoftwareWriteWatch::GetTableByteSize(start, end); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef GROWABLE_SEG_MAPPING_TABLE + size_t st = size_seg_mapping_table_of (g_lowest_address, g_highest_address); + size_t st_table_offset = sizeof(card_table_info) + cs + bs + cb + wws; + size_t st_table_offset_aligned = align_for_seg_mapping_table (st_table_offset); + + st += (st_table_offset_aligned - st_table_offset); +#else //GROWABLE_SEG_MAPPING_TABLE + size_t st = 0; +#endif //GROWABLE_SEG_MAPPING_TABLE + + // it is impossible for alloc_size to overflow due bounds on each of + // its components. + size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); + size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1); + + uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags); + + if (!mem) + return 0; + + dprintf (2, ("Init - Card table alloc for %Id bytes: [%Ix, %Ix[", + alloc_size, (size_t)mem, (size_t)(mem+alloc_size))); + + // mark array will be committed separately (per segment). 
+ size_t commit_size = alloc_size - ms; + + if (!GCToOSInterface::VirtualCommit (mem, commit_size)) + { + dprintf (2, ("Card table commit failed")); + GCToOSInterface::VirtualRelease (mem, alloc_size_aligned); + return 0; + } + + // initialize the ref count + uint32_t* ct = (uint32_t*)(mem+sizeof (card_table_info)); + card_table_refcount (ct) = 0; + card_table_lowest_address (ct) = start; + card_table_highest_address (ct) = end; + card_table_brick_table (ct) = (short*)((uint8_t*)ct + cs); + card_table_size (ct) = alloc_size_aligned; + card_table_next (ct) = 0; + +#ifdef CARD_BUNDLE + card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs); +#endif //CARD_BUNDLE + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (gc_can_use_concurrent) + { + SoftwareWriteWatch::InitializeUntranslatedTable(mem + sw_ww_table_offset, start); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef GROWABLE_SEG_MAPPING_TABLE + seg_mapping_table = (seg_mapping*)(mem + st_table_offset_aligned); + seg_mapping_table = (seg_mapping*)((uint8_t*)seg_mapping_table - + size_seg_mapping_table_of (0, (align_lower_segment (g_lowest_address)))); +#endif //GROWABLE_SEG_MAPPING_TABLE + +#ifdef MARK_ARRAY + if (gc_can_use_concurrent) + card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); + else + card_table_mark_array (ct) = NULL; +#endif //MARK_ARRAY + + return translate_card_table(ct); +} + +void gc_heap::set_fgm_result (failure_get_memory f, size_t s, BOOL loh_p) +{ +#ifdef MULTIPLE_HEAPS + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + hp->fgm_result.set_fgm (f, s, loh_p); + } +#else //MULTIPLE_HEAPS + fgm_result.set_fgm (f, s, loh_p); +#endif //MULTIPLE_HEAPS +} + +//returns 0 for success, -1 otherwise +// We are doing all the decommitting here because we want to make sure we have +// enough memory to do so - if we do this during 
copy_brick_card_table and +// and fail to decommit it would make the failure case very complicated to +// handle. This way we can waste some decommit if we call this multiple +// times before the next FGC but it's easier to handle the failure case. +int gc_heap::grow_brick_card_tables (uint8_t* start, + uint8_t* end, + size_t size, + heap_segment* new_seg, + gc_heap* hp, + BOOL loh_p) +{ + uint8_t* la = g_lowest_address; + uint8_t* ha = g_highest_address; + uint8_t* saved_g_lowest_address = min (start, g_lowest_address); + uint8_t* saved_g_highest_address = max (end, g_highest_address); +#ifdef BACKGROUND_GC + // This value is only for logging purpose - it's not necessarily exactly what we + // would commit for mark array but close enough for diagnostics purpose. + size_t logging_ma_commit_size = size_mark_array_of (0, (uint8_t*)size); +#endif //BACKGROUND_GC + + // See if the address is already covered + if ((la != saved_g_lowest_address ) || (ha != saved_g_highest_address)) + { + { + //modify the higest address so the span covered + //is twice the previous one. + uint8_t* top = (uint8_t*)0 + Align (GCToOSInterface::GetVirtualMemoryLimit()); + // On non-Windows systems, we get only an approximate value that can possibly be + // slightly lower than the saved_g_highest_address. + // In such case, we set the top to the saved_g_highest_address so that the + // card and brick tables always cover the whole new range. 
+ if (top < saved_g_highest_address) + { + top = saved_g_highest_address; + } + size_t ps = ha-la; +#ifdef BIT64 + if (ps > (uint64_t)200*1024*1024*1024) + ps += (uint64_t)100*1024*1024*1024; + else +#endif // BIT64 + ps *= 2; + + if (saved_g_lowest_address < g_lowest_address) + { + if (ps > (size_t)g_lowest_address) + saved_g_lowest_address = (uint8_t*)OS_PAGE_SIZE; + else + { + assert (((size_t)g_lowest_address - ps) >= OS_PAGE_SIZE); + saved_g_lowest_address = min (saved_g_lowest_address, (g_lowest_address - ps)); + } + } + + if (saved_g_highest_address > g_highest_address) + { + saved_g_highest_address = max ((saved_g_lowest_address + ps), saved_g_highest_address); + if (saved_g_highest_address > top) + saved_g_highest_address = top; + } + } + dprintf (GC_TABLE_LOG, ("Growing card table [%Ix, %Ix[", + (size_t)saved_g_lowest_address, + (size_t)saved_g_highest_address)); + + bool write_barrier_updated = false; + uint32_t virtual_reserve_flags = VirtualReserveFlags::None; + uint32_t* saved_g_card_table = g_card_table; + uint32_t* ct = 0; + uint32_t* translated_ct = 0; + short* bt = 0; + + size_t cs = size_card_of (saved_g_lowest_address, saved_g_highest_address); + size_t bs = size_brick_of (saved_g_lowest_address, saved_g_highest_address); + +#ifdef MARK_ARRAY + size_t ms = (gc_heap::gc_can_use_concurrent ? 
+ size_mark_array_of (saved_g_lowest_address, saved_g_highest_address) : + 0); +#else + size_t ms = 0; +#endif //MARK_ARRAY + + size_t cb = 0; + +#ifdef CARD_BUNDLE + if (can_use_write_watch_for_card_table()) + { + virtual_reserve_flags = VirtualReserveFlags::WriteWatch; + cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address); + } +#endif //CARD_BUNDLE + + size_t wws = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + size_t sw_ww_table_offset = 0; + if (gc_can_use_concurrent) + { + size_t sw_ww_size_before_table = sizeof(card_table_info) + cs + bs + cb; + sw_ww_table_offset = SoftwareWriteWatch::GetTableStartByteOffset(sw_ww_size_before_table); + wws = + sw_ww_table_offset - + sw_ww_size_before_table + + SoftwareWriteWatch::GetTableByteSize(saved_g_lowest_address, saved_g_highest_address); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef GROWABLE_SEG_MAPPING_TABLE + size_t st = size_seg_mapping_table_of (saved_g_lowest_address, saved_g_highest_address); + size_t st_table_offset = sizeof(card_table_info) + cs + bs + cb + wws; + size_t st_table_offset_aligned = align_for_seg_mapping_table (st_table_offset); + st += (st_table_offset_aligned - st_table_offset); +#else //GROWABLE_SEG_MAPPING_TABLE + size_t st = 0; +#endif //GROWABLE_SEG_MAPPING_TABLE + + // it is impossible for alloc_size to overflow due bounds on each of + // its components. 
+ size_t alloc_size = sizeof (uint8_t)*(sizeof(card_table_info) + cs + bs + cb + wws + st + ms); + size_t alloc_size_aligned = Align (alloc_size, g_SystemInfo.dwAllocationGranularity-1); + dprintf (GC_TABLE_LOG, ("card table: %Id; brick table: %Id; card bundle: %Id; sw ww table: %Id; seg table: %Id; mark array: %Id", + cs, bs, cb, wws, st, ms)); + + uint8_t* mem = (uint8_t*)GCToOSInterface::VirtualReserve (0, alloc_size_aligned, 0, virtual_reserve_flags); + + if (!mem) + { + set_fgm_result (fgm_grow_table, alloc_size, loh_p); + goto fail; + } + + dprintf (GC_TABLE_LOG, ("Table alloc for %Id bytes: [%Ix, %Ix[", + alloc_size, (size_t)mem, (size_t)((uint8_t*)mem+alloc_size))); + + { + // mark array will be committed separately (per segment). + size_t commit_size = alloc_size - ms; + + if (!GCToOSInterface::VirtualCommit (mem, commit_size)) + { + dprintf (GC_TABLE_LOG, ("Table commit failed")); + set_fgm_result (fgm_commit_table, commit_size, loh_p); + goto fail; + } + } + + ct = (uint32_t*)(mem + sizeof (card_table_info)); + card_table_refcount (ct) = 0; + card_table_lowest_address (ct) = saved_g_lowest_address; + card_table_highest_address (ct) = saved_g_highest_address; + card_table_next (ct) = &g_card_table[card_word (gcard_of (la))]; + + //clear the card table +/* + memclr ((uint8_t*)ct, + (((saved_g_highest_address - saved_g_lowest_address)*sizeof (uint32_t) / + (card_size * card_word_width)) + + sizeof (uint32_t))); +*/ + + bt = (short*)((uint8_t*)ct + cs); + + // No initialization needed, will be done in copy_brick_card + + card_table_brick_table (ct) = bt; + +#ifdef CARD_BUNDLE + card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs); + //set all bundle to look at all of the cards + memset(card_table_card_bundle_table (ct), 0xFF, cb); +#endif //CARD_BUNDLE + +#ifdef GROWABLE_SEG_MAPPING_TABLE + { + seg_mapping* new_seg_mapping_table = (seg_mapping*)(mem + st_table_offset_aligned); + new_seg_mapping_table = 
(seg_mapping*)((uint8_t*)new_seg_mapping_table - + size_seg_mapping_table_of (0, (align_lower_segment (saved_g_lowest_address)))); + memcpy(&new_seg_mapping_table[seg_mapping_word_of(g_lowest_address)], + &seg_mapping_table[seg_mapping_word_of(g_lowest_address)], + size_seg_mapping_table_of(g_lowest_address, g_highest_address)); + + seg_mapping_table = new_seg_mapping_table; + } +#endif //GROWABLE_SEG_MAPPING_TABLE + +#ifdef MARK_ARRAY + if(gc_can_use_concurrent) + card_table_mark_array (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs + cb + wws + st); + else + card_table_mark_array (ct) = NULL; +#endif //MARK_ARRAY + + translated_ct = translate_card_table (ct); + + dprintf (GC_TABLE_LOG, ("card table: %Ix(translated: %Ix), seg map: %Ix, mark array: %Ix", + (size_t)ct, (size_t)translated_ct, (size_t)seg_mapping_table, (size_t)card_table_mark_array (ct))); + +#ifdef BACKGROUND_GC + if (hp->should_commit_mark_array()) + { + dprintf (GC_TABLE_LOG, ("new low: %Ix, new high: %Ix, latest mark array is %Ix(translate: %Ix)", + saved_g_lowest_address, saved_g_highest_address, + card_table_mark_array (ct), + translate_mark_array (card_table_mark_array (ct)))); + uint32_t* new_mark_array = (uint32_t*)((uint8_t*)card_table_mark_array (ct) - size_mark_array_of (0, saved_g_lowest_address)); + if (!commit_new_mark_array_global (new_mark_array)) + { + dprintf (GC_TABLE_LOG, ("failed to commit portions in the mark array for existing segments")); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + goto fail; + } + + if (!commit_mark_array_new_seg (hp, new_seg, translated_ct, saved_g_lowest_address)) + { + dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg")); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + goto fail; + } + } + else + { + clear_commit_flag_global(); + } +#endif //BACKGROUND_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (gc_can_use_concurrent) + { + // The current design of 
software write watch requires that the runtime is suspended during resize. Suspending + // on resize is preferred because it is a far less frequent operation than GetWriteWatch() / ResetWriteWatch(). + // Suspending here allows copying dirty state from the old table into the new table, and not have to merge old + // table info lazily as done for card tables. + + // Either this thread was the thread that did the suspension which means we are suspended; or this is called + // from a GC thread which means we are in a blocking GC and also suspended. + BOOL is_runtime_suspended = IsGCThread(); + if (!is_runtime_suspended) + { + // Note on points where the runtime is suspended anywhere in this function. Upon an attempt to suspend the + // runtime, a different thread may suspend first, causing this thread to block at the point of the suspend call. + // So, at any suspend point, externally visible state needs to be consistent, as code that depends on that state + // may run while this thread is blocked. This includes updates to g_card_table, g_lowest_address, and + // g_highest_address. + suspend_EE(); + } + + g_card_table = translated_ct; + + SoftwareWriteWatch::SetResizedUntranslatedTable( + mem + sw_ww_table_offset, + saved_g_lowest_address, + saved_g_highest_address); + + // Since the runtime is already suspended, update the write barrier here as well. + // This passes a bool telling whether we need to switch to the post + // grow version of the write barrier. This test tells us if the new + // segment was allocated at a lower address than the old, requiring + // that we start doing an upper bounds check in the write barrier. 
+ StompWriteBarrierResize(true, la != saved_g_lowest_address); + write_barrier_updated = true; + + if (!is_runtime_suspended) + { + restart_EE(); + } + } + else +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + { + g_card_table = translated_ct; + } + + if (!write_barrier_updated) + { + // This passes a bool telling whether we need to switch to the post + // grow version of the write barrier. This test tells us if the new + // segment was allocated at a lower address than the old, requiring + // that we start doing an upper bounds check in the write barrier. + // This will also suspend the runtime if the write barrier type needs + // to be changed, so we are doing this after all global state has + // been updated. See the comment above suspend_EE() above for more + // info. + StompWriteBarrierResize(!!IsGCThread(), la != saved_g_lowest_address); + } + + // We need to make sure that other threads executing checked write barriers + // will see the g_card_table update before g_lowest/highest_address updates. + // Otherwise, the checked write barrier may AV accessing the old card table + // with address that it does not cover. Write barriers access card table + // without memory barriers for performance reasons, so we need to flush + // the store buffers here. 
+ GCToOSInterface::FlushProcessWriteBuffers(); + + g_lowest_address = saved_g_lowest_address; + VolatileStore(&g_highest_address, saved_g_highest_address); + + return 0; + +fail: + //cleanup mess and return -1; + + if (mem) + { + assert(g_card_table == saved_g_card_table); + + //delete (uint32_t*)((uint8_t*)ct - sizeof(card_table_info)); + if (!GCToOSInterface::VirtualRelease (mem, alloc_size_aligned)) + { + dprintf (GC_TABLE_LOG, ("GCToOSInterface::VirtualRelease failed")); + assert (!"release failed"); + } + } + + return -1; + } + else + { +#ifdef BACKGROUND_GC + if (hp->should_commit_mark_array()) + { + dprintf (GC_TABLE_LOG, ("in range new seg %Ix, mark_array is %Ix", new_seg, hp->mark_array)); + if (!commit_mark_array_new_seg (hp, new_seg)) + { + dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg in range")); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + return -1; + } + } +#endif //BACKGROUND_GC + } + + return 0; +} + +//copy all of the arrays managed by the card table for a page aligned range +void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table, + short* old_brick_table, + heap_segment* seg, + uint8_t* start, uint8_t* end) +{ + ptrdiff_t brick_offset = brick_of (start) - brick_of (la); + + + dprintf (2, ("copying tables for range [%Ix %Ix[", (size_t)start, (size_t)end)); + + // copy brick table + short* brick_start = &brick_table [brick_of (start)]; + if (old_brick_table) + { + // segments are always on page boundaries + memcpy (brick_start, &old_brick_table[brick_offset], + size_brick_of (start, end)); + + } + else + { + // This is a large heap, just clear the brick table + } + + uint32_t* old_ct = &old_card_table[card_word (card_of (la))]; +#ifdef MARK_ARRAY +#ifdef BACKGROUND_GC + UNREFERENCED_PARAMETER(seg); + if (recursive_gc_sync::background_running_p()) + { + uint32_t* old_mark_array = card_table_mark_array (old_ct); + + // We don't need to go through all the card tables here because + 
// we only need to copy from the GC version of the mark array - when we + // mark (even in allocate_large_object) we always use that mark array. + if ((card_table_highest_address (old_ct) >= start) && + (card_table_lowest_address (old_ct) <= end)) + { + if ((background_saved_highest_address >= start) && + (background_saved_lowest_address <= end)) + { + //copy the mark bits + // segments are always on page boundaries + uint8_t* m_start = max (background_saved_lowest_address, start); + uint8_t* m_end = min (background_saved_highest_address, end); + memcpy (&mark_array[mark_word_of (m_start)], + &old_mark_array[mark_word_of (m_start) - mark_word_of (la)], + size_mark_array_of (m_start, m_end)); + } + } + else + { + //only large segments can be out of range + assert (old_brick_table == 0); + } + } +#else //BACKGROUND_GC + assert (seg != 0); + clear_mark_array (start, heap_segment_committed(seg)); +#endif //BACKGROUND_GC +#endif //MARK_ARRAY + + // n way merge with all of the card table ever used in between + uint32_t* ct = card_table_next (&card_table[card_word (card_of(lowest_address))]); + + assert (ct); + while (card_table_next (old_ct) != ct) + { + //copy if old card table contained [start, end[ + if ((card_table_highest_address (ct) >= end) && + (card_table_lowest_address (ct) <= start)) + { + // or the card_tables + uint32_t* dest = &card_table [card_word (card_of (start))]; + uint32_t* src = &((translate_card_table (ct)) [card_word (card_of (start))]); + ptrdiff_t count = count_card_of (start, end); + for (int x = 0; x < count; x++) + { + *dest |= *src; + dest++; + src++; + } + } + ct = card_table_next (ct); + } +} + +//initialize all of the arrays managed by the card table for a page aligned range when an existing ro segment becomes in range +void gc_heap::init_brick_card_range (heap_segment* seg) +{ + dprintf (2, ("initialising tables for range [%Ix %Ix[", + (size_t)heap_segment_mem (seg), + (size_t)heap_segment_allocated (seg))); + + // initialize the brick 
table + for (size_t b = brick_of (heap_segment_mem (seg)); + b < brick_of (align_on_brick (heap_segment_allocated (seg))); + b++) + { + set_brick (b, -1); + } + +#ifdef MARK_ARRAY + if (recursive_gc_sync::background_running_p() && (seg->flags & heap_segment_flags_ma_committed)) + { + assert (seg != 0); + clear_mark_array (heap_segment_mem (seg), heap_segment_committed(seg)); + } +#endif //MARK_ARRAY + + clear_card_for_addresses (heap_segment_mem (seg), + heap_segment_allocated (seg)); +} + +void gc_heap::copy_brick_card_table() +{ + uint8_t* la = lowest_address; + uint8_t* ha = highest_address; + MAYBE_UNUSED_VAR(ha); + uint32_t* old_card_table = card_table; + short* old_brick_table = brick_table; + + assert (la == card_table_lowest_address (&old_card_table[card_word (card_of (la))])); + assert (ha == card_table_highest_address (&old_card_table[card_word (card_of (la))])); + + /* todo: Need a global lock for this */ + uint32_t* ct = &g_card_table[card_word (gcard_of (g_lowest_address))]; + own_card_table (ct); + card_table = translate_card_table (ct); + /* End of global lock */ + highest_address = card_table_highest_address (ct); + lowest_address = card_table_lowest_address (ct); + + brick_table = card_table_brick_table (ct); + +#ifdef MARK_ARRAY + if (gc_can_use_concurrent) + { + mark_array = translate_mark_array (card_table_mark_array (ct)); + assert (mark_word_of (g_highest_address) == + mark_word_of (align_on_mark_word (g_highest_address))); + } + else + mark_array = NULL; +#endif //MARK_ARRAY + +#ifdef CARD_BUNDLE +#if defined(MARK_ARRAY) && defined(_DEBUG) +#ifdef GROWABLE_SEG_MAPPING_TABLE + size_t st = size_seg_mapping_table_of (g_lowest_address, g_highest_address); +#else //GROWABLE_SEG_MAPPING_TABLE + size_t st = 0; +#endif //GROWABLE_SEG_MAPPING_TABLE +#endif //MARK_ARRAY && _DEBUG + card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct)); + assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of 
(g_lowest_address))))] == + card_table_card_bundle_table (ct)); + + //set the card table if we are in a heap growth scenario + if (card_bundles_enabled()) + { + card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))), + cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address))))); + } + //check if we need to turn on card_bundles. +#ifdef MULTIPLE_HEAPS + // use INT64 arithmetic here because of possible overflow on 32p + uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*gc_heap::n_heaps; +#else + // use INT64 arithmetic here because of possible overflow on 32p + uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE; +#endif //MULTIPLE_HEAPS + if (reserved_memory >= th) + { + enable_card_bundles(); + } + +#endif //CARD_BUNDLE + + // for each of the segments and heaps, copy the brick table and + // or the card table + heap_segment* seg = generation_start_segment (generation_of (max_generation)); + while (seg) + { + if (heap_segment_read_only_p (seg) && !heap_segment_in_range_p (seg)) + { + //check if it became in range + if ((heap_segment_reserved (seg) > lowest_address) && + (heap_segment_mem (seg) < highest_address)) + { + set_ro_segment_in_range (seg); + } + } + else + { + + uint8_t* end = align_on_page (heap_segment_allocated (seg)); + copy_brick_card_range (la, old_card_table, + old_brick_table, + seg, + align_lower_page (heap_segment_mem (seg)), + end); + } + seg = heap_segment_next (seg); + } + + seg = generation_start_segment (large_object_generation); + while (seg) + { + if (heap_segment_read_only_p (seg) && !heap_segment_in_range_p (seg)) + { + //check if it became in range + if ((heap_segment_reserved (seg) > lowest_address) && + (heap_segment_mem (seg) < highest_address)) + { + set_ro_segment_in_range (seg); + } + } + else + { + uint8_t* end = align_on_page (heap_segment_allocated (seg)); + copy_brick_card_range (la, old_card_table, + 0, + seg, + align_lower_page (heap_segment_mem (seg)), + end); + } + seg = heap_segment_next (seg); + 
} + + release_card_table (&old_card_table[card_word (card_of(la))]); +} + +#ifdef FEATURE_BASICFREEZE +BOOL gc_heap::insert_ro_segment (heap_segment* seg) +{ + enter_spin_lock (&gc_heap::gc_lock); + + if (!gc_heap::seg_table->ensure_space_for_insert () + || (should_commit_mark_array() && !commit_mark_array_new_seg(__this, seg))) + { + leave_spin_lock(&gc_heap::gc_lock); + return FALSE; + } + + //insert at the head of the segment list + generation* gen2 = generation_of (max_generation); + heap_segment* oldhead = generation_start_segment (gen2); + heap_segment_next (seg) = oldhead; + generation_start_segment (gen2) = seg; + + seg_table->insert (heap_segment_mem(seg), (size_t)seg); + +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_add_ro_segment (seg); +#endif //SEG_MAPPING_TABLE + + //test if in range + if ((heap_segment_reserved (seg) > lowest_address) && + (heap_segment_mem (seg) < highest_address)) + { + set_ro_segment_in_range (seg); + } + + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(seg), (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), ETW::GCLog::ETW_GC_INFO::READ_ONLY_HEAP, GetClrInstanceId()); + + leave_spin_lock (&gc_heap::gc_lock); + return TRUE; +} + +// No one is calling this function right now. If this is getting called we need +// to take care of decommitting the mark array for it - we will need to remember +// which portion of the mark array was committed and only decommit that. 
+void gc_heap::remove_ro_segment (heap_segment* seg) +{ +//clear the mark bits so a new segment allocated in its place will have a clear mark bits +#ifdef MARK_ARRAY + if (gc_can_use_concurrent) + { + clear_mark_array (align_lower_mark_word (max (heap_segment_mem (seg), lowest_address)), + align_on_card_word (min (heap_segment_allocated (seg), highest_address)), + false); // read_only segments need the mark clear + } +#endif //MARK_ARRAY + + enter_spin_lock (&gc_heap::gc_lock); + + seg_table->remove ((uint8_t*)seg); + +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_remove_ro_segment (seg); +#endif //SEG_MAPPING_TABLE + + // Locate segment (and previous segment) in the list. + generation* gen2 = generation_of (max_generation); + heap_segment* curr_seg = generation_start_segment (gen2); + heap_segment* prev_seg = NULL; + + while (curr_seg && curr_seg != seg) + { + prev_seg = curr_seg; + curr_seg = heap_segment_next (curr_seg); + } + assert (curr_seg == seg); + + // Patch previous segment (or list head if there is none) to skip the removed segment. 
+ if (prev_seg) + heap_segment_next (prev_seg) = heap_segment_next (curr_seg); + else + generation_start_segment (gen2) = heap_segment_next (curr_seg); + + leave_spin_lock (&gc_heap::gc_lock); +} +#endif //FEATURE_BASICFREEZE + +BOOL gc_heap::set_ro_segment_in_range (heap_segment* seg) +{ + //set it in range + seg->flags |= heap_segment_flags_inrange; +// init_brick_card_range (seg); + ro_segments_in_range = TRUE; + //right now, segments aren't protected + //unprotect_segment (seg); + return TRUE; +} + +#ifdef MARK_LIST + +uint8_t** make_mark_list (size_t size) +{ + uint8_t** mark_list = new (nothrow) uint8_t* [size]; + return mark_list; +} + +#define swap(a,b){uint8_t* t; t = a; a = b; b = t;} + +void verify_qsort_array (uint8_t* *low, uint8_t* *high) +{ + uint8_t **i = 0; + + for (i = low+1; i <= high; i++) + { + if (*i < *(i-1)) + { + FATAL_GC_ERROR(); + } + } +} + +#ifndef USE_INTROSORT +void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) +{ + if (((low + 16) >= high) || (depth > 100)) + { + //insertion sort + uint8_t **i, **j; + for (i = low+1; i <= high; i++) + { + uint8_t* val = *i; + for (j=i;j >low && val<*(j-1);j--) + { + *j=*(j-1); + } + *j=val; + } + } + else + { + uint8_t *pivot, **left, **right; + + //sort low middle and high + if (*(low+((high-low)/2)) < *low) + swap (*(low+((high-low)/2)), *low); + if (*high < *low) + swap (*low, *high); + if (*high < *(low+((high-low)/2))) + swap (*(low+((high-low)/2)), *high); + + swap (*(low+((high-low)/2)), *(high-1)); + pivot = *(high-1); + left = low; right = high-1; + while (1) { + while (*(--right) > pivot); + while (*(++left) < pivot); + if (left < right) + { + swap(*left, *right); + } + else + break; + } + swap (*left, *(high-1)); + qsort1(low, left-1, depth+1); + qsort1(left+1, high, depth+1); + } +} +#endif //USE_INTROSORT +void rqsort1( uint8_t* *low, uint8_t* *high) +{ + if ((low + 16) >= high) + { + //insertion sort + uint8_t **i, **j; + for (i = low+1; i <= high; i++) + { + uint8_t* val = 
*i; + for (j=i;j >low && val>*(j-1);j--) + { + *j=*(j-1); + } + *j=val; + } + } + else + { + uint8_t *pivot, **left, **right; + + //sort low middle and high + if (*(low+((high-low)/2)) > *low) + swap (*(low+((high-low)/2)), *low); + if (*high > *low) + swap (*low, *high); + if (*high > *(low+((high-low)/2))) + swap (*(low+((high-low)/2)), *high); + + swap (*(low+((high-low)/2)), *(high-1)); + pivot = *(high-1); + left = low; right = high-1; + while (1) { + while (*(--right) < pivot); + while (*(++left) > pivot); + if (left < right) + { + swap(*left, *right); + } + else + break; + } + swap (*left, *(high-1)); + rqsort1(low, left-1); + rqsort1(left+1, high); + } +} + +#ifdef USE_INTROSORT +class introsort +{ + +private: + static const int size_threshold = 64; + static const int max_depth = 100; + + +inline static void swap_elements(uint8_t** i,uint8_t** j) + { + uint8_t* t=*i; + *i=*j; + *j=t; + } + +public: + static void sort (uint8_t** begin, uint8_t** end, int ignored) + { + ignored = 0; + introsort_loop (begin, end, max_depth); + insertionsort (begin, end); + } + +private: + + static void introsort_loop (uint8_t** lo, uint8_t** hi, int depth_limit) + { + while (hi-lo >= size_threshold) + { + if (depth_limit == 0) + { + heapsort (lo, hi); + return; + } + uint8_t** p=median_partition (lo, hi); + depth_limit=depth_limit-1; + introsort_loop (p, hi, depth_limit); + hi=p-1; + } + } + + static uint8_t** median_partition (uint8_t** low, uint8_t** high) + { + uint8_t *pivot, **left, **right; + + //sort low middle and high + if (*(low+((high-low)/2)) < *low) + swap_elements ((low+((high-low)/2)), low); + if (*high < *low) + swap_elements (low, high); + if (*high < *(low+((high-low)/2))) + swap_elements ((low+((high-low)/2)), high); + + swap_elements ((low+((high-low)/2)), (high-1)); + pivot = *(high-1); + left = low; right = high-1; + while (1) { + while (*(--right) > pivot); + while (*(++left) < pivot); + if (left < right) + { + swap_elements(left, right); + } + else + 
break; + } + swap_elements (left, (high-1)); + return left; + } + + + static void insertionsort (uint8_t** lo, uint8_t** hi) + { + for (uint8_t** i=lo+1; i <= hi; i++) + { + uint8_t** j = i; + uint8_t* t = *i; + while((j > lo) && (t <*(j-1))) + { + *j = *(j-1); + j--; + } + *j = t; + } + } + + static void heapsort (uint8_t** lo, uint8_t** hi) + { + size_t n = hi - lo + 1; + for (size_t i=n / 2; i >= 1; i--) + { + downheap (i,n,lo); + } + for (size_t i = n; i > 1; i--) + { + swap_elements (lo, lo + i - 1); + downheap(1, i - 1, lo); + } + } + + static void downheap (size_t i, size_t n, uint8_t** lo) + { + uint8_t* d = *(lo + i - 1); + size_t child; + while (i <= n / 2) + { + child = 2*i; + if (child < n && *(lo + child - 1)<(*(lo + child))) + { + child++; + } + if (!(d<*(lo + child - 1))) + { + break; + } + *(lo + i - 1) = *(lo + child - 1); + i = child; + } + *(lo + i - 1) = d; + } + +}; + +#endif //USE_INTROSORT + +#ifdef MULTIPLE_HEAPS +#ifdef PARALLEL_MARK_LIST_SORT +void gc_heap::sort_mark_list() +{ + // if this heap had a mark list overflow, we don't do anything + if (mark_list_index > mark_list_end) + { +// printf("sort_mark_list: overflow on heap %d\n", heap_number); + return; + } + + // if any other heap had a mark list overflow, we fake one too, + // so we don't use an incomplete mark list by mistake + for (int i = 0; i < n_heaps; i++) + { + if (g_heaps[i]->mark_list_index > g_heaps[i]->mark_list_end) + { + mark_list_index = mark_list_end + 1; +// printf("sort_mark_list: overflow on heap %d\n", i); + return; + } + } + +// unsigned long start = GetCycleCount32(); + + dprintf (3, ("Sorting mark lists")); + if (mark_list_index > mark_list) + _sort (mark_list, mark_list_index - 1, 0); + +// printf("first phase of sort_mark_list for heap %d took %u cycles to sort %u entries\n", this->heap_number, GetCycleCount32() - start, mark_list_index - mark_list); +// start = GetCycleCount32(); + + // first set the pieces for all heaps to empty + int heap_num; + for 
(heap_num = 0; heap_num < n_heaps; heap_num++) + { + mark_list_piece_start[heap_num] = NULL; + mark_list_piece_end[heap_num] = NULL; + } + + uint8_t** x = mark_list; + +// predicate means: x is still within the mark list, and within the bounds of this heap +#define predicate(x) (((x) < mark_list_index) && (*(x) < heap->ephemeral_high)) + + heap_num = -1; + while (x < mark_list_index) + { + gc_heap* heap; + // find the heap x points into - searching cyclically from the last heap, + // because in many cases the right heap is the next one or comes soon after + int last_heap_num = heap_num; + MAYBE_UNUSED_VAR(last_heap_num); + do + { + heap_num++; + if (heap_num >= n_heaps) + heap_num = 0; + assert(heap_num != last_heap_num); // we should always find the heap - infinite loop if not! + heap = g_heaps[heap_num]; + } + while (!(*x >= heap->ephemeral_low && *x < heap->ephemeral_high)); + + // x is the start of the mark list piece for this heap + mark_list_piece_start[heap_num] = x; + + // to find the end of the mark list piece for this heap, find the first x + // that has !predicate(x), i.e. that is either not in this heap, or beyond the end of the list + if (predicate(x)) + { + // let's see if we get lucky and the whole rest belongs to this piece + if (predicate(mark_list_index-1)) + { + x = mark_list_index; + mark_list_piece_end[heap_num] = x; + break; + } + + // we play a variant of binary search to find the point sooner. + // the first loop advances by increasing steps until the predicate turns false. + // then we retreat the last step, and the second loop advances by decreasing steps, keeping the predicate true. 
+ unsigned inc = 1; + do + { + inc *= 2; + uint8_t** temp_x = x; + x += inc; + if (temp_x > x) + { + break; + } + } + while (predicate(x)); + // we know that only the last step was wrong, so we undo it + x -= inc; + do + { + // loop invariant - predicate holds at x, but not x + inc + assert (predicate(x) && !(((x + inc) > x) && predicate(x + inc))); + inc /= 2; + if (((x + inc) > x) && predicate(x + inc)) + { + x += inc; + } + } + while (inc > 1); + // the termination condition and the loop invariant together imply this: + assert(predicate(x) && !predicate(x + inc) && (inc == 1)); + // so the spot we're looking for is one further + x += 1; + } + mark_list_piece_end[heap_num] = x; + } + +#undef predicate + +// printf("second phase of sort_mark_list for heap %d took %u cycles\n", this->heap_number, GetCycleCount32() - start); +} + +void gc_heap::append_to_mark_list(uint8_t **start, uint8_t **end) +{ + size_t slots_needed = end - start; + size_t slots_available = mark_list_end + 1 - mark_list_index; + size_t slots_to_copy = min(slots_needed, slots_available); + memcpy(mark_list_index, start, slots_to_copy*sizeof(*start)); + mark_list_index += slots_to_copy; +// printf("heap %d: appended %Id slots to mark_list\n", heap_number, slots_to_copy); +} + +void gc_heap::merge_mark_lists() +{ + uint8_t** source[MAX_SUPPORTED_CPUS]; + uint8_t** source_end[MAX_SUPPORTED_CPUS]; + int source_heap[MAX_SUPPORTED_CPUS]; + int source_count = 0; + + // in case of mark list overflow, don't bother + if (mark_list_index > mark_list_end) + { +// printf("merge_mark_lists: overflow\n"); + return; + } + + dprintf(3, ("merge_mark_lists: heap_number = %d starts out with %Id entries", heap_number, mark_list_index - mark_list)); +// unsigned long start = GetCycleCount32(); + for (int i = 0; i < n_heaps; i++) + { + gc_heap* heap = g_heaps[i]; + if (heap->mark_list_piece_start[heap_number] < heap->mark_list_piece_end[heap_number]) + { + source[source_count] = 
heap->mark_list_piece_start[heap_number]; + source_end[source_count] = heap->mark_list_piece_end[heap_number]; + source_heap[source_count] = i; + if (source_count < MAX_SUPPORTED_CPUS) + source_count++; + } + } +// printf("first phase of merge_mark_lists for heap %d took %u cycles\n", heap_number, GetCycleCount32() - start); + + dprintf(3, ("heap_number = %d has %d sources\n", heap_number, source_count)); +#if defined(_DEBUG) || defined(TRACE_GC) + for (int j = 0; j < source_count; j++) + { + dprintf(3, ("heap_number = %d ", heap_number)); + dprintf(3, (" source from heap %d = %Ix .. %Ix (%Id entries)", + (size_t)(source_heap[j]), (size_t)(source[j][0]), (size_t)(source_end[j][-1]), (size_t)(source_end[j] - source[j]))); + // the sources should all be sorted + for (uint8_t **x = source[j]; x < source_end[j] - 1; x++) + { + if (x[0] > x[1]) + { + dprintf(3, ("oops, mark_list from source %d for heap %d isn't sorted\n", j, heap_number)); + assert (0); + } + } + } +#endif //_DEBUG || TRACE_GC + +// start = GetCycleCount32(); + + mark_list = &g_mark_list_copy [heap_number*mark_list_size]; + mark_list_index = mark_list; + mark_list_end = &mark_list [mark_list_size-1]; + int piece_count = 0; + if (source_count == 0) + { + ; // nothing to do + } + else if (source_count == 1) + { + mark_list = source[0]; + mark_list_index = source_end[0]; + mark_list_end = mark_list_index; + piece_count++; + } + else + { + while (source_count > 1) + { + // find the lowest and second lowest value in the sources we're merging from + int lowest_source = 0; + uint8_t *lowest = *source[0]; + uint8_t *second_lowest = *source[1]; + for (int i = 1; i < source_count; i++) + { + if (lowest > *source[i]) + { + second_lowest = lowest; + lowest = *source[i]; + lowest_source = i; + } + else if (second_lowest > *source[i]) + { + second_lowest = *source[i]; + } + } + + // find the point in the lowest source where it either runs out or is not <= second_lowest anymore + + // let's first try to get lucky and 
see if the whole source is <= second_lowest -- this is actually quite common + uint8_t **x; + if (source_end[lowest_source][-1] <= second_lowest) + x = source_end[lowest_source]; + else + { + // use linear search to find the end -- could also use binary search as in sort_mark_list, + // but saw no improvement doing that + for (x = source[lowest_source]; x < source_end[lowest_source] && *x <= second_lowest; x++) + ; + } + + // blast this piece to the mark list + append_to_mark_list(source[lowest_source], x); + piece_count++; + + source[lowest_source] = x; + + // check whether this source is now exhausted + if (x >= source_end[lowest_source]) + { + // if it's not the source with the highest index, copy the source with the highest index + // over it so the non-empty sources are always at the beginning + if (lowest_source < source_count-1) + { + source[lowest_source] = source[source_count-1]; + source_end[lowest_source] = source_end[source_count-1]; + } + source_count--; + } + } + // we're left with just one source that we copy + append_to_mark_list(source[0], source_end[0]); + piece_count++; + } + +// printf("second phase of merge_mark_lists for heap %d took %u cycles to merge %d pieces\n", heap_number, GetCycleCount32() - start, piece_count); + +#if defined(_DEBUG) || defined(TRACE_GC) + // the final mark list must be sorted + for (uint8_t **x = mark_list; x < mark_list_index - 1; x++) + { + if (x[0] > x[1]) + { + dprintf(3, ("oops, mark_list for heap %d isn't sorted at the end of merge_mark_lists", heap_number)); + assert (0); + } + } +#endif //defined(_DEBUG) || defined(TRACE_GC) +} +#else //PARALLEL_MARK_LIST_SORT +void gc_heap::combine_mark_lists() +{ + dprintf (3, ("Combining mark lists")); + //verify if a heap has overflowed its mark list + BOOL use_mark_list = TRUE; + for (int i = 0; i < n_heaps; i++) + { + if (g_heaps [i]->mark_list_index > g_heaps [i]->mark_list_end) + { + use_mark_list = FALSE; + break; + } + } + + if (use_mark_list) + { + dprintf (3, 
("Using mark list")); + //compact the gaps out of the mark list + int gn = 0; + uint8_t** current_gap = g_heaps [gn]->mark_list_index; + uint8_t** current_gap_end = g_heaps[gn]->mark_list_end + 1; + uint8_t** dst_last = current_gap-1; + + int srcn = n_heaps-1; + gc_heap* srch = g_heaps [srcn]; + uint8_t** src = srch->mark_list_index - 1; + uint8_t** src_beg = srch->mark_list; + + while (current_gap <= src) + { + while ((gn < n_heaps-1) && (current_gap >= current_gap_end)) + { + //go to the next gap + gn++; + dprintf (3, ("Going to the next gap %d", gn)); + assert (gn < n_heaps); + current_gap = g_heaps [gn]->mark_list_index; + current_gap_end = g_heaps[gn]->mark_list_end + 1; + assert ((gn == (n_heaps-1)) || (current_gap_end == g_heaps[gn+1]->mark_list)); + } + while ((srcn > 0) && (src < src_beg)) + { + //go to the previous source + srcn--; + dprintf (3, ("going to the previous source %d", srcn)); + assert (srcn>=0); + gc_heap* srch = g_heaps [srcn]; + src = srch->mark_list_index - 1; + src_beg = srch->mark_list; + } + if (current_gap < src) + { + dst_last = current_gap; + *current_gap++ = *src--; + } + } + dprintf (3, ("src: %Ix dst_last: %Ix", (size_t)src, (size_t)dst_last)); + + uint8_t** end_of_list = max (src, dst_last); + + //sort the resulting compacted list + assert (end_of_list < &g_mark_list [n_heaps*mark_list_size]); + if (end_of_list > &g_mark_list[0]) + _sort (&g_mark_list[0], end_of_list, 0); + //adjust the mark_list to the begining of the resulting mark list. + for (int i = 0; i < n_heaps; i++) + { + g_heaps [i]->mark_list = g_mark_list; + g_heaps [i]->mark_list_index = end_of_list + 1; + g_heaps [i]->mark_list_end = end_of_list + 1; + } + } + else + { + uint8_t** end_of_list = g_mark_list; + //adjust the mark_list to the begining of the resulting mark list. 
+ //put the index beyond the end to turn off mark list processing + for (int i = 0; i < n_heaps; i++) + { + g_heaps [i]->mark_list = g_mark_list; + g_heaps [i]->mark_list_index = end_of_list + 1; + g_heaps [i]->mark_list_end = end_of_list; + } + } +} +#endif // PARALLEL_MARK_LIST_SORT +#endif //MULTIPLE_HEAPS +#endif //MARK_LIST + +#ifdef BIT64 +#define TOTAL_TIMES_TO_SHIFT 6 +#else +#define TOTAL_TIMES_TO_SHIFT 5 +#endif // BIT64 + +size_t round_up_power2 (size_t size) +{ + unsigned short shift = 1; + size_t shifted = 0; + + size--; + for (unsigned short i = 0; i < TOTAL_TIMES_TO_SHIFT; i++) + { + shifted = size | (size >> shift); + if (shifted == size) + { + break; + } + + size = shifted; + shift <<= 1; + } + shifted++; + + return shifted; +} + +inline +size_t round_down_power2 (size_t size) +{ + size_t power2 = round_up_power2 (size); + + if (power2 != size) + { + power2 >>= 1; + } + + return power2; +} + +// the index starts from 0. +int index_of_set_bit (size_t power2) +{ + int low = 0; + int high = sizeof (size_t) * 8 - 1; + int mid; + while (low <= high) + { + mid = ((low + high)/2); + size_t temp = (size_t)1 << mid; + if (power2 & temp) + { + return mid; + } + else if (power2 < temp) + { + high = mid - 1; + } + else + { + low = mid + 1; + } + } + + return -1; +} + +inline +int relative_index_power2_plug (size_t power2) +{ + int index = index_of_set_bit (power2); + assert (index <= MAX_INDEX_POWER2); + + return ((index < MIN_INDEX_POWER2) ? 0 : (index - MIN_INDEX_POWER2)); +} + +inline +int relative_index_power2_free_space (size_t power2) +{ + int index = index_of_set_bit (power2); + assert (index <= MAX_INDEX_POWER2); + + return ((index < MIN_INDEX_POWER2) ? -1 : (index - MIN_INDEX_POWER2)); +} + +class seg_free_spaces +{ + struct seg_free_space + { + BOOL is_plug; + void* start; + }; + + struct free_space_bucket + { + seg_free_space* free_space; + ptrdiff_t count_add; // Assigned when we first contruct the array. 
+ ptrdiff_t count_fit; // How many items left when we are fitting plugs. + }; + + void move_bucket (int old_power2, int new_power2) + { + // PREFAST warning 22015: old_power2 could be negative + assert (old_power2 >= 0); + assert (old_power2 >= new_power2); + + if (old_power2 == new_power2) + { + return; + } + + seg_free_space* src_index = free_space_buckets[old_power2].free_space; + for (int i = old_power2; i > new_power2; i--) + { + seg_free_space** dest = &(free_space_buckets[i].free_space); + (*dest)++; + + seg_free_space* dest_index = free_space_buckets[i - 1].free_space; + if (i > (new_power2 + 1)) + { + seg_free_space temp = *src_index; + *src_index = *dest_index; + *dest_index = temp; + } + src_index = dest_index; + } + + free_space_buckets[old_power2].count_fit--; + free_space_buckets[new_power2].count_fit++; + } + +#ifdef _DEBUG + + void dump_free_space (seg_free_space* item) + { + uint8_t* addr = 0; + size_t len = 0; + + if (item->is_plug) + { + mark* m = (mark*)(item->start); + len = pinned_len (m); + addr = pinned_plug (m) - len; + } + else + { + heap_segment* seg = (heap_segment*)(item->start); + addr = heap_segment_plan_allocated (seg); + len = heap_segment_committed (seg) - addr; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]0x%Ix %Id", heap_num, addr, len)); + } + + void dump() + { + seg_free_space* item = NULL; + int i = 0; + + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); + for (i = 0; i < (free_space_bucket_count - 1); i++) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + while (item < free_space_buckets[i + 1].free_space) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", 
heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + + while (item <= &seg_free_space_array[free_space_item_count - 1]) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } + +#endif //_DEBUG + + free_space_bucket* free_space_buckets; + seg_free_space* seg_free_space_array; + ptrdiff_t free_space_bucket_count; + ptrdiff_t free_space_item_count; + int base_power2; + int heap_num; +#ifdef _DEBUG + BOOL has_end_of_seg; +#endif //_DEBUG + +public: + + seg_free_spaces (int h_number) + { + heap_num = h_number; + } + + BOOL alloc () + { + size_t total_prealloc_size = + MAX_NUM_BUCKETS * sizeof (free_space_bucket) + + MAX_NUM_FREE_SPACES * sizeof (seg_free_space); + + free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; + + return (!!free_space_buckets); + } + + // We take the ordered free space array we got from the 1st pass, + // and feed the portion that we decided to use to this method, ie, + // the largest item_count free spaces. 
+ void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count) + { + assert (free_space_buckets); + assert (item_count <= (size_t)MAX_PTR); + + free_space_bucket_count = bucket_count; + free_space_item_count = item_count; + base_power2 = base; +#ifdef _DEBUG + has_end_of_seg = FALSE; +#endif //_DEBUG + + ptrdiff_t total_item_count = 0; + ptrdiff_t i = 0; + + seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); + + for (i = 0; i < (ptrdiff_t)item_count; i++) + { + seg_free_space_array[i].start = 0; + seg_free_space_array[i].is_plug = FALSE; + } + + for (i = 0; i < bucket_count; i++) + { + free_space_buckets[i].count_add = ordered_free_spaces[i]; + free_space_buckets[i].count_fit = ordered_free_spaces[i]; + free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; + total_item_count += free_space_buckets[i].count_add; + } + + assert (total_item_count == (ptrdiff_t)item_count); + } + + // If we are adding a free space before a plug we pass the + // mark stack position so we can update the length; we could + // also be adding the free space after the last plug in which + // case start is the segment which we'll need to update the + // heap_segment_plan_allocated. + void add (void* start, BOOL plug_p, BOOL first_p) + { + size_t size = (plug_p ? 
+ pinned_len ((mark*)start) : + (heap_segment_committed ((heap_segment*)start) - + heap_segment_plan_allocated ((heap_segment*)start))); + + if (plug_p) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %Id", heap_num, size)); + } + else + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %Id", heap_num, size)); +#ifdef _DEBUG + has_end_of_seg = TRUE; +#endif //_DEBUG + } + + if (first_p) + { + size_t eph_gen_starts = gc_heap::eph_gen_starts_size; + size -= eph_gen_starts; + if (plug_p) + { + mark* m = (mark*)(start); + pinned_len (m) -= eph_gen_starts; + } + else + { + heap_segment* seg = (heap_segment*)start; + heap_segment_plan_allocated (seg) += eph_gen_starts; + } + } + + int bucket_power2 = index_of_set_bit (round_down_power2 (size)); + if (bucket_power2 < base_power2) + { + return; + } + + free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; + + seg_free_space* bucket_free_space = bucket->free_space; + assert (plug_p || (!plug_p && bucket->count_add)); + + if (bucket->count_add == 0) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); + return; + } + + ptrdiff_t index = bucket->count_add - 1; + + dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %Ix; len: %Id (2^%d)", + heap_num, + (plug_p ? + (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : + heap_segment_plan_allocated ((heap_segment*)start)), + size, + bucket_power2)); + + if (plug_p) + { + bucket_free_space[index].is_plug = TRUE; + } + + bucket_free_space[index].start = start; + bucket->count_add--; + } + +#ifdef _DEBUG + + // Do a consistency check after all free spaces are added. 
+ void check() + { + ptrdiff_t i = 0; + int end_of_seg_count = 0; + + for (i = 0; i < free_space_item_count; i++) + { + assert (seg_free_space_array[i].start); + if (!(seg_free_space_array[i].is_plug)) + { + end_of_seg_count++; + } + } + + if (has_end_of_seg) + { + assert (end_of_seg_count == 1); + } + else + { + assert (end_of_seg_count == 0); + } + + for (i = 0; i < free_space_bucket_count; i++) + { + assert (free_space_buckets[i].count_add == 0); + } + } + +#endif //_DEBUG + + uint8_t* fit (uint8_t* old_loc, +#ifdef SHORT_PLUGS + BOOL set_padding_on_saved_p, + mark* pinned_plug_entry, +#endif //SHORT_PLUGS + size_t plug_size + REQD_ALIGN_AND_OFFSET_DCL) + { + if (old_loc) + { +#ifdef SHORT_PLUGS + assert (!is_plug_padded (old_loc)); +#endif //SHORT_PLUGS + assert (!node_realigned (old_loc)); + } + + size_t saved_plug_size = plug_size; + +#ifdef FEATURE_STRUCTALIGN + // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account + _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); +#endif // FEATURE_STRUCTALIGN + // TODO: this is also not large alignment ready. We would need to consider alignment when chosing the + // the bucket. + + size_t plug_size_to_fit = plug_size; + + int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0; + +#ifdef SHORT_PLUGS + plug_size_to_fit += (pad_in_front ? 
Align(min_obj_size) : 0); +#endif //SHORT_PLUGS + + int plug_power2 = index_of_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); + ptrdiff_t i; + uint8_t* new_address = 0; + + if (plug_power2 < base_power2) + { + plug_power2 = base_power2; + } + + int chosen_power2 = plug_power2 - base_power2; +retry: + for (i = chosen_power2; i < free_space_bucket_count; i++) + { + if (free_space_buckets[i].count_fit != 0) + { + break; + } + chosen_power2++; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %Id (2^%d) using 2^%d free space", + heap_num, + plug_size, + plug_power2, + (chosen_power2 + base_power2))); + + assert (i < free_space_bucket_count); + + seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; + ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; + size_t new_free_space_size = 0; + BOOL can_fit = FALSE; + size_t pad = 0; + + for (i = 0; i < free_space_count; i++) + { + size_t free_space_size = 0; + pad = 0; +#ifdef SHORT_PLUGS + BOOL short_plugs_padding_p = FALSE; +#endif //SHORT_PLUGS + BOOL realign_padding_p = FALSE; + + if (bucket_free_space[i].is_plug) + { + mark* m = (mark*)(bucket_free_space[i].start); + uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); + +#ifdef SHORT_PLUGS + if ((pad_in_front & USE_PADDING_FRONT) && + (((plug_free_space_start - pin_allocation_context_start_region (m))==0) || + ((plug_free_space_start - pin_allocation_context_start_region (m))>=DESIRED_PLUG_LENGTH))) + { + pad = Align (min_obj_size); + short_plugs_padding_p = TRUE; + } +#endif //SHORT_PLUGS + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start+pad))) + { + pad += switch_alignment_size (pad != 0); + realign_padding_p = TRUE; + } + + plug_size = saved_plug_size + pad; + + free_space_size = pinned_len (m); + new_address = pinned_plug (m) - pinned_len (m); + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { 
+ new_free_space_size = free_space_size - plug_size; + pinned_len (m) = new_free_space_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%Ix->0x%Ix(%Ix)(%Ix), [0x%Ix (2^%d) -> [0x%Ix (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + pad, + pinned_plug (m), + index_of_set_bit (round_down_power2 (free_space_size)), + (pinned_plug (m) - pinned_len (m)), + index_of_set_bit (round_down_power2 (new_free_space_size)))); +#endif //SIMPLE_DPRINTF + +#ifdef SHORT_PLUGS + if (short_plugs_padding_p) + { + pin_allocation_context_start_region (m) = plug_free_space_start; + set_padding_in_expand (old_loc, set_padding_on_saved_p, pinned_plug_entry); + } +#endif //SHORT_PLUGS + + if (realign_padding_p) + { + set_node_realigned (old_loc); + } + + can_fit = TRUE; + } + } + else + { + heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); + free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) + { + pad = switch_alignment_size (FALSE); + realign_padding_p = TRUE; + } + + plug_size = saved_plug_size + pad; + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { + new_address = heap_segment_plan_allocated (seg); + new_free_space_size = free_space_size - plug_size; + heap_segment_plan_allocated (seg) = new_address + plug_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%Ix-> 0x%Ix(%Ix) (2^%d) -> 0x%Ix (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + index_of_set_bit (round_down_power2 (free_space_size)), + heap_segment_plan_allocated (seg), + index_of_set_bit (round_down_power2 (new_free_space_size)))); +#endif //SIMPLE_DPRINTF + + if (realign_padding_p) + set_node_realigned (old_loc); + + can_fit = TRUE; + } + } + + if (can_fit) + { + break; + } + } + + if (!can_fit) + { + assert (chosen_power2 == 0); + chosen_power2 = 1; + goto 
retry; + } + else + { + if (pad) + { + new_address += pad; + } + assert ((chosen_power2 && (i == 0)) || + (!chosen_power2) && (i < free_space_count)); + } + + int new_bucket_power2 = index_of_set_bit (round_down_power2 (new_free_space_size)); + + if (new_bucket_power2 < base_power2) + { + new_bucket_power2 = base_power2; + } + + move_bucket (chosen_power2, new_bucket_power2 - base_power2); + + //dump(); + + return new_address; + } + + void cleanup () + { + if (free_space_buckets) + { + delete [] free_space_buckets; + } + if (seg_free_space_array) + { + delete [] seg_free_space_array; + } + } +}; + + +#define marked(i) header(i)->IsMarked() +#define set_marked(i) header(i)->SetMarked() +#define clear_marked(i) header(i)->ClearMarked() +#define pinned(i) header(i)->IsPinned() +#define set_pinned(i) header(i)->SetPinned() +#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); + +inline size_t my_get_size (Object* ob) +{ + MethodTable* mT = header(ob)->GetMethodTable(); + return (mT->GetBaseSize() + + (mT->HasComponentSize() ? 
+ ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); +} + +//#define size(i) header(i)->GetSize() +#define size(i) my_get_size (header(i)) + +#define contain_pointers(i) header(i)->ContainsPointers() +#ifdef COLLECTIBLE_CLASS +#define contain_pointers_or_collectible(i) header(i)->ContainsPointersOrCollectible() + +#define get_class_object(i) method_table(i)->GetLoaderAllocatorObjectForGC() +#define is_collectible(i) method_table(i)->Collectible() +#else //COLLECTIBLE_CLASS +#define contain_pointers_or_collectible(i) header(i)->ContainsPointers() +#endif //COLLECTIBLE_CLASS + +#if defined (MARK_ARRAY) && defined (BACKGROUND_GC) +inline +void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg) +{ + uint8_t* range_beg = 0; + uint8_t* range_end = 0; + if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end)) + { + clear_mark_array (range_beg, align_on_mark_word (range_end), FALSE +#ifdef FEATURE_BASICFREEZE + , TRUE +#endif // FEATURE_BASICFREEZE + ); + } +} + +void gc_heap::clear_batch_mark_array_bits (uint8_t* start, uint8_t* end) +{ + if ((start < background_saved_highest_address) && + (end > background_saved_lowest_address)) + { + start = max (start, background_saved_lowest_address); + end = min (end, background_saved_highest_address); + + size_t start_mark_bit = mark_bit_of (start); + size_t end_mark_bit = mark_bit_of (end); + unsigned int startbit = mark_bit_bit (start_mark_bit); + unsigned int endbit = mark_bit_bit (end_mark_bit); + size_t startwrd = mark_bit_word (start_mark_bit); + size_t endwrd = mark_bit_word (end_mark_bit); + + dprintf (3, ("Clearing all mark array bits between [%Ix:%Ix-[%Ix:%Ix", + (size_t)start, (size_t)start_mark_bit, + (size_t)end, (size_t)end_mark_bit)); + + unsigned int firstwrd = lowbits (~0, startbit); + unsigned int lastwrd = highbits (~0, endbit); + + if (startwrd == endwrd) + { + unsigned int wrd = firstwrd | lastwrd; + mark_array[startwrd] &= wrd; + return; + } + + // clear the 
first mark word. + if (startbit) + { + mark_array[startwrd] &= firstwrd; + startwrd++; + } + + for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) + { + mark_array[wrdtmp] = 0; + } + + // clear the last mark word. + if (endbit) + { + mark_array[endwrd] &= lastwrd; + } + } +} + +void gc_heap::bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end) +{ + if ((start < background_saved_highest_address) && + (end > background_saved_lowest_address)) + { + start = max (start, background_saved_lowest_address); + end = min (end, background_saved_highest_address); + + clear_batch_mark_array_bits (start, end); + } +} + +void gc_heap::clear_mark_array_by_objects (uint8_t* from, uint8_t* end, BOOL loh_p) +{ + dprintf (3, ("clearing mark array bits by objects for addr [%Ix,[%Ix", + from, end)); + int align_const = get_alignment_constant (!loh_p); + + uint8_t* o = from; + + while (o < end) + { + uint8_t* next_o = o + Align (size (o), align_const); + + if (background_object_marked (o, TRUE)) + { + dprintf (3, ("%Ix was marked by bgc, is now cleared", o)); + } + + o = next_o; + } +} +#endif //MARK_ARRAY && BACKGROUND_GC + +inline +BOOL gc_heap::is_mark_set (uint8_t* o) +{ + return marked (o); +} + +#if defined (_MSC_VER) && defined (_TARGET_X86_) +#pragma optimize("y", on) // Small critical routines, don't put in EBP frame +#endif //_MSC_VER && _TARGET_X86_ + +// return the generation number of an object. +// It is assumed that the object is valid. +//Note that this will return max_generation for a LOH object +int gc_heap::object_gennum (uint8_t* o) +{ + if (in_range_for_segment (o, ephemeral_heap_segment) && + (o >= generation_allocation_start (generation_of (max_generation-1)))) + { + // in an ephemeral generation. 
+ for ( int i = 0; i < max_generation-1; i++) + { + if ((o >= generation_allocation_start (generation_of (i)))) + return i; + } + return max_generation-1; + } + else + { + return max_generation; + } +} + +int gc_heap::object_gennum_plan (uint8_t* o) +{ + if (in_range_for_segment (o, ephemeral_heap_segment)) + { + for (int i = 0; i <= max_generation-1; i++) + { + uint8_t* plan_start = generation_plan_allocation_start (generation_of (i)); + if (plan_start && (o >= plan_start)) + { + return i; + } + } + } + return max_generation; +} + +#if defined(_MSC_VER) && defined(_TARGET_X86_) +#pragma optimize("", on) // Go back to command line default optimizations +#endif //_MSC_VER && _TARGET_X86_ + +heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, int h_number) +{ + size_t initial_commit = SEGMENT_INITIAL_COMMIT; + + //Commit the first page + if (!virtual_alloc_commit_for_heap (new_pages, initial_commit, h_number)) + { + return 0; + } + + //overlay the heap_segment + heap_segment* new_segment = (heap_segment*)new_pages; + + uint8_t* start = 0; +#ifdef BACKGROUND_GC + //leave the first page to contain only segment info + //because otherwise we could need to revisit the first page frequently in + // background GC. 
+ start = new_pages + OS_PAGE_SIZE; +#else + start = new_pages + + Align (sizeof (heap_segment), get_alignment_constant (FALSE)); +#endif //BACKGROUND_GC + heap_segment_mem (new_segment) = start; + heap_segment_used (new_segment) = start; + heap_segment_reserved (new_segment) = new_pages + size; + heap_segment_committed (new_segment) = new_pages + initial_commit; + init_heap_segment (new_segment); + dprintf (2, ("Creating heap segment %Ix", (size_t)new_segment)); + return new_segment; +} + +void gc_heap::init_heap_segment (heap_segment* seg) +{ + seg->flags = 0; + heap_segment_next (seg) = 0; + heap_segment_plan_allocated (seg) = heap_segment_mem (seg); + heap_segment_allocated (seg) = heap_segment_mem (seg); +#ifdef BACKGROUND_GC + heap_segment_background_allocated (seg) = 0; + heap_segment_saved_bg_allocated (seg) = 0; +#endif //BACKGROUND_GC +} + +//Releases the segment to the OS. +// this is always called on one thread only so calling seg_table->remove is fine. +void gc_heap::delete_heap_segment (heap_segment* seg, BOOL consider_hoarding) +{ + if (!heap_segment_loh_p (seg)) + { + //cleanup the brick table back to the empty value + clear_brick_table (heap_segment_mem (seg), heap_segment_reserved (seg)); + } + + if (consider_hoarding) + { + assert ((heap_segment_mem (seg) - (uint8_t*)seg) <= 2*OS_PAGE_SIZE); + size_t ss = (size_t) (heap_segment_reserved (seg) - (uint8_t*)seg); + //Don't keep the big ones. + if (ss <= INITIAL_ALLOC) + { + dprintf (2, ("Hoarding segment %Ix", (size_t)seg)); +#ifdef BACKGROUND_GC + // We don't need to clear the decommitted flag because when this segment is used + // for a new segment the flags will be cleared. 
+ if (!heap_segment_decommitted_p (seg)) +#endif //BACKGROUND_GC + { + decommit_heap_segment (seg); + } + +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_remove_segment (seg); +#endif //SEG_MAPPING_TABLE + + heap_segment_next (seg) = segment_standby_list; + segment_standby_list = seg; + seg = 0; + } + } + + if (seg != 0) + { + dprintf (2, ("h%d: del seg: [%Ix, %Ix[", + heap_number, (size_t)seg, + (size_t)(heap_segment_reserved (seg)))); + +#ifdef BACKGROUND_GC + ::record_changed_seg ((uint8_t*)seg, heap_segment_reserved (seg), + settings.gc_index, current_bgc_state, + seg_deleted); + decommit_mark_array_by_seg (seg); +#endif //BACKGROUND_GC + +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_remove_segment (seg); +#else //SEG_MAPPING_TABLE + seg_table->remove ((uint8_t*)seg); +#endif //SEG_MAPPING_TABLE + + release_segment (seg); + } +} + +//resets the pages beyond alloctes size so they won't be swapped out and back in + +void gc_heap::reset_heap_segment_pages (heap_segment* seg) +{ +#ifndef FEATURE_PAL // No MEM_RESET support in PAL VirtualAlloc + size_t page_start = align_on_page ((size_t)heap_segment_allocated (seg)); + size_t size = (size_t)heap_segment_committed (seg) - page_start; + if (size != 0) + GCToOSInterface::VirtualReset((void*)page_start, size, false /* unlock */); +#endif //!FEATURE_PAL +} + +void gc_heap::decommit_heap_segment_pages (heap_segment* seg, + size_t extra_space) +{ + uint8_t* page_start = align_on_page (heap_segment_allocated(seg)); + size_t size = heap_segment_committed (seg) - page_start; + extra_space = align_on_page (extra_space); + if (size >= max ((extra_space + 2*OS_PAGE_SIZE), 100*OS_PAGE_SIZE)) + { + page_start += max(extra_space, 32*OS_PAGE_SIZE); + size -= max (extra_space, 32*OS_PAGE_SIZE); + + GCToOSInterface::VirtualDecommit (page_start, size); + dprintf (3, ("Decommitting heap segment [%Ix, %Ix[(%d)", + (size_t)page_start, + (size_t)(page_start + size), + size)); + heap_segment_committed (seg) = page_start; + if 
(heap_segment_used (seg) > heap_segment_committed (seg)) + { + heap_segment_used (seg) = heap_segment_committed (seg); + } + } +} + +//decommit all pages except one or 2 +void gc_heap::decommit_heap_segment (heap_segment* seg) +{ + uint8_t* page_start = align_on_page (heap_segment_mem (seg)); + + dprintf (3, ("Decommitting heap segment %Ix", (size_t)seg)); + +#ifdef BACKGROUND_GC + page_start += OS_PAGE_SIZE; +#endif //BACKGROUND_GC + + size_t size = heap_segment_committed (seg) - page_start; + GCToOSInterface::VirtualDecommit (page_start, size); + + //re-init the segment object + heap_segment_committed (seg) = page_start; + if (heap_segment_used (seg) > heap_segment_committed (seg)) + { + heap_segment_used (seg) = heap_segment_committed (seg); + } +} + +void gc_heap::clear_gen0_bricks() +{ + if (!gen0_bricks_cleared) + { + gen0_bricks_cleared = TRUE; + //initialize brick table for gen 0 + for (size_t b = brick_of (generation_allocation_start (generation_of (0))); + b < brick_of (align_on_brick + (heap_segment_allocated (ephemeral_heap_segment))); + b++) + { + set_brick (b, -1); + } + } +} + +#ifdef BACKGROUND_GC +void gc_heap::rearrange_small_heap_segments() +{ + heap_segment* seg = freeable_small_heap_segment; + while (seg) + { + heap_segment* next_seg = heap_segment_next (seg); + // TODO: we need to consider hoarding here. 
+ delete_heap_segment (seg, FALSE); + seg = next_seg; + } + freeable_small_heap_segment = 0; +} +#endif //BACKGROUND_GC + +void gc_heap::rearrange_large_heap_segments() +{ + dprintf (2, ("deleting empty large segments")); + heap_segment* seg = freeable_large_heap_segment; + while (seg) + { + heap_segment* next_seg = heap_segment_next (seg); + delete_heap_segment (seg, (g_pConfig->GetGCRetainVM() != 0)); + seg = next_seg; + } + freeable_large_heap_segment = 0; +} + +void gc_heap::rearrange_heap_segments(BOOL compacting) +{ + heap_segment* seg = + generation_start_segment (generation_of (max_generation)); + + heap_segment* prev_seg = 0; + heap_segment* next_seg = 0; + while (seg) + { + next_seg = heap_segment_next (seg); + + //link ephemeral segment when expanding + if ((next_seg == 0) && (seg != ephemeral_heap_segment)) + { + seg->next = ephemeral_heap_segment; + next_seg = heap_segment_next (seg); + } + + //re-used expanded heap segment + if ((seg == ephemeral_heap_segment) && next_seg) + { + heap_segment_next (prev_seg) = next_seg; + heap_segment_next (seg) = 0; + } + else + { + uint8_t* end_segment = (compacting ? + heap_segment_plan_allocated (seg) : + heap_segment_allocated (seg)); + // check if the segment was reached by allocation + if ((end_segment == heap_segment_mem (seg))&& + !heap_segment_read_only_p (seg)) + { + //if not, unthread and delete + assert (prev_seg); + assert (seg != ephemeral_heap_segment); + heap_segment_next (prev_seg) = next_seg; + delete_heap_segment (seg, (g_pConfig->GetGCRetainVM() != 0)); + + dprintf (2, ("Deleting heap segment %Ix", (size_t)seg)); + } + else + { + if (!heap_segment_read_only_p (seg)) + { + if (compacting) + { + heap_segment_allocated (seg) = + heap_segment_plan_allocated (seg); + } + + // reset the pages between allocated and committed. 
+ if (seg != ephemeral_heap_segment) + { + decommit_heap_segment_pages (seg, 0); + } + } + prev_seg = seg; + } + } + + seg = next_seg; + } +} + + +#ifdef WRITE_WATCH + +uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch + +#ifdef TIME_WRITE_WATCH +static unsigned int tot_cycles = 0; +#endif //TIME_WRITE_WATCH + +#ifdef CARD_BUNDLE + +void gc_heap::update_card_table_bundle() +{ + if (card_bundles_enabled()) + { + uint8_t* base_address = (uint8_t*)(&card_table[card_word (card_of (lowest_address))]); + uint8_t* saved_base_address = base_address; + uintptr_t bcount = array_size; + uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (highest_address))]); + size_t saved_region_size = align_on_page (high_address) - saved_base_address; + + do + { + size_t region_size = align_on_page (high_address) - base_address; + dprintf (3,("Probing card table pages [%Ix, %Ix[", (size_t)base_address, (size_t)base_address+region_size)); + bool success = GCToOSInterface::GetWriteWatch (false /* resetState */ , base_address, region_size, + (void**)g_addresses, + &bcount); + assert (success); + dprintf (3,("Found %d pages written", bcount)); + for (unsigned i = 0; i < bcount; i++) + { + size_t bcardw = (uint32_t*)(max(g_addresses[i],base_address)) - &card_table[0]; + size_t ecardw = (uint32_t*)(min(g_addresses[i]+OS_PAGE_SIZE, high_address)) - &card_table[0]; + assert (bcardw >= card_word (card_of (g_lowest_address))); + + card_bundles_set (cardw_card_bundle (bcardw), + cardw_card_bundle (align_cardw_on_bundle (ecardw))); + + dprintf (3,("Set Card bundle [%Ix, %Ix[", + cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw)))); + +#ifdef _DEBUG + for (size_t x = cardw_card_bundle (bcardw); x < cardw_card_bundle (ecardw); x++) + { + if (!card_bundle_set_p (x)) + { + assert (!"Card bundle not set"); + dprintf (3, ("Card bundle %Ix not set", x)); + } + } +#endif //_DEBUG + + } + if (bcount >= array_size){ + base_address = 
g_addresses [array_size-1] + OS_PAGE_SIZE; + bcount = array_size; + } + } while ((bcount >= array_size) && (base_address < high_address)); + + GCToOSInterface::ResetWriteWatch (saved_base_address, saved_region_size); + +#ifdef _DEBUG + + size_t lowest_card = card_word (card_of (lowest_address)); + size_t highest_card = card_word (card_of (highest_address)); + size_t cardb = cardw_card_bundle (lowest_card); + size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (highest_card)); + + //find a non null bundle + while (cardb < end_cardb) + { + if (card_bundle_set_p (cardb)==0) + { + //verify that the cards are indeed empty + uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)]; + uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)]; + while (card_word < card_word_end) + { + if ((*card_word) != 0) + { + dprintf (3, ("gc: %d, Card word %Ix for address %Ix set, card_bundle %Ix clear", + dd_collection_count (dynamic_data_of (0)), + (size_t)(card_word-&card_table[0]), + (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), cardb)); + } + assert((*card_word)==0); + card_word++; + } + } + //end of verification + cardb++; + } +#endif //_DEBUG + } +} +#endif //CARD_BUNDLE + +// static +void gc_heap::reset_write_watch_for_gc_heap(void* base_address, size_t region_size) +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::ClearDirty(base_address, region_size); +#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + GCToOSInterface::ResetWriteWatch(base_address, region_size); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +} + +// static +void gc_heap::get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended) +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::GetDirty(base_address, region_size, dirty_pages, dirty_page_count_ref, 
reset, is_runtime_suspended); +#else // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + UNREFERENCED_PARAMETER(is_runtime_suspended); + bool success = GCToOSInterface::GetWriteWatch(reset, base_address, region_size, dirty_pages, dirty_page_count_ref); + assert(success); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +} + +const size_t ww_reset_quantum = 128*1024*1024; + +inline +void gc_heap::switch_one_quantum() +{ + Thread* current_thread = GetThread(); + enable_preemptive (current_thread); + GCToOSInterface::Sleep (1); + disable_preemptive (current_thread, TRUE); +} + +void gc_heap::reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size) +{ + size_t reset_size = 0; + size_t remaining_reset_size = 0; + size_t next_reset_size = 0; + + while (reset_size != total_reset_size) + { + remaining_reset_size = total_reset_size - reset_size; + next_reset_size = ((remaining_reset_size >= ww_reset_quantum) ? ww_reset_quantum : remaining_reset_size); + if (next_reset_size) + { + reset_write_watch_for_gc_heap(start_address, next_reset_size); + reset_size += next_reset_size; + + switch_one_quantum(); + } + } + + assert (reset_size == total_reset_size); +} + +// This does a Sleep(1) for every reset ww_reset_quantum bytes of reset +// we do concurrently. +void gc_heap::switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_size, size_t last_reset_size) +{ + if (concurrent_p) + { + *current_total_reset_size += last_reset_size; + + dprintf (2, ("reset %Id bytes so far", *current_total_reset_size)); + + if (*current_total_reset_size > ww_reset_quantum) + { + switch_one_quantum(); + + *current_total_reset_size = 0; + } + } +} + +void gc_heap::reset_write_watch (BOOL concurrent_p) +{ +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Software write watch currently requires the runtime to be suspended during reset. See SoftwareWriteWatch::ClearDirty(). 
+ assert(!concurrent_p); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); + + PREFIX_ASSUME(seg != NULL); + + size_t reset_size = 0; + size_t region_size = 0; + + dprintf (2, ("bgc lowest: %Ix, bgc highest: %Ix", background_saved_lowest_address, background_saved_highest_address)); + + while (seg) + { + uint8_t* base_address = align_lower_page (heap_segment_mem (seg)); + base_address = max (base_address, background_saved_lowest_address); + + uint8_t* high_address = 0; + high_address = ((seg == ephemeral_heap_segment) ? alloc_allocated : heap_segment_allocated (seg)); + high_address = min (high_address, background_saved_highest_address); + + if (base_address < high_address) + { + region_size = high_address - base_address; + +#ifdef TIME_WRITE_WATCH + unsigned int time_start = GetCycleCount32(); +#endif //TIME_WRITE_WATCH + dprintf (3, ("h%d: soh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size)); + //reset_ww_by_chunk (base_address, region_size); + reset_write_watch_for_gc_heap(base_address, region_size); + +#ifdef TIME_WRITE_WATCH + unsigned int time_stop = GetCycleCount32(); + tot_cycles += time_stop - time_start; + printf ("ResetWriteWatch Duration: %d, total: %d\n", + time_stop - time_start, tot_cycles); +#endif //TIME_WRITE_WATCH + + switch_on_reset (concurrent_p, &reset_size, region_size); + } + + seg = heap_segment_next_rw (seg); + + concurrent_print_time_delta ("CRWW soh"); + } + + //concurrent_print_time_delta ("CRW soh"); + + seg = heap_segment_rw (generation_start_segment (large_object_generation)); + + PREFIX_ASSUME(seg != NULL); + + while (seg) + { + uint8_t* base_address = align_lower_page (heap_segment_mem (seg)); + uint8_t* high_address = heap_segment_allocated (seg); + + base_address = max (base_address, background_saved_lowest_address); + high_address = min (high_address, background_saved_highest_address); + + if (base_address < 
high_address) + { + region_size = high_address - base_address; + +#ifdef TIME_WRITE_WATCH + unsigned int time_start = GetCycleCount32(); +#endif //TIME_WRITE_WATCH + dprintf (3, ("h%d: loh ww: [%Ix(%Id)", heap_number, (size_t)base_address, region_size)); + //reset_ww_by_chunk (base_address, region_size); + reset_write_watch_for_gc_heap(base_address, region_size); + +#ifdef TIME_WRITE_WATCH + unsigned int time_stop = GetCycleCount32(); + tot_cycles += time_stop - time_start; + printf ("ResetWriteWatch Duration: %d, total: %d\n", + time_stop - time_start, tot_cycles); +#endif //TIME_WRITE_WATCH + + switch_on_reset (concurrent_p, &reset_size, region_size); + } + + seg = heap_segment_next_rw (seg); + + concurrent_print_time_delta ("CRWW loh"); + } + +#ifdef DEBUG_WRITE_WATCH + debug_write_watch = (uint8_t**)~0; +#endif //DEBUG_WRITE_WATCH +} + +#endif //WRITE_WATCH + +#ifdef BACKGROUND_GC +void gc_heap::restart_vm() +{ + //assert (generation_allocation_pointer (youngest_generation) == 0); + dprintf (3, ("Restarting EE")); + STRESS_LOG0(LF_GC, LL_INFO10000, "Concurrent GC: Retarting EE\n"); + ee_proceed_event.Set(); +} + +inline +void fire_alloc_wait_event (alloc_wait_reason awr, BOOL begin_p) +{ + if (awr != awr_ignored) + { + if (begin_p) + { + FireEtwBGCAllocWaitBegin (awr, GetClrInstanceId()); + } + else + { + FireEtwBGCAllocWaitEnd (awr, GetClrInstanceId()); + } + } +} + + +void gc_heap::fire_alloc_wait_event_begin (alloc_wait_reason awr) +{ + fire_alloc_wait_event (awr, TRUE); +} + + +void gc_heap::fire_alloc_wait_event_end (alloc_wait_reason awr) +{ + fire_alloc_wait_event (awr, FALSE); +} +#endif //BACKGROUND_GC +void gc_heap::make_generation (generation& gen, heap_segment* seg, uint8_t* start, uint8_t* pointer) +{ + gen.allocation_start = start; + gen.allocation_context.alloc_ptr = pointer; + gen.allocation_context.alloc_limit = pointer; + gen.allocation_context.alloc_bytes = 0; + gen.allocation_context.alloc_bytes_loh = 0; + gen.allocation_context_start_region 
= pointer; + gen.start_segment = seg; + gen.allocation_segment = seg; + gen.plan_allocation_start = 0; + gen.free_list_space = 0; + gen.pinned_allocated = 0; + gen.free_list_allocated = 0; + gen.end_seg_allocated = 0; + gen.condemned_allocated = 0; + gen.free_obj_space = 0; + gen.allocation_size = 0; + gen.pinned_allocation_sweep_size = 0; + gen.pinned_allocation_compact_size = 0; + gen.allocate_end_seg_p = FALSE; + gen.free_list_allocator.clear(); + +#ifdef FREE_USAGE_STATS + memset (gen.gen_free_spaces, 0, sizeof (gen.gen_free_spaces)); + memset (gen.gen_current_pinned_free_spaces, 0, sizeof (gen.gen_current_pinned_free_spaces)); + memset (gen.gen_plugs, 0, sizeof (gen.gen_plugs)); +#endif //FREE_USAGE_STATS +} + +void gc_heap::adjust_ephemeral_limits (bool is_runtime_suspended) +{ + ephemeral_low = generation_allocation_start (generation_of (max_generation - 1)); + ephemeral_high = heap_segment_reserved (ephemeral_heap_segment); + + dprintf (3, ("new ephemeral low: %Ix new ephemeral high: %Ix", + (size_t)ephemeral_low, (size_t)ephemeral_high)) + + // This updates the write barrier helpers with the new info. + StompWriteBarrierEphemeral(is_runtime_suspended); +} + +#if defined(TRACE_GC) || defined(GC_CONFIG_DRIVEN) +FILE* CreateLogFile(const CLRConfig::ConfigStringInfo & info, BOOL is_config) +{ + FILE* logFile; + TCHAR * temp_logfile_name = NULL; + CLRConfig::GetConfigValue(info, &temp_logfile_name); + + TCHAR logfile_name[MAX_LONGPATH+1]; + if (temp_logfile_name != 0) + { + _tcscpy(logfile_name, temp_logfile_name); + } + + size_t logfile_name_len = _tcslen(logfile_name); + TCHAR* szPid = logfile_name + logfile_name_len; + size_t remaining_space = MAX_LONGPATH + 1 - logfile_name_len; + + _stprintf_s(szPid, remaining_space, _T(".%d%s"), GCToOSInterface::GetCurrentProcessId(), (is_config ? 
_T(".config.log") : _T(".log"))); + + logFile = _tfopen(logfile_name, _T("wb")); + + delete temp_logfile_name; + + return logFile; +} +#endif //TRACE_GC || GC_CONFIG_DRIVEN + +HRESULT gc_heap::initialize_gc (size_t segment_size, + size_t heap_size +#ifdef MULTIPLE_HEAPS + ,unsigned number_of_heaps +#endif //MULTIPLE_HEAPS +) +{ +#ifdef TRACE_GC + int log_last_gcs = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCLogEnabled); + if (log_last_gcs) + { + gc_log = CreateLogFile(CLRConfig::UNSUPPORTED_GCLogFile, FALSE); + + if (gc_log == NULL) + return E_FAIL; + + // GCLogFileSize in MBs. + gc_log_file_size = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCLogFileSize); + + if (gc_log_file_size > 500) + { + fclose (gc_log); + return E_FAIL; + } + + gc_log_lock.Initialize(); + gc_log_buffer = new (nothrow) uint8_t [gc_log_buffer_size]; + if (!gc_log_buffer) + { + fclose(gc_log); + return E_FAIL; + } + + memset (gc_log_buffer, '*', gc_log_buffer_size); + + max_gc_buffers = gc_log_file_size * 1024 * 1024 / gc_log_buffer_size; + } +#endif // TRACE_GC + +#ifdef GC_CONFIG_DRIVEN + gc_config_log_on = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCConfigLogEnabled); + if (gc_config_log_on) + { + gc_config_log = CreateLogFile(CLRConfig::UNSUPPORTED_GCConfigLogFile, TRUE); + + if (gc_config_log == NULL) + return E_FAIL; + + gc_config_log_buffer = new (nothrow) uint8_t [gc_config_log_buffer_size]; + if (!gc_config_log_buffer) + { + fclose(gc_config_log); + return E_FAIL; + } + + compact_ratio = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCCompactRatio); + + // h# | GC | gen | C | EX | NF | BF | ML | DM || PreS | PostS | Merge | Conv | Pre | Post | PrPo | PreP | PostP | + cprintf (("%2s | %6s | %1s | %1s | %2s | %2s | %2s | %2s | %2s || %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s | %5s |", + "h#", // heap index + "GC", // GC index + "g", // generation + "C", // compaction (empty means sweeping), 'M' means it was mandatory, 'W' means it was not + "EX", // heap 
expansion + "NF", // normal fit + "BF", // best fit (if it indicates neither NF nor BF it means it had to acquire a new seg. + "ML", // mark list + "DM", // demotion + "PreS", // short object before pinned plug + "PostS", // short object after pinned plug + "Merge", // merged pinned plugs + "Conv", // converted to pinned plug + "Pre", // plug before pinned plug but not after + "Post", // plug after pinned plug but not before + "PrPo", // plug both before and after pinned plug + "PreP", // pre short object padded + "PostP" // post short object padded + )); + } +#endif //GC_CONFIG_DRIVEN + +#ifdef GC_STATS + GCStatistics::logFileName = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCMixLog); + if (GCStatistics::logFileName != NULL) + { + GCStatistics::logFile = _tfopen(GCStatistics::logFileName, _T("a")); + } +#endif // GC_STATS + + HRESULT hres = S_OK; + +#ifdef WRITE_WATCH + hardware_write_watch_api_supported(); +#ifdef BACKGROUND_GC + if (can_use_write_watch_for_gc_heap() && g_pConfig->GetGCconcurrent() != 0) + { + gc_can_use_concurrent = true; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + virtual_alloc_hardware_write_watch = true; +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + } + else + { + gc_can_use_concurrent = false; + } +#endif //BACKGROUND_GC +#endif //WRITE_WATCH + + reserved_memory = 0; + unsigned block_count; +#ifdef MULTIPLE_HEAPS + reserved_memory_limit = (segment_size + heap_size) * number_of_heaps; + block_count = number_of_heaps; +#else //MULTIPLE_HEAPS + reserved_memory_limit = segment_size + heap_size; + block_count = 1; +#endif //MULTIPLE_HEAPS + + if (!reserve_initial_memory(segment_size,heap_size,block_count)) + return E_OUTOFMEMORY; + +#ifdef CARD_BUNDLE + //check if we need to turn on card_bundles. 
+#ifdef MULTIPLE_HEAPS + // use INT64 arithmetic here because of possible overflow on 32p + uint64_t th = (uint64_t)MH_TH_CARD_BUNDLE*number_of_heaps; +#else + // use INT64 arithmetic here because of possible overflow on 32p + uint64_t th = (uint64_t)SH_TH_CARD_BUNDLE; +#endif //MULTIPLE_HEAPS + + if (can_use_write_watch_for_card_table() && reserved_memory >= th) + { + settings.card_bundles = TRUE; + } + else + { + settings.card_bundles = FALSE; + } +#endif //CARD_BUNDLE + + settings.first_init(); + + g_card_table = make_card_table (g_lowest_address, g_highest_address); + + if (!g_card_table) + return E_OUTOFMEMORY; + + gc_started = FALSE; + +#ifdef MULTIPLE_HEAPS + n_heaps = number_of_heaps; + + g_heaps = new (nothrow) gc_heap* [number_of_heaps]; + if (!g_heaps) + return E_OUTOFMEMORY; + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow +#endif // _PREFAST_ + g_promoted = new (nothrow) size_t [number_of_heaps*16]; + g_bpromoted = new (nothrow) size_t [number_of_heaps*16]; +#ifdef MH_SC_MARK + g_mark_stack_busy = new (nothrow) int[(number_of_heaps+2)*HS_CACHE_LINE_SIZE/sizeof(int)]; +#endif //MH_SC_MARK +#ifdef _PREFAST_ +#pragma warning(pop) +#endif // _PREFAST_ + if (!g_promoted || !g_bpromoted) + return E_OUTOFMEMORY; + +#ifdef MH_SC_MARK + if (!g_mark_stack_busy) + return E_OUTOFMEMORY; +#endif //MH_SC_MARK + + if (!create_thread_support (number_of_heaps)) + return E_OUTOFMEMORY; + + if (!heap_select::init (number_of_heaps)) + return E_OUTOFMEMORY; + +#endif //MULTIPLE_HEAPS + +#ifdef TRACE_GC + print_level = g_pConfig->GetGCprnLvl(); + gc_trace_fac = g_pConfig->GetGCtraceFac(); +#endif //TRACE_GC + + if (!init_semi_shared()) + { + hres = E_FAIL; + } + + return hres; +} + +//Initializes PER_HEAP_ISOLATED data members. 
+int +gc_heap::init_semi_shared() +{ + int ret = 0; + + // This is used for heap expansion - it's to fix exactly the start for gen 0 + // through (max_generation-1). When we expand the heap we allocate all these + // gen starts at the beginning of the new ephemeral seg. + eph_gen_starts_size = (Align (min_obj_size)) * max_generation; + +#ifdef MARK_LIST + size_t gen0size = GCHeap::GetValidGen0MaxSize(get_valid_segment_size()); + MAYBE_UNUSED_VAR(gen0size); + +#ifdef MULTIPLE_HEAPS + + mark_list_size = min (150*1024, max (8192, get_valid_segment_size()/(2*10*32))); + g_mark_list = make_mark_list (mark_list_size*n_heaps); + + min_balance_threshold = alloc_quantum_balance_units * CLR_SIZE * 2; +#ifdef PARALLEL_MARK_LIST_SORT + g_mark_list_copy = make_mark_list (mark_list_size*n_heaps); + if (!g_mark_list_copy) + { + goto cleanup; + } +#endif //PARALLEL_MARK_LIST_SORT + +#else //MULTIPLE_HEAPS + + mark_list_size = max (8192, get_valid_segment_size()/(64*32)); + g_mark_list = make_mark_list (mark_list_size); + +#endif //MULTIPLE_HEAPS + + dprintf (3, ("gen0 size: %d, mark_list_size: %d", + gen0size, mark_list_size)); + + if (!g_mark_list) + { + goto cleanup; + } +#endif //MARK_LIST + +#if defined(SEG_MAPPING_TABLE) && !defined(GROWABLE_SEG_MAPPING_TABLE) + if (!seg_mapping_table_init()) + goto cleanup; +#endif //SEG_MAPPING_TABLE && !GROWABLE_SEG_MAPPING_TABLE + +#if !defined(SEG_MAPPING_TABLE) || defined(FEATURE_BASICFREEZE) + seg_table = sorted_table::make_sorted_table(); + + if (!seg_table) + goto cleanup; +#endif //!SEG_MAPPING_TABLE || FEATURE_BASICFREEZE + + segment_standby_list = 0; + + if (!full_gc_approach_event.CreateManualEventNoThrow(FALSE)) + { + goto cleanup; + } + if (!full_gc_end_event.CreateManualEventNoThrow(FALSE)) + { + goto cleanup; + } + + fgn_maxgen_percent = 0; + fgn_loh_percent = 0; + full_gc_approach_event_set = false; + + memset (full_gc_counts, 0, sizeof (full_gc_counts)); + + last_gc_index = 0; + should_expand_in_full_gc = FALSE; + +#ifdef 
FEATURE_LOH_COMPACTION + loh_compaction_always_p = (g_pConfig->GetGCLOHCompactionMode() != 0); + loh_compaction_mode = loh_compaction_default; +#endif //FEATURE_LOH_COMPACTION + +#ifdef BACKGROUND_GC + memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts)); + bgc_alloc_spin_count = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_BGCSpinCount); + bgc_alloc_spin = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_BGCSpin); + + { + int number_bgc_threads = 1; +#ifdef MULTIPLE_HEAPS + number_bgc_threads = n_heaps; +#endif //MULTIPLE_HEAPS + if (!create_bgc_threads_support (number_bgc_threads)) + { + goto cleanup; + } +#endif //BACKGROUND_GC + } + + memset (¤t_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); + +#ifdef GC_CONFIG_DRIVEN + compact_or_sweep_gcs[0] = 0; + compact_or_sweep_gcs[1] = 0; +#endif //GC_CONFIG_DRIVEN + +#ifdef SHORT_PLUGS + short_plugs_pad_ratio = (double)DESIRED_PLUG_LENGTH / (double)(DESIRED_PLUG_LENGTH - Align (min_obj_size)); +#endif //SHORT_PLUGS + + ret = 1; + +cleanup: + + if (!ret) + { + if (full_gc_approach_event.IsValid()) + { + full_gc_approach_event.CloseEvent(); + } + if (full_gc_end_event.IsValid()) + { + full_gc_end_event.CloseEvent(); + } + } + + return ret; +} + +gc_heap* gc_heap::make_gc_heap ( +#ifdef MULTIPLE_HEAPS + GCHeap* vm_hp, + int heap_number +#endif //MULTIPLE_HEAPS + ) +{ + gc_heap* res = 0; + +#ifdef MULTIPLE_HEAPS + res = new (nothrow) gc_heap; + if (!res) + return 0; + + res->vm_heap = vm_hp; + res->alloc_context_count = 0; + +#ifdef MARK_LIST +#ifdef PARALLEL_MARK_LIST_SORT + res->mark_list_piece_start = new (nothrow) uint8_t**[n_heaps]; + if (!res->mark_list_piece_start) + return 0; + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:22011) // Suppress PREFast warning about integer underflow/overflow +#endif // _PREFAST_ + res->mark_list_piece_end = new (nothrow) uint8_t**[n_heaps + 32]; // +32 is padding to reduce false sharing +#ifdef _PREFAST_ +#pragma warning(pop) +#endif // 
_PREFAST_ + + if (!res->mark_list_piece_end) + return 0; +#endif //PARALLEL_MARK_LIST_SORT +#endif //MARK_LIST + + +#endif //MULTIPLE_HEAPS + + if (res->init_gc_heap ( +#ifdef MULTIPLE_HEAPS + heap_number +#else //MULTIPLE_HEAPS + 0 +#endif //MULTIPLE_HEAPS + )==0) + { + return 0; + } + +#ifdef MULTIPLE_HEAPS + return res; +#else + return (gc_heap*)1; +#endif //MULTIPLE_HEAPS +} + +uint32_t +gc_heap::wait_for_gc_done(int32_t timeOut) +{ + Thread* current_thread = GetThread(); + BOOL cooperative_mode = enable_preemptive (current_thread); + + uint32_t dwWaitResult = NOERROR; + + gc_heap* wait_heap = NULL; + while (gc_heap::gc_started) + { +#ifdef MULTIPLE_HEAPS + wait_heap = GCHeap::GetHeap(heap_select::select_heap(NULL, 0))->pGenGCHeap; + dprintf(2, ("waiting for the gc_done_event on heap %d", wait_heap->heap_number)); +#endif // MULTIPLE_HEAPS + +#ifdef _PREFAST_ + PREFIX_ASSUME(wait_heap != NULL); +#endif // _PREFAST_ + + dwWaitResult = wait_heap->gc_done_event.Wait(timeOut, FALSE); + } + disable_preemptive (current_thread, cooperative_mode); + + return dwWaitResult; +} + +void +gc_heap::set_gc_done() +{ + enter_gc_done_event_lock(); + if (!gc_done_event_set) + { + gc_done_event_set = true; + dprintf (2, ("heap %d: setting gc_done_event", heap_number)); + gc_done_event.Set(); + } + exit_gc_done_event_lock(); +} + +void +gc_heap::reset_gc_done() +{ + enter_gc_done_event_lock(); + if (gc_done_event_set) + { + gc_done_event_set = false; + dprintf (2, ("heap %d: resetting gc_done_event", heap_number)); + gc_done_event.Reset(); + } + exit_gc_done_event_lock(); +} + +void +gc_heap::enter_gc_done_event_lock() +{ + uint32_t dwSwitchCount = 0; +retry: + + if (Interlocked::Exchange (&gc_done_event_lock, 0) >= 0) + { + while (gc_done_event_lock >= 0) + { + if (g_SystemInfo.dwNumberOfProcessors > 1) + { + int spin_count = 32 * g_SystemInfo.dwNumberOfProcessors; + for (int j = 0; j < spin_count; j++) + { + if (gc_done_event_lock < 0) + break; + YieldProcessor(); // indicate to 
the processor that we are spining + } + if (gc_done_event_lock >= 0) + GCToOSInterface::YieldThread(++dwSwitchCount); + } + else + GCToOSInterface::YieldThread(++dwSwitchCount); + } + goto retry; + } +} + +void +gc_heap::exit_gc_done_event_lock() +{ + gc_done_event_lock = -1; +} + +#ifndef MULTIPLE_HEAPS + +#ifdef RECORD_LOH_STATE +int gc_heap::loh_state_index = 0; +gc_heap::loh_state_info gc_heap::last_loh_states[max_saved_loh_states]; +#endif //RECORD_LOH_STATE + +VOLATILE(int32_t) gc_heap::gc_done_event_lock; +VOLATILE(bool) gc_heap::gc_done_event_set; +CLREvent gc_heap::gc_done_event; +#endif //!MULTIPLE_HEAPS +VOLATILE(bool) gc_heap::internal_gc_done; + +void gc_heap::add_saved_spinlock_info ( + msl_enter_state enter_state, + msl_take_state take_state) + +{ +#ifdef SPINLOCK_HISTORY + spinlock_info* current = &last_spinlock_info[spinlock_info_index]; + + current->enter_state = enter_state; + current->take_state = take_state; + current->thread_id.SetToCurrentThread(); + + spinlock_info_index++; + + assert (spinlock_info_index <= max_saved_spinlock_info); + + if (spinlock_info_index >= max_saved_spinlock_info) + { + spinlock_info_index = 0; + } +#else + MAYBE_UNUSED_VAR(enter_state); + MAYBE_UNUSED_VAR(take_state); +#endif //SPINLOCK_HISTORY +} + +int +gc_heap::init_gc_heap (int h_number) +{ +#ifdef MULTIPLE_HEAPS + + time_bgc_last = 0; + +#ifdef SPINLOCK_HISTORY + spinlock_info_index = 0; + memset (last_spinlock_info, 0, sizeof(last_spinlock_info)); +#endif //SPINLOCK_HISTORY + + // initialize per heap members. 
+ ephemeral_low = (uint8_t*)1; + + ephemeral_high = MAX_PTR; + + ephemeral_heap_segment = 0; + + freeable_large_heap_segment = 0; + + condemned_generation_num = 0; + + blocking_collection = FALSE; + + generation_skip_ratio = 100; + + mark_stack_tos = 0; + + mark_stack_bos = 0; + + mark_stack_array_length = 0; + + mark_stack_array = 0; + + verify_pinned_queue_p = FALSE; + + loh_pinned_queue_tos = 0; + + loh_pinned_queue_bos = 0; + + loh_pinned_queue_length = 0; + + loh_pinned_queue_decay = LOH_PIN_DECAY; + + loh_pinned_queue = 0; + + min_overflow_address = MAX_PTR; + + max_overflow_address = 0; + + gen0_bricks_cleared = FALSE; + + gen0_must_clear_bricks = 0; + + allocation_quantum = CLR_SIZE; + + more_space_lock = gc_lock; + + ro_segments_in_range = FALSE; + + loh_alloc_since_cg = 0; + + new_heap_segment = NULL; + +#ifdef RECORD_LOH_STATE + loh_state_index = 0; +#endif //RECORD_LOH_STATE +#endif //MULTIPLE_HEAPS + +#ifdef MULTIPLE_HEAPS + if (h_number > n_heaps) + { + assert (!"Number of heaps exceeded"); + return 0; + } + + heap_number = h_number; +#endif //MULTIPLE_HEAPS + + memset (&oom_info, 0, sizeof (oom_info)); + memset (&fgm_result, 0, sizeof (fgm_result)); + if (!gc_done_event.CreateManualEventNoThrow(FALSE)) + { + return 0; + } + gc_done_event_lock = -1; + gc_done_event_set = false; + +#ifndef SEG_MAPPING_TABLE + if (!gc_heap::seg_table->ensure_space_for_insert ()) + { + return 0; + } +#endif //!SEG_MAPPING_TABLE + + heap_segment* seg = get_initial_segment (get_valid_segment_size(), h_number); + if (!seg) + return 0; + + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(seg), + (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), + ETW::GCLog::ETW_GC_INFO::SMALL_OBJECT_HEAP, + GetClrInstanceId()); + +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_add_segment (seg, __this); +#else //SEG_MAPPING_TABLE + seg_table->insert ((uint8_t*)seg, sdelta); +#endif //SEG_MAPPING_TABLE + +#ifdef MULTIPLE_HEAPS + heap_segment_heap (seg) = this; +#endif 
//MULTIPLE_HEAPS + + /* todo: Need a global lock for this */ + uint32_t* ct = &g_card_table [card_word (card_of (g_lowest_address))]; + own_card_table (ct); + card_table = translate_card_table (ct); + /* End of global lock */ + + brick_table = card_table_brick_table (ct); + highest_address = card_table_highest_address (ct); + lowest_address = card_table_lowest_address (ct); + +#ifdef CARD_BUNDLE + card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct)); + assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_lowest_address))))] == + card_table_card_bundle_table (ct)); +#endif //CARD_BUNDLE + +#ifdef MARK_ARRAY + if (gc_can_use_concurrent) + mark_array = translate_mark_array (card_table_mark_array (&g_card_table[card_word (card_of (g_lowest_address))])); + else + mark_array = NULL; +#endif //MARK_ARRAY + + uint8_t* start = heap_segment_mem (seg); + + for (int i = 0; i < 1 + max_generation; i++) + { + make_generation (generation_table [ (max_generation - i) ], + seg, start, 0); + generation_table [(max_generation - i)].gen_num = max_generation - i; + start += Align (min_obj_size); + } + + heap_segment_allocated (seg) = start; + alloc_allocated = start; + heap_segment_used (seg) = start - plug_skew; + + ephemeral_heap_segment = seg; + +#ifndef SEG_MAPPING_TABLE + if (!gc_heap::seg_table->ensure_space_for_insert ()) + { + return 0; + } +#endif //!SEG_MAPPING_TABLE + //Create the large segment generation + heap_segment* lseg = get_initial_segment(get_valid_segment_size(TRUE), h_number); + if (!lseg) + return 0; + lseg->flags |= heap_segment_flags_loh; + + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(lseg), + (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)), + ETW::GCLog::ETW_GC_INFO::LARGE_OBJECT_HEAP, + GetClrInstanceId()); +#ifdef SEG_MAPPING_TABLE + seg_mapping_table_add_segment (lseg, __this); +#else //SEG_MAPPING_TABLE + seg_table->insert ((uint8_t*)lseg, sdelta); +#endif 
//SEG_MAPPING_TABLE + + generation_table [max_generation].free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST, gen2_alloc_list); + //assign the alloc_list for the large generation + generation_table [max_generation+1].free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST, loh_alloc_list); + generation_table [max_generation+1].gen_num = max_generation+1; + make_generation (generation_table [max_generation+1],lseg, heap_segment_mem (lseg), 0); + heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE)); + heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew; + + for (int gen_num = 0; gen_num <= 1 + max_generation; gen_num++) + { + generation* gen = generation_of (gen_num); + make_unused_array (generation_allocation_start (gen), Align (min_obj_size)); + } + +#ifdef MULTIPLE_HEAPS + heap_segment_heap (lseg) = this; + + //initialize the alloc context heap + generation_alloc_context (generation_of (0))->alloc_heap = vm_heap; + + //initialize the alloc context heap + generation_alloc_context (generation_of (max_generation+1))->alloc_heap = vm_heap; + +#endif //MULTIPLE_HEAPS + + //Do this only once +#ifdef MULTIPLE_HEAPS + if (h_number == 0) +#endif //MULTIPLE_HEAPS + { +#ifndef INTERIOR_POINTERS + //set the brick_table for large objects + //but default value is clearded + //clear_brick_table ((uint8_t*)heap_segment_mem (lseg), + // (uint8_t*)heap_segment_reserved (lseg)); + +#else //INTERIOR_POINTERS + + //Because of the interior pointer business, we have to clear + //the whole brick table + //but the default value is cleared + // clear_brick_table (lowest_address, highest_address); +#endif //INTERIOR_POINTERS + } + + if (!init_dynamic_data()) + { + return 0; + } + + etw_allocation_running_amount[0] = 0; + etw_allocation_running_amount[1] = 0; + + //needs to be done after the dynamic data has been initialized +#ifndef MULTIPLE_HEAPS + allocation_running_amount = dd_min_gc_size 
(dynamic_data_of (0)); +#endif //!MULTIPLE_HEAPS + + fgn_last_alloc = dd_min_gc_size (dynamic_data_of (0)); + + mark* arr = new (nothrow) (mark [MARK_STACK_INITIAL_LENGTH]); + if (!arr) + return 0; + + make_mark_stack(arr); + +#ifdef BACKGROUND_GC + freeable_small_heap_segment = 0; + gchist_index_per_heap = 0; + uint8_t** b_arr = new (nothrow) (uint8_t* [MARK_STACK_INITIAL_LENGTH]); + if (!b_arr) + return 0; + + make_background_mark_stack (b_arr); +#endif //BACKGROUND_GC + + adjust_ephemeral_limits(true); + +#ifdef MARK_ARRAY + // why would we clear the mark array for this page? it should be cleared.. + // clear the first committed page + //if(gc_can_use_concurrent) + //{ + // clear_mark_array (align_lower_page (heap_segment_mem (seg)), heap_segment_committed (seg)); + //} +#endif //MARK_ARRAY + +#ifdef MULTIPLE_HEAPS + //register the heap in the heaps array + + if (!create_gc_thread ()) + return 0; + + g_heaps [heap_number] = this; + +#endif //MULTIPLE_HEAPS + +#ifdef FEATURE_PREMORTEM_FINALIZATION + HRESULT hr = AllocateCFinalize(&finalize_queue); + if (FAILED(hr)) + return 0; +#endif // FEATURE_PREMORTEM_FINALIZATION + + max_free_space_items = MAX_NUM_FREE_SPACES; + + bestfit_seg = new (nothrow) seg_free_spaces (heap_number); + + if (!bestfit_seg) + { + return 0; + } + + if (!bestfit_seg->alloc()) + { + return 0; + } + + last_gc_before_oom = FALSE; + +#ifdef MULTIPLE_HEAPS + +#ifdef HEAP_ANALYZE + + heap_analyze_success = TRUE; + + internal_root_array = 0; + + internal_root_array_index = 0; + + internal_root_array_length = initial_internal_roots; + + current_obj = 0; + + current_obj_size = 0; + +#endif //HEAP_ANALYZE + +#endif // MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + bgc_thread_id.Clear(); + + if (!create_bgc_thread_support()) + { + return 0; + } + + bgc_alloc_lock = new (nothrow) exclusive_sync; + if (!bgc_alloc_lock) + { + return 0; + } + + bgc_alloc_lock->init(); + + if (h_number == 0) + { + if (!recursive_gc_sync::init()) + return 0; + } + + 
bgc_thread_running = 0; + bgc_thread = 0; + bgc_threads_timeout_cs.Initialize(); + expanded_in_fgc = 0; + current_bgc_state = bgc_not_in_process; + background_soh_alloc_count = 0; + background_loh_alloc_count = 0; + bgc_overflow_count = 0; + end_loh_size = dd_min_gc_size (dynamic_data_of (max_generation + 1)); +#endif //BACKGROUND_GC + +#ifdef GC_CONFIG_DRIVEN + memset (interesting_data_per_heap, 0, sizeof (interesting_data_per_heap)); + memset(compact_reasons_per_heap, 0, sizeof (compact_reasons_per_heap)); + memset(expand_mechanisms_per_heap, 0, sizeof (expand_mechanisms_per_heap)); + memset(interesting_mechanism_bits_per_heap, 0, sizeof (interesting_mechanism_bits_per_heap)); +#endif //GC_CONFIG_DRIVEN + + return 1; +} + +void +gc_heap::destroy_semi_shared() +{ +//TODO: will need to move this to per heap +//#ifdef BACKGROUND_GC +// if (c_mark_list) +// delete c_mark_list; +//#endif //BACKGROUND_GC + +#ifdef MARK_LIST + if (g_mark_list) + delete g_mark_list; +#endif //MARK_LIST + +#if defined(SEG_MAPPING_TABLE) && !defined(GROWABLE_SEG_MAPPING_TABLE) + if (seg_mapping_table) + delete seg_mapping_table; +#endif //SEG_MAPPING_TABLE && !GROWABLE_SEG_MAPPING_TABLE + +#if !defined(SEG_MAPPING_TABLE) || defined(FEATURE_BASICFREEZE) + //destroy the segment map + seg_table->delete_sorted_table(); +#endif //!SEG_MAPPING_TABLE || FEATURE_BASICFREEZE +} + +void +gc_heap::self_destroy() +{ +#ifdef BACKGROUND_GC + kill_gc_thread(); +#endif //BACKGROUND_GC + + if (gc_done_event.IsValid()) + { + gc_done_event.CloseEvent(); + } + + // destroy every segment. 
+ heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); + + PREFIX_ASSUME(seg != NULL); + + heap_segment* next_seg; + while (seg) + { + next_seg = heap_segment_next_rw (seg); + delete_heap_segment (seg); + seg = next_seg; + } + + seg = heap_segment_rw (generation_start_segment (generation_of (max_generation+1))); + + PREFIX_ASSUME(seg != NULL); + + while (seg) + { + next_seg = heap_segment_next_rw (seg); + delete_heap_segment (seg); + seg = next_seg; + } + + // get rid of the card table + release_card_table (card_table); + + // destroy the mark stack + delete mark_stack_array; + +#ifdef FEATURE_PREMORTEM_FINALIZATION + if (finalize_queue) + delete finalize_queue; +#endif // FEATURE_PREMORTEM_FINALIZATION +} + +void +gc_heap::destroy_gc_heap(gc_heap* heap) +{ + heap->self_destroy(); + delete heap; +} + +// Destroys resources owned by gc. It is assumed that a last GC has been performed and that +// the finalizer queue has been drained. +void gc_heap::shutdown_gc() +{ + destroy_semi_shared(); + +#ifdef MULTIPLE_HEAPS + //delete the heaps array + delete g_heaps; + destroy_thread_support(); + n_heaps = 0; +#endif //MULTIPLE_HEAPS + //destroy seg_manager + + destroy_initial_memory(); + + GCToOSInterface::Shutdown(); +} + +inline +BOOL gc_heap::size_fit_p (size_t size REQD_ALIGN_AND_OFFSET_DCL, uint8_t* alloc_pointer, uint8_t* alloc_limit, + uint8_t* old_loc, int use_padding) +{ + BOOL already_padded = FALSE; +#ifdef SHORT_PLUGS + if ((old_loc != 0) && (use_padding & USE_PADDING_FRONT)) + { + alloc_pointer = alloc_pointer + Align (min_obj_size); + already_padded = TRUE; + } +#endif //SHORT_PLUGS + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, alloc_pointer))) + size = size + switch_alignment_size (already_padded); + +#ifdef FEATURE_STRUCTALIGN + alloc_pointer = StructAlign(alloc_pointer, requiredAlignment, alignmentOffset); +#endif // FEATURE_STRUCTALIGN + + // in allocate_in_condemned_generation we can have this 
when we + // set the alloc_limit to plan_allocated which could be less than + // alloc_ptr + if (alloc_limit < alloc_pointer) + { + return FALSE; + } + + if (old_loc != 0) + { + return (((size_t)(alloc_limit - alloc_pointer) >= (size + ((use_padding & USE_PADDING_TAIL)? Align(min_obj_size) : 0))) +#ifdef SHORT_PLUGS + ||((!(use_padding & USE_PADDING_FRONT)) && ((alloc_pointer + size) == alloc_limit)) +#else //SHORT_PLUGS + ||((alloc_pointer + size) == alloc_limit) +#endif //SHORT_PLUGS + ); + } + else + { + assert (size == Align (min_obj_size)); + return ((size_t)(alloc_limit - alloc_pointer) >= size); + } +} + +inline +BOOL gc_heap::a_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit, + int align_const) +{ + // We could have run into cases where this is true when alloc_allocated is the + // the same as the seg committed. + if (alloc_limit < alloc_pointer) + { + return FALSE; + } + + return ((size_t)(alloc_limit - alloc_pointer) >= (size + Align(min_obj_size, align_const))); +} + +// Grow by committing more pages +BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address) +{ + assert (high_address <= heap_segment_reserved (seg)); + + //return 0 if we are at the end of the segment. 
+ if (align_on_page (high_address) > heap_segment_reserved (seg)) + return FALSE; + + if (high_address <= heap_segment_committed (seg)) + return TRUE; + + size_t c_size = align_on_page ((size_t)(high_address - heap_segment_committed (seg))); + c_size = max (c_size, 16*OS_PAGE_SIZE); + c_size = min (c_size, (size_t)(heap_segment_reserved (seg) - heap_segment_committed (seg))); + + if (c_size == 0) + return FALSE; + + STRESS_LOG2(LF_GC, LL_INFO10000, + "Growing heap_segment: %Ix high address: %Ix\n", + (size_t)seg, (size_t)high_address); + + dprintf(3, ("Growing segment allocation %Ix %Ix", (size_t)heap_segment_committed(seg),c_size)); + + if (!virtual_alloc_commit_for_heap(heap_segment_committed (seg), c_size, heap_number)) + { + dprintf(3, ("Cannot grow heap segment")); + return FALSE; + } +#ifdef MARK_ARRAY +#ifndef BACKGROUND_GC + clear_mark_array (heap_segment_committed (seg), + heap_segment_committed (seg)+c_size, TRUE); +#endif //BACKGROUND_GC +#endif //MARK_ARRAY + heap_segment_committed (seg) += c_size; + STRESS_LOG1(LF_GC, LL_INFO10000, "New commit: %Ix", + (size_t)heap_segment_committed (seg)); + + assert (heap_segment_committed (seg) <= heap_segment_reserved (seg)); + + assert (high_address <= heap_segment_committed (seg)); + + return TRUE; +} + +inline +int gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* allocated, uint8_t* old_loc, size_t size, BOOL pad_front_p REQD_ALIGN_AND_OFFSET_DCL) +{ +#ifdef SHORT_PLUGS + if ((old_loc != 0) && pad_front_p) + { + allocated = allocated + Align (min_obj_size); + } +#endif //SHORT_PLUGS + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, allocated))) + size = size + switch_alignment_size (FALSE); +#ifdef FEATURE_STRUCTALIGN + size_t pad = ComputeStructAlignPad(allocated, requiredAlignment, alignmentOffset); + return grow_heap_segment (seg, allocated + pad + size); +#else // FEATURE_STRUCTALIGN + return grow_heap_segment (seg, allocated + size); +#endif // FEATURE_STRUCTALIGN +} + +//used only in 
older generation allocation (i.e during gc). +void gc_heap::adjust_limit (uint8_t* start, size_t limit_size, generation* gen, + int gennum) +{ + UNREFERENCED_PARAMETER(gennum); + dprintf (3, ("gc Expanding segment allocation")); + heap_segment* seg = generation_allocation_segment (gen); + if ((generation_allocation_limit (gen) != start) || (start != heap_segment_plan_allocated (seg))) + { + if (generation_allocation_limit (gen) == heap_segment_plan_allocated (seg)) + { + assert (generation_allocation_pointer (gen) >= heap_segment_mem (seg)); + assert (generation_allocation_pointer (gen) <= heap_segment_committed (seg)); + heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen); + } + else + { + uint8_t* hole = generation_allocation_pointer (gen); + size_t size = (generation_allocation_limit (gen) - generation_allocation_pointer (gen)); + + if (size != 0) + { + dprintf (3, ("filling up hole: %Ix, size %Ix", hole, size)); + size_t allocated_size = generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen); + if (size >= Align (min_free_list)) + { + if (allocated_size < min_free_list) + { + if (size >= (Align (min_free_list) + Align (min_obj_size))) + { + //split hole into min obj + threadable free item + make_unused_array (hole, min_obj_size); + generation_free_obj_space (gen) += Align (min_obj_size); + make_unused_array (hole + Align (min_obj_size), size - Align (min_obj_size)); + generation_free_list_space (gen) += size - Align (min_obj_size); + generation_allocator(gen)->thread_item_front (hole + Align (min_obj_size), + size - Align (min_obj_size)); + add_gen_free (gen->gen_num, (size - Align (min_obj_size))); + } + else + { + dprintf (3, ("allocated size too small, can't put back rest on free list %Ix", allocated_size)); + make_unused_array (hole, size); + generation_free_obj_space (gen) += size; + } + } + else + { + dprintf (3, ("threading hole in front of free list")); + 
make_unused_array (hole, size); + generation_free_list_space (gen) += size; + generation_allocator(gen)->thread_item_front (hole, size); + add_gen_free (gen->gen_num, size); + } + } + else + { + make_unused_array (hole, size); + generation_free_obj_space (gen) += size; + } + } + } + generation_allocation_pointer (gen) = start; + generation_allocation_context_start_region (gen) = start; + } + generation_allocation_limit (gen) = (start + limit_size); +} + +void verify_mem_cleared (uint8_t* start, size_t size) +{ + if (!Aligned (size)) + { + FATAL_GC_ERROR(); + } + + PTR_PTR curr_ptr = (PTR_PTR) start; + for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) + { + if (*(curr_ptr++) != 0) + { + FATAL_GC_ERROR(); + } + } +} + +#if defined (VERIFY_HEAP) && defined (BACKGROUND_GC) +void gc_heap::set_batch_mark_array_bits (uint8_t* start, uint8_t* end) +{ + size_t start_mark_bit = mark_bit_of (start); + size_t end_mark_bit = mark_bit_of (end); + unsigned int startbit = mark_bit_bit (start_mark_bit); + unsigned int endbit = mark_bit_bit (end_mark_bit); + size_t startwrd = mark_bit_word (start_mark_bit); + size_t endwrd = mark_bit_word (end_mark_bit); + + dprintf (3, ("Setting all mark array bits between [%Ix:%Ix-[%Ix:%Ix", + (size_t)start, (size_t)start_mark_bit, + (size_t)end, (size_t)end_mark_bit)); + + unsigned int firstwrd = ~(lowbits (~0, startbit)); + unsigned int lastwrd = ~(highbits (~0, endbit)); + + if (startwrd == endwrd) + { + unsigned int wrd = firstwrd & lastwrd; + mark_array[startwrd] |= wrd; + return; + } + + // set the first mark word. + if (startbit) + { + mark_array[startwrd] |= firstwrd; + startwrd++; + } + + for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) + { + mark_array[wrdtmp] = ~(unsigned int)0; + } + + // set the last mark word. + if (endbit) + { + mark_array[endwrd] |= lastwrd; + } +} + +// makes sure that the mark array bits between start and end are 0. 
+void gc_heap::check_batch_mark_array_bits (uint8_t* start, uint8_t* end) +{ + size_t start_mark_bit = mark_bit_of (start); + size_t end_mark_bit = mark_bit_of (end); + unsigned int startbit = mark_bit_bit (start_mark_bit); + unsigned int endbit = mark_bit_bit (end_mark_bit); + size_t startwrd = mark_bit_word (start_mark_bit); + size_t endwrd = mark_bit_word (end_mark_bit); + + //dprintf (3, ("Setting all mark array bits between [%Ix:%Ix-[%Ix:%Ix", + // (size_t)start, (size_t)start_mark_bit, + // (size_t)end, (size_t)end_mark_bit)); + + unsigned int firstwrd = ~(lowbits (~0, startbit)); + unsigned int lastwrd = ~(highbits (~0, endbit)); + + if (startwrd == endwrd) + { + unsigned int wrd = firstwrd & lastwrd; + if (mark_array[startwrd] & wrd) + { + dprintf (3, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + wrd, startwrd, + mark_array [startwrd], mark_word_address (startwrd))); + FATAL_GC_ERROR(); + } + return; + } + + // set the first mark word. + if (startbit) + { + if (mark_array[startwrd] & firstwrd) + { + dprintf (3, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + firstwrd, startwrd, + mark_array [startwrd], mark_word_address (startwrd))); + FATAL_GC_ERROR(); + } + + startwrd++; + } + + for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) + { + if (mark_array[wrdtmp]) + { + dprintf (3, ("The mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + wrdtmp, + mark_array [wrdtmp], mark_word_address (wrdtmp))); + FATAL_GC_ERROR(); + } + } + + // set the last mark word. 
+ if (endbit) + { + if (mark_array[endwrd] & lastwrd) + { + dprintf (3, ("The %Ix portion of mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + lastwrd, lastwrd, + mark_array [lastwrd], mark_word_address (lastwrd))); + FATAL_GC_ERROR(); + } + } +} +#endif //VERIFY_HEAP && BACKGROUND_GC + +allocator::allocator (unsigned int num_b, size_t fbs, alloc_list* b) +{ + assert (num_b < MAX_BUCKET_COUNT); + num_buckets = num_b; + frst_bucket_size = fbs; + buckets = b; +} + +alloc_list& allocator::alloc_list_of (unsigned int bn) +{ + assert (bn < num_buckets); + if (bn == 0) + return first_bucket; + else + return buckets [bn-1]; +} + +size_t& allocator::alloc_list_damage_count_of (unsigned int bn) +{ + assert (bn < num_buckets); + if (bn == 0) + return first_bucket.alloc_list_damage_count(); + else + return buckets [bn-1].alloc_list_damage_count(); +} + +void allocator::unlink_item (unsigned int bn, uint8_t* item, uint8_t* prev_item, BOOL use_undo_p) +{ + //unlink the free_item + alloc_list* al = &alloc_list_of (bn); + if (prev_item) + { + if (use_undo_p && (free_list_undo (prev_item) == UNDO_EMPTY)) + { + assert (item == free_list_slot (prev_item)); + free_list_undo (prev_item) = item; + alloc_list_damage_count_of (bn)++; + } + free_list_slot (prev_item) = free_list_slot(item); + } + else + { + al->alloc_list_head() = (uint8_t*)free_list_slot(item); + } + if (al->alloc_list_tail() == item) + { + al->alloc_list_tail() = prev_item; + } +} + +void allocator::clear() +{ + for (unsigned int i = 0; i < num_buckets; i++) + { + alloc_list_head_of (i) = 0; + alloc_list_tail_of (i) = 0; + } +} + +//always thread to the end. +void allocator::thread_free_item (uint8_t* item, uint8_t*& head, uint8_t*& tail) +{ + free_list_slot (item) = 0; + free_list_undo (item) = UNDO_EMPTY; + assert (item != head); + + if (head == 0) + { + head = item; + } + //TODO: This shouldn't happen anymore - verify that's the case. 
+ //the following is necessary because the last free element + //may have been truncated, and tail isn't updated. + else if (free_list_slot (head) == 0) + { + free_list_slot (head) = item; + } + else + { + assert (item != tail); + assert (free_list_slot(tail) == 0); + free_list_slot (tail) = item; + } + tail = item; +} + +void allocator::thread_item (uint8_t* item, size_t size) +{ + size_t sz = frst_bucket_size; + unsigned int a_l_number = 0; + + for (; a_l_number < (num_buckets-1); a_l_number++) + { + if (size < sz) + { + break; + } + sz = sz * 2; + } + alloc_list* al = &alloc_list_of (a_l_number); + thread_free_item (item, + al->alloc_list_head(), + al->alloc_list_tail()); +} + +void allocator::thread_item_front (uint8_t* item, size_t size) +{ + //find right free list + size_t sz = frst_bucket_size; + unsigned int a_l_number = 0; + for (; a_l_number < (num_buckets-1); a_l_number++) + { + if (size < sz) + { + break; + } + sz = sz * 2; + } + alloc_list* al = &alloc_list_of (a_l_number); + free_list_slot (item) = al->alloc_list_head(); + free_list_undo (item) = UNDO_EMPTY; + + if (al->alloc_list_tail() == 0) + { + al->alloc_list_tail() = al->alloc_list_head(); + } + al->alloc_list_head() = item; + if (al->alloc_list_tail() == 0) + { + al->alloc_list_tail() = item; + } +} + +void allocator::copy_to_alloc_list (alloc_list* toalist) +{ + for (unsigned int i = 0; i < num_buckets; i++) + { + toalist [i] = alloc_list_of (i); +#ifdef FL_VERIFICATION + uint8_t* free_item = alloc_list_head_of (i); + size_t count = 0; + while (free_item) + { + count++; + free_item = free_list_slot (free_item); + } + + toalist[i].item_count = count; +#endif //FL_VERIFICATION + } +} + +void allocator::copy_from_alloc_list (alloc_list* fromalist) +{ + BOOL repair_list = !discard_if_no_fit_p (); + for (unsigned int i = 0; i < num_buckets; i++) + { + size_t count = alloc_list_damage_count_of (i); + alloc_list_of (i) = fromalist [i]; + assert (alloc_list_damage_count_of (i) == 0); + + if 
(repair_list) + { + //repair the the list + //new items may have been added during the plan phase + //items may have been unlinked. + uint8_t* free_item = alloc_list_head_of (i); + while (free_item && count) + { + assert (((CObjectHeader*)free_item)->IsFree()); + if ((free_list_undo (free_item) != UNDO_EMPTY)) + { + count--; + free_list_slot (free_item) = free_list_undo (free_item); + free_list_undo (free_item) = UNDO_EMPTY; + } + + free_item = free_list_slot (free_item); + } + +#ifdef FL_VERIFICATION + free_item = alloc_list_head_of (i); + size_t item_count = 0; + while (free_item) + { + item_count++; + free_item = free_list_slot (free_item); + } + + assert (item_count == alloc_list_of (i).item_count); +#endif //FL_VERIFICATION + } +#ifdef DEBUG + uint8_t* tail_item = alloc_list_tail_of (i); + assert ((tail_item == 0) || (free_list_slot (tail_item) == 0)); +#endif + } +} + +void allocator::commit_alloc_list_changes() +{ + BOOL repair_list = !discard_if_no_fit_p (); + if (repair_list) + { + for (unsigned int i = 0; i < num_buckets; i++) + { + //remove the undo info from list. + uint8_t* free_item = alloc_list_head_of (i); + size_t count = alloc_list_damage_count_of (i); + while (free_item && count) + { + assert (((CObjectHeader*)free_item)->IsFree()); + + if (free_list_undo (free_item) != UNDO_EMPTY) + { + free_list_undo (free_item) = UNDO_EMPTY; + count--; + } + + free_item = free_list_slot (free_item); + } + + alloc_list_damage_count_of (i) = 0; + } + } +} + +void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, + alloc_context* acontext, heap_segment* seg, + int align_const, int gen_number) +{ + size_t aligned_min_obj_size = Align(min_obj_size, align_const); + + //probably should pass seg==0 for free lists. 
+ if (seg) + { + assert (heap_segment_used (seg) <= heap_segment_committed (seg)); + } + + dprintf (3, ("Expanding segment allocation [%Ix, %Ix[", (size_t)start, + (size_t)start + limit_size - aligned_min_obj_size)); + + if ((acontext->alloc_limit != start) && + (acontext->alloc_limit + aligned_min_obj_size)!= start) + { + uint8_t* hole = acontext->alloc_ptr; + if (hole != 0) + { + size_t size = (acontext->alloc_limit - acontext->alloc_ptr); + dprintf (3, ("filling up hole [%Ix, %Ix[", (size_t)hole, (size_t)hole + size + Align (min_obj_size, align_const))); + // when we are finishing an allocation from a free list + // we know that the free area was Align(min_obj_size) larger + acontext->alloc_bytes -= size; + size_t free_obj_size = size + aligned_min_obj_size; + make_unused_array (hole, free_obj_size); + generation_free_obj_space (generation_of (gen_number)) += free_obj_size; + } + acontext->alloc_ptr = start; + } + acontext->alloc_limit = (start + limit_size - aligned_min_obj_size); + acontext->alloc_bytes += limit_size - ((gen_number < max_generation + 1) ? aligned_min_obj_size : 0); + +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + if (g_fEnableARM) + { + AppDomain* alloc_appdomain = GetAppDomain(); + alloc_appdomain->RecordAllocBytes (limit_size, heap_number); + } +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + + uint8_t* saved_used = 0; + + if (seg) + { + saved_used = heap_segment_used (seg); + } + + if (seg == ephemeral_heap_segment) + { + //Sometimes the allocated size is advanced without clearing the + //memory. 
Let's catch up here + if (heap_segment_used (seg) < (alloc_allocated - plug_skew)) + { +#ifdef MARK_ARRAY +#ifndef BACKGROUND_GC + clear_mark_array (heap_segment_used (seg) + plug_skew, alloc_allocated); +#endif //BACKGROUND_GC +#endif //MARK_ARRAY + heap_segment_used (seg) = alloc_allocated - plug_skew; + } + } +#ifdef BACKGROUND_GC + else if (seg) + { + uint8_t* old_allocated = heap_segment_allocated (seg) - plug_skew - limit_size; +#ifdef FEATURE_LOH_COMPACTION + old_allocated -= Align (loh_padding_obj_size, align_const); +#endif //FEATURE_LOH_COMPACTION + + assert (heap_segment_used (seg) >= old_allocated); + } +#endif //BACKGROUND_GC + if ((seg == 0) || + (start - plug_skew + limit_size) <= heap_segment_used (seg)) + { + dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory(1)", heap_number)); + add_saved_spinlock_info (me_release, mt_clr_mem); + leave_spin_lock (&more_space_lock); + dprintf (3, ("clearing memory at %Ix for %d bytes", (start - plug_skew), limit_size)); + memclr (start - plug_skew, limit_size); + } + else + { + uint8_t* used = heap_segment_used (seg); + heap_segment_used (seg) = start + limit_size - plug_skew; + + dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory", heap_number)); + add_saved_spinlock_info (me_release, mt_clr_mem); + leave_spin_lock (&more_space_lock); + if ((start - plug_skew) < used) + { + if (used != saved_used) + { + FATAL_GC_ERROR (); + } + + dprintf (2, ("clearing memory before used at %Ix for %Id bytes", + (start - plug_skew), (plug_skew + used - start))); + memclr (start - plug_skew, used - (start - plug_skew)); + } + } + + //this portion can be done after we release the lock + if (seg == ephemeral_heap_segment) + { +#ifdef FFIND_OBJECT + if (gen0_must_clear_bricks > 0) + { + //set the brick table to speed up find_object + size_t b = brick_of (acontext->alloc_ptr); + set_brick (b, acontext->alloc_ptr - brick_address (b)); + b++; + dprintf (3, ("Allocation Clearing bricks [%Ix, %Ix[", + b, brick_of (align_on_brick (start + 
limit_size)))); + volatile short* x = &brick_table [b]; + short* end_x = &brick_table [brick_of (align_on_brick (start + limit_size))]; + + for (;x < end_x;x++) + *x = -1; + } + else +#endif //FFIND_OBJECT + { + gen0_bricks_cleared = FALSE; + } + } + + // verifying the memory is completely cleared. + //verify_mem_cleared (start - plug_skew, limit_size); +} + +/* in order to make the allocator faster, allocate returns a + * 0 filled object. Care must be taken to set the allocation limit to the + * allocation pointer after gc + */ + +size_t gc_heap::limit_from_size (size_t size, size_t room, int gen_number, + int align_const) +{ + size_t new_limit = new_allocation_limit ((size + Align (min_obj_size, align_const)), + min (room,max (size + Align (min_obj_size, align_const), + ((gen_number < max_generation+1) ? + allocation_quantum : + 0))), + gen_number); + assert (new_limit >= (size + Align (min_obj_size, align_const))); + dprintf (100, ("requested to allocate %Id bytes, actual size is %Id", size, new_limit)); + return new_limit; +} + +void gc_heap::handle_oom (int heap_num, oom_reason reason, size_t alloc_size, + uint8_t* allocated, uint8_t* reserved) +{ + dprintf (1, ("total committed on the heap is %Id", get_total_committed_size())); + + UNREFERENCED_PARAMETER(heap_num); + + if (reason == oom_budget) + { + alloc_size = dd_min_gc_size (dynamic_data_of (0)) / 2; + } + + if ((reason == oom_budget) && ((!fgm_result.loh_p) && (fgm_result.fgm != fgm_no_failure))) + { + // This means during the last GC we needed to reserve and/or commit more memory + // but we couldn't. We proceeded with the GC and ended up not having enough + // memory at the end. This is a legitimate OOM situtation. Otherwise we + // probably made a mistake and didn't expand the heap when we should have. 
+ reason = oom_low_mem; + } + + oom_info.reason = reason; + oom_info.allocated = allocated; + oom_info.reserved = reserved; + oom_info.alloc_size = alloc_size; + oom_info.gc_index = settings.gc_index; + oom_info.fgm = fgm_result.fgm; + oom_info.size = fgm_result.size; + oom_info.available_pagefile_mb = fgm_result.available_pagefile_mb; + oom_info.loh_p = fgm_result.loh_p; + + fgm_result.fgm = fgm_no_failure; + + // Break early - before the more_space_lock is release so no other threads + // could have allocated on the same heap when OOM happened. + if (g_pConfig->IsGCBreakOnOOMEnabled()) + { + GCToOSInterface::DebugBreak(); + } +} + +#ifdef BACKGROUND_GC +BOOL gc_heap::background_allowed_p() +{ + return ( gc_can_use_concurrent && ((settings.pause_mode == pause_interactive) || (settings.pause_mode == pause_sustained_low_latency)) ); +} +#endif //BACKGROUND_GC + +void gc_heap::check_for_full_gc (int gen_num, size_t size) +{ + BOOL should_notify = FALSE; + // if we detect full gc because of the allocation budget specified this is TRUE; + // it's FALSE if it's due to other factors. + BOOL alloc_factor = TRUE; + int i = 0; + int n = 0; + int n_initial = gen_num; + BOOL local_blocking_collection = FALSE; + BOOL local_elevation_requested = FALSE; + int new_alloc_remain_percent = 0; + + if (full_gc_approach_event_set) + { + return; + } + + if (gen_num != (max_generation + 1)) + { + gen_num = max_generation; + } + + dynamic_data* dd_full = dynamic_data_of (gen_num); + ptrdiff_t new_alloc_remain = 0; + uint32_t pct = ((gen_num == (max_generation + 1)) ? fgn_loh_percent : fgn_maxgen_percent); + + for (int gen_index = 0; gen_index <= (max_generation + 1); gen_index++) + { + dprintf (2, ("FGN: h#%d: gen%d: %Id(%Id)", + heap_number, gen_index, + dd_new_allocation (dynamic_data_of (gen_index)), + dd_desired_allocation (dynamic_data_of (gen_index)))); + } + + // For small object allocations we only check every fgn_check_quantum bytes. 
+ if (n_initial == 0) + { + dprintf (2, ("FGN: gen0 last recorded alloc: %Id", fgn_last_alloc)); + dynamic_data* dd_0 = dynamic_data_of (n_initial); + if (((fgn_last_alloc - dd_new_allocation (dd_0)) < fgn_check_quantum) && + (dd_new_allocation (dd_0) >= 0)) + { + return; + } + else + { + fgn_last_alloc = dd_new_allocation (dd_0); + dprintf (2, ("FGN: gen0 last recorded alloc is now: %Id", fgn_last_alloc)); + } + + // We don't consider the size that came from soh 'cause it doesn't contribute to the + // gen2 budget. + size = 0; + } + + for (i = n+1; i <= max_generation; i++) + { + if (get_new_allocation (i) <= 0) + { + n = min (i, max_generation); + } + else + break; + } + + dprintf (2, ("FGN: h#%d: gen%d budget exceeded", heap_number, n)); + if (gen_num == max_generation) + { + // If it's small object heap we should first see if we will even be looking at gen2 budget + // in the next GC or not. If not we should go directly to checking other factors. + if (n < (max_generation - 1)) + { + goto check_other_factors; + } + } + + new_alloc_remain = dd_new_allocation (dd_full) - size; + + new_alloc_remain_percent = (int)(((float)(new_alloc_remain) / (float)dd_desired_allocation (dd_full)) * 100); + + dprintf (2, ("FGN: alloc threshold for gen%d is %d%%, current threshold is %d%%", + gen_num, pct, new_alloc_remain_percent)); + + if (new_alloc_remain_percent <= (int)pct) + { +#ifdef BACKGROUND_GC + // If background GC is enabled, we still want to check whether this will + // be a blocking GC or not because we only want to notify when it's a + // blocking full GC. 
+ if (background_allowed_p()) + { + goto check_other_factors; + } +#endif //BACKGROUND_GC + + should_notify = TRUE; + goto done; + } + +check_other_factors: + + dprintf (2, ("FGC: checking other factors")); + n = generation_to_condemn (n, + &local_blocking_collection, + &local_elevation_requested, + TRUE); + + if (local_elevation_requested && (n == max_generation)) + { + if (settings.should_lock_elevation) + { + int local_elevation_locked_count = settings.elevation_locked_count + 1; + if (local_elevation_locked_count != 6) + { + dprintf (2, ("FGN: lock count is %d - Condemning max_generation-1", + local_elevation_locked_count)); + n = max_generation - 1; + } + } + } + + dprintf (2, ("FGN: we estimate gen%d will be collected", n)); + +#ifdef BACKGROUND_GC + // When background GC is enabled it decreases the accurancy of our predictability - + // by the time the GC happens, we may not be under BGC anymore. If we try to + // predict often enough it should be ok. + if ((n == max_generation) && + (recursive_gc_sync::background_running_p())) + { + n = max_generation - 1; + dprintf (2, ("FGN: bgc - 1 instead of 2")); + } + + if ((n == max_generation) && !local_blocking_collection) + { + if (!background_allowed_p()) + { + local_blocking_collection = TRUE; + } + } +#endif //BACKGROUND_GC + + dprintf (2, ("FGN: we estimate gen%d will be collected: %s", + n, + (local_blocking_collection ? "blocking" : "background"))); + + if ((n == max_generation) && local_blocking_collection) + { + alloc_factor = FALSE; + should_notify = TRUE; + goto done; + } + +done: + + if (should_notify) + { + dprintf (2, ("FGN: gen%d detecting full GC approaching(%s) (GC#%d) (%Id%% left in gen%d)", + n_initial, + (alloc_factor ? 
"alloc" : "other"), + dd_collection_count (dynamic_data_of (0)), + new_alloc_remain_percent, + gen_num)); + + send_full_gc_notification (n_initial, alloc_factor); + } +} + +void gc_heap::send_full_gc_notification (int gen_num, BOOL due_to_alloc_p) +{ + if (!full_gc_approach_event_set) + { + assert (full_gc_approach_event.IsValid()); + FireEtwGCFullNotify_V1 (gen_num, due_to_alloc_p, GetClrInstanceId()); + + full_gc_end_event.Reset(); + full_gc_approach_event.Set(); + full_gc_approach_event_set = true; + } +} + +wait_full_gc_status gc_heap::full_gc_wait (CLREvent *event, int time_out_ms) +{ + if (fgn_maxgen_percent == 0) + { + return wait_full_gc_na; + } + + uint32_t wait_result = user_thread_wait(event, FALSE, time_out_ms); + + if ((wait_result == WAIT_OBJECT_0) || (wait_result == WAIT_TIMEOUT)) + { + if (fgn_maxgen_percent == 0) + { + return wait_full_gc_cancelled; + } + + if (wait_result == WAIT_OBJECT_0) + { +#ifdef BACKGROUND_GC + if (fgn_last_gc_was_concurrent) + { + fgn_last_gc_was_concurrent = FALSE; + return wait_full_gc_na; + } + else +#endif //BACKGROUND_GC + { + return wait_full_gc_success; + } + } + else + { + return wait_full_gc_timeout; + } + } + else + { + return wait_full_gc_failed; + } +} + +size_t gc_heap::get_full_compact_gc_count() +{ + return full_gc_counts[gc_type_compacting]; +} + +// DTREVIEW - we should check this in dt_low_ephemeral_space_p +// as well. +inline +BOOL gc_heap::short_on_end_of_seg (int gen_number, + heap_segment* seg, + int align_const) +{ + UNREFERENCED_PARAMETER(gen_number); + uint8_t* allocated = heap_segment_allocated(seg); + + return (!a_size_fit_p (end_space_after_gc(), + allocated, + heap_segment_reserved (seg), + align_const)); +} + +#ifdef _MSC_VER +#pragma warning(disable:4706) // "assignment within conditional expression" is intentional in this function. 
+#endif // _MSC_VER + +inline +BOOL gc_heap::a_fit_free_list_p (int gen_number, + size_t size, + alloc_context* acontext, + int align_const) +{ + BOOL can_fit = FALSE; + generation* gen = generation_of (gen_number); + allocator* gen_allocator = generation_allocator (gen); + size_t sz_list = gen_allocator->first_bucket_size(); + for (unsigned int a_l_idx = 0; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++) + { + if ((size < sz_list) || (a_l_idx == (gen_allocator->number_of_buckets()-1))) + { + uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx); + uint8_t* prev_free_item = 0; + + while (free_list != 0) + { + dprintf (3, ("considering free list %Ix", (size_t)free_list)); + size_t free_list_size = unused_array_size (free_list); + if ((size + Align (min_obj_size, align_const)) <= free_list_size) + { + dprintf (3, ("Found adequate unused area: [%Ix, size: %Id", + (size_t)free_list, free_list_size)); + + gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); + // We ask for more Align (min_obj_size) + // to make sure that we can insert a free object + // in adjust_limit will set the limit lower + size_t limit = limit_from_size (size, free_list_size, gen_number, align_const); + + uint8_t* remain = (free_list + limit); + size_t remain_size = (free_list_size - limit); + if (remain_size >= Align(min_free_list, align_const)) + { + make_unused_array (remain, remain_size); + gen_allocator->thread_item_front (remain, remain_size); + assert (remain_size >= Align (min_obj_size, align_const)); + } + else + { + //absorb the entire free list + limit += remain_size; + } + generation_free_list_space (gen) -= limit; + + adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number); + + can_fit = TRUE; + goto end; + } + else if (gen_allocator->discard_if_no_fit_p()) + { + assert (prev_free_item == 0); + dprintf (3, ("couldn't use this free area, discarding")); + generation_free_obj_space (gen) += free_list_size; + + 
gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); + generation_free_list_space (gen) -= free_list_size; + } + else + { + prev_free_item = free_list; + } + free_list = free_list_slot (free_list); + } + } + sz_list = sz_list * 2; + } +end: + return can_fit; +} + + +#ifdef BACKGROUND_GC +void gc_heap::bgc_loh_alloc_clr (uint8_t* alloc_start, + size_t size, + alloc_context* acontext, + int align_const, + int lock_index, + BOOL check_used_p, + heap_segment* seg) +{ + make_unused_array (alloc_start, size); + +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + if (g_fEnableARM) + { + AppDomain* alloc_appdomain = GetAppDomain(); + alloc_appdomain->RecordAllocBytes (size, heap_number); + } +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + + size_t size_of_array_base = sizeof(ArrayBase); + + bgc_alloc_lock->loh_alloc_done_with_index (lock_index); + + // clear memory while not holding the lock. + size_t size_to_skip = size_of_array_base; + size_t size_to_clear = size - size_to_skip - plug_skew; + size_t saved_size_to_clear = size_to_clear; + if (check_used_p) + { + uint8_t* end = alloc_start + size - plug_skew; + uint8_t* used = heap_segment_used (seg); + if (used < end) + { + if ((alloc_start + size_to_skip) < used) + { + size_to_clear = used - (alloc_start + size_to_skip); + } + else + { + size_to_clear = 0; + } + dprintf (2, ("bgc loh: setting used to %Ix", end)); + heap_segment_used (seg) = end; + } + + dprintf (2, ("bgc loh: used: %Ix, alloc: %Ix, end of alloc: %Ix, clear %Id bytes", + used, alloc_start, end, size_to_clear)); + } + else + { + dprintf (2, ("bgc loh: [%Ix-[%Ix(%Id)", alloc_start, alloc_start+size, size)); + } + +#ifdef VERIFY_HEAP + // since we filled in 0xcc for free object when we verify heap, + // we need to make sure we clear those bytes. 
+ if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + if (size_to_clear < saved_size_to_clear) + { + size_to_clear = saved_size_to_clear; + } + } +#endif //VERIFY_HEAP + + dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear large obj", heap_number)); + add_saved_spinlock_info (me_release, mt_clr_large_mem); + leave_spin_lock (&more_space_lock); + memclr (alloc_start + size_to_skip, size_to_clear); + + bgc_alloc_lock->loh_alloc_set (alloc_start); + + acontext->alloc_ptr = alloc_start; + acontext->alloc_limit = (alloc_start + size - Align (min_obj_size, align_const)); + + // need to clear the rest of the object before we hand it out. + clear_unused_array(alloc_start, size); +} +#endif //BACKGROUND_GC + +BOOL gc_heap::a_fit_free_list_large_p (size_t size, + alloc_context* acontext, + int align_const) +{ +#ifdef BACKGROUND_GC + wait_for_background_planning (awr_loh_alloc_during_plan); +#endif //BACKGROUND_GC + + BOOL can_fit = FALSE; + int gen_number = max_generation + 1; + generation* gen = generation_of (gen_number); + allocator* loh_allocator = generation_allocator (gen); + +#ifdef FEATURE_LOH_COMPACTION + size_t loh_pad = Align (loh_padding_obj_size, align_const); +#endif //FEATURE_LOH_COMPACTION + +#ifdef BACKGROUND_GC + int cookie = -1; +#endif //BACKGROUND_GC + size_t sz_list = loh_allocator->first_bucket_size(); + for (unsigned int a_l_idx = 0; a_l_idx < loh_allocator->number_of_buckets(); a_l_idx++) + { + if ((size < sz_list) || (a_l_idx == (loh_allocator->number_of_buckets()-1))) + { + uint8_t* free_list = loh_allocator->alloc_list_head_of (a_l_idx); + uint8_t* prev_free_item = 0; + while (free_list != 0) + { + dprintf (3, ("considering free list %Ix", (size_t)free_list)); + + size_t free_list_size = unused_array_size(free_list); + +#ifdef FEATURE_LOH_COMPACTION + if ((size + loh_pad) <= free_list_size) +#else + if (((size + Align (min_obj_size, align_const)) <= free_list_size)|| + (size == free_list_size)) +#endif //FEATURE_LOH_COMPACTION + { +#ifdef 
BACKGROUND_GC + cookie = bgc_alloc_lock->loh_alloc_set (free_list); +#endif //BACKGROUND_GC + + //unlink the free_item + loh_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); + + // Substract min obj size because limit_from_size adds it. Not needed for LOH + size_t limit = limit_from_size (size - Align(min_obj_size, align_const), free_list_size, + gen_number, align_const); + +#ifdef FEATURE_LOH_COMPACTION + make_unused_array (free_list, loh_pad); + limit -= loh_pad; + free_list += loh_pad; + free_list_size -= loh_pad; +#endif //FEATURE_LOH_COMPACTION + + uint8_t* remain = (free_list + limit); + size_t remain_size = (free_list_size - limit); + if (remain_size != 0) + { + assert (remain_size >= Align (min_obj_size, align_const)); + make_unused_array (remain, remain_size); + } + if (remain_size >= Align(min_free_list, align_const)) + { + loh_thread_gap_front (remain, remain_size, gen); + assert (remain_size >= Align (min_obj_size, align_const)); + } + else + { + generation_free_obj_space (gen) += remain_size; + } + generation_free_list_space (gen) -= free_list_size; + dprintf (3, ("found fit on loh at %Ix", free_list)); +#ifdef BACKGROUND_GC + if (cookie != -1) + { + bgc_loh_alloc_clr (free_list, limit, acontext, align_const, cookie, FALSE, 0); + } + else +#endif //BACKGROUND_GC + { + adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number); + } + + //fix the limit to compensate for adjust_limit_clr making it too short + acontext->alloc_limit += Align (min_obj_size, align_const); + can_fit = TRUE; + goto exit; + } + prev_free_item = free_list; + free_list = free_list_slot (free_list); + } + } + sz_list = sz_list * 2; + } +exit: + return can_fit; +} + +#ifdef _MSC_VER +#pragma warning(default:4706) +#endif // _MSC_VER + +BOOL gc_heap::a_fit_segment_end_p (int gen_number, + heap_segment* seg, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p) +{ + *commit_failed_p = FALSE; + size_t limit = 0; +#ifdef 
BACKGROUND_GC + int cookie = -1; +#endif //BACKGROUND_GC + + uint8_t*& allocated = ((gen_number == 0) ? + alloc_allocated : + heap_segment_allocated(seg)); + + size_t pad = Align (min_obj_size, align_const); + +#ifdef FEATURE_LOH_COMPACTION + if (gen_number == (max_generation + 1)) + { + pad += Align (loh_padding_obj_size, align_const); + } +#endif //FEATURE_LOH_COMPACTION + + uint8_t* end = heap_segment_committed (seg) - pad; + + if (a_size_fit_p (size, allocated, end, align_const)) + { + limit = limit_from_size (size, + (end - allocated), + gen_number, align_const); + goto found_fit; + } + + end = heap_segment_reserved (seg) - pad; + + if (a_size_fit_p (size, allocated, end, align_const)) + { + limit = limit_from_size (size, + (end - allocated), + gen_number, align_const); + if (grow_heap_segment (seg, allocated + limit)) + { + goto found_fit; + } + else + { + dprintf (2, ("can't grow segment, doing a full gc")); + *commit_failed_p = TRUE; + } + } + goto found_no_fit; + +found_fit: + +#ifdef BACKGROUND_GC + if (gen_number != 0) + { + cookie = bgc_alloc_lock->loh_alloc_set (allocated); + } +#endif //BACKGROUND_GC + + uint8_t* old_alloc; + old_alloc = allocated; +#ifdef FEATURE_LOH_COMPACTION + if (gen_number == (max_generation + 1)) + { + size_t loh_pad = Align (loh_padding_obj_size, align_const); + make_unused_array (old_alloc, loh_pad); + old_alloc += loh_pad; + allocated += loh_pad; + limit -= loh_pad; + } +#endif //FEATURE_LOH_COMPACTION + +#if defined (VERIFY_HEAP) && defined (_DEBUG) + ((void**) allocated)[-1] = 0; //clear the sync block +#endif //VERIFY_HEAP && _DEBUG + allocated += limit; + + dprintf (3, ("found fit at end of seg: %Ix", old_alloc)); + +#ifdef BACKGROUND_GC + if (cookie != -1) + { + bgc_loh_alloc_clr (old_alloc, limit, acontext, align_const, cookie, TRUE, seg); + } + else +#endif //BACKGROUND_GC + { + adjust_limit_clr (old_alloc, limit, acontext, seg, align_const, gen_number); + } + + return TRUE; + +found_no_fit: + + return FALSE; +} + 
// Try to satisfy a large-object allocation of `size` bytes at the end of one of
// the generation's segments, walking the segment chain until one fits or we run
// out of segments.
// On success, alloc_limit is bumped back up by Align(min_obj_size) to compensate
// for the min-obj-size slack that a_fit_segment_end_p reserves (the request is
// passed down as size - Align(min_obj_size)).
// On a commit failure *commit_failed_p is set by the callee and *oom_r is set to
// oom_cant_commit; the walk stops immediately in that case.
// Returns TRUE iff the allocation context was successfully set up.
BOOL gc_heap::loh_a_fit_segment_end_p (int gen_number,
                                       size_t size,
                                       alloc_context* acontext,
                                       int align_const,
                                       BOOL* commit_failed_p,
                                       oom_reason* oom_r)
{
    *commit_failed_p = FALSE;
    heap_segment* seg = generation_allocation_segment (generation_of (gen_number));
    BOOL can_allocate_p = FALSE;

    while (seg)
    {
        if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)),
                                 acontext, align_const, commit_failed_p))
        {
            // Undo the min-obj-size reduction applied to the request above.
            acontext->alloc_limit += Align (min_obj_size, align_const);
            can_allocate_p = TRUE;
            break;
        }
        else
        {
            if (*commit_failed_p)
            {
                // Could not commit more memory for this segment - report OOM
                // rather than trying further segments.
                *oom_r = oom_cant_commit;
                break;
            }
            else
            {
                seg = heap_segment_next_rw (seg);
            }
        }
    }

    return can_allocate_p;
}

#ifdef BACKGROUND_GC
// Block the current allocating thread until the in-progress background GC
// finishes. The more_space_lock is released while waiting and reacquired
// afterwards; spinlock bookkeeping is recorded on both sides.
inline
void gc_heap::wait_for_background (alloc_wait_reason awr)
{
    dprintf (2, ("BGC is already in progress, waiting for it to finish"));
    dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc done", heap_number));
    add_saved_spinlock_info (me_release, mt_wait_bgc);
    leave_spin_lock (&more_space_lock);
    background_gc_wait (awr);
    enter_spin_lock (&more_space_lock);
    add_saved_spinlock_info (me_acquire, mt_wait_bgc);
    dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc done", heap_number));
}

// If a background GC is running AND physical memory load is very high (>= 95%),
// wait for the BGC to complete before proceeding; otherwise return immediately.
void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr)
{
    if (recursive_gc_sync::background_running_p())
    {
        uint32_t memory_load;
        get_memory_info (&memory_load);
        if (memory_load >= 95)
        {
            dprintf (GTC_LOG, ("high mem - wait for BGC to finish, wait reason: %d", awr));
            wait_for_background (awr);
        }
    }
}

#endif //BACKGROUND_GC

// We request to trigger an ephemeral GC but we may get a full compacting GC.
// return TRUE if that's the case.
// Trigger a gen1 (ephemeral) collection. Returns TRUE if what actually ran was
// a full compacting GC (detected by comparing the full-compacting-GC count
// before and after), so the caller can take the post-compaction path instead.
BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr)
{
#ifdef BACKGROUND_GC
    // Under very high memory load, let an in-flight BGC finish first.
    wait_for_bgc_high_memory (awr_loh_oos_bgc);
#endif //BACKGROUND_GC

    BOOL did_full_compact_gc = FALSE;

    dprintf (2, ("triggering a gen1 GC"));
    size_t last_full_compact_gc_count = get_full_compact_gc_count();
    vm_heap->GarbageCollectGeneration(max_generation - 1, gr);

#ifdef MULTIPLE_HEAPS
    // NOTE(review): in the multi-heap build the more_space_lock is reacquired
    // here after the GC; presumably the single-heap build reacquires it
    // elsewhere - confirm against GarbageCollectGeneration.
    enter_spin_lock (&more_space_lock);
    add_saved_spinlock_info (me_acquire, mt_t_eph_gc);
    dprintf (SPINLOCK_LOG, ("[%d]Emsl after a GC", heap_number));
#endif //MULTIPLE_HEAPS

    size_t current_full_compact_gc_count = get_full_compact_gc_count();

    if (current_full_compact_gc_count > last_full_compact_gc_count)
    {
        dprintf (2, ("attempted to trigger an ephemeral GC and got a full compacting GC"));
        did_full_compact_gc = TRUE;
    }

    return did_full_compact_gc;
}

// Try to satisfy a small-object allocation without triggering a GC:
// first from the generation's free list, then (optionally) at the end of the
// ephemeral segment.
// short_seg_end_p: optional out-param; when supplied it is set to whether the
//   ephemeral segment is short on end-of-seg space, and the end-of-seg fit is
//   only attempted when it is not short. When NULL the end-of-seg fit is
//   always attempted.
// commit_failed_p: set by a_fit_segment_end_p if growing the segment failed.
// Returns TRUE iff an allocation context was successfully set up.
BOOL gc_heap::soh_try_fit (int gen_number,
                           size_t size,
                           alloc_context* acontext,
                           int align_const,
                           BOOL* commit_failed_p,
                           BOOL* short_seg_end_p)
{
    BOOL can_allocate = TRUE;
    if (short_seg_end_p)
    {
        *short_seg_end_p = FALSE;
    }

    can_allocate = a_fit_free_list_p (gen_number, size, acontext, align_const);
    if (!can_allocate)
    {
        if (short_seg_end_p)
        {
            *short_seg_end_p = short_on_end_of_seg (gen_number, ephemeral_heap_segment, align_const);
        }
        // If the caller doesn't care, we always try to fit at the end of seg;
        // otherwise we would only try if we are actually not short at end of seg.
        if (!short_seg_end_p || !(*short_seg_end_p))
        {
            can_allocate = a_fit_segment_end_p (gen_number, ephemeral_heap_segment, size,
                                               acontext, align_const, commit_failed_p);
        }
    }

    return can_allocate;
}

// Small-object-heap allocation slow path, expressed as a state machine.
// Entered with the more_space_lock held (the lock is released on the OOM exit
// path below, and by adjust_limit_clr on success - see those sites).
// Drives through: try-fit -> (ephemeral GC | full compacting GC | wait for
// BGC) -> retry states, until reaching a_state_can_allocate or
// a_state_cant_allocate.
// Returns TRUE iff the allocation context was successfully set up; on failure
// handle_oom has been called before returning.
BOOL gc_heap::allocate_small (int gen_number,
                              size_t size,
                              alloc_context* acontext,
                              int align_const)
{
#if defined (BACKGROUND_GC) && !defined (MULTIPLE_HEAPS)
    // While a BGC is running, periodically yield the more_space_lock and sleep
    // (in preemptive mode) so the BGC thread can make progress.
    if (recursive_gc_sync::background_running_p())
    {
        background_soh_alloc_count++;
        if ((background_soh_alloc_count % bgc_alloc_spin_count) == 0)
        {
            Thread* current_thread = GetThread();
            add_saved_spinlock_info (me_release, mt_alloc_small);
            dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl", heap_number));
            leave_spin_lock (&more_space_lock);
            BOOL cooperative_mode = enable_preemptive (current_thread);
            GCToOSInterface::Sleep (bgc_alloc_spin);
            disable_preemptive (current_thread, cooperative_mode);
            enter_spin_lock (&more_space_lock);
            add_saved_spinlock_info (me_acquire, mt_alloc_small);
            dprintf (SPINLOCK_LOG, ("[%d]spin Emsl", heap_number));
        }
        else
        {
            //GCToOSInterface::YieldThread (0);
        }
    }
#endif //BACKGROUND_GC && !MULTIPLE_HEAPS

    gc_reason gr = reason_oos_soh;
    oom_reason oom_r = oom_no_failure;

    // No variable values should be "carried over" from one state to the other.
    // That's why there are local variables for each state.

    allocation_state soh_alloc_state = a_state_start;

    // If we can get a new seg it means allocation will succeed.
    while (1)
    {
        dprintf (3, ("[h%d]soh state is %s", heap_number, allocation_state_str[soh_alloc_state]));
        switch (soh_alloc_state)
        {
            case a_state_can_allocate:
            case a_state_cant_allocate:
            {
                // Terminal states.
                goto exit;
            }
            case a_state_start:
            {
                soh_alloc_state = a_state_try_fit;
                break;
            }
            case a_state_try_fit:
            {
                // First attempt: no short-seg-end check (NULL). On commit
                // failure go straight to a full compacting GC; otherwise try
                // an ephemeral GC.
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext,
                                                  align_const, &commit_failed_p,
                                                  NULL);
                soh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (commit_failed_p ?
                                            a_state_trigger_full_compact_gc :
                                            a_state_trigger_ephemeral_gc));
                break;
            }
            case a_state_try_fit_after_bgc:
            {
                // After waiting for a background GC: if the segment end is
                // short, try a second ephemeral GC before escalating.
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext,
                                                  align_const, &commit_failed_p,
                                                  &short_seg_end_p);
                soh_alloc_state = (can_use_existing_p ?
                                        a_state_can_allocate :
                                        (short_seg_end_p ?
                                            a_state_trigger_2nd_ephemeral_gc :
                                            a_state_trigger_full_compact_gc));
                break;
            }
            case a_state_try_fit_after_cg:
            {
                // After a full compacting GC: short-on-seg-end here means the
                // budget is exhausted - report OOM rather than GC again.
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;

                can_use_existing_p = soh_try_fit (gen_number, size, acontext,
                                                  align_const, &commit_failed_p,
                                                  &short_seg_end_p);
                if (short_seg_end_p)
                {
                    soh_alloc_state = a_state_cant_allocate;
                    oom_r = oom_budget;
                }
                else
                {
                    if (can_use_existing_p)
                    {
                        soh_alloc_state = a_state_can_allocate;
                    }
                    else
                    {
#ifdef MULTIPLE_HEAPS
                        if (!commit_failed_p)
                        {
                            // some other threads already grabbed the more space lock and allocated
                            // so we should attempt an ephemeral GC again.
                            assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated);
                            soh_alloc_state = a_state_trigger_ephemeral_gc;
                        }
                        else
#endif //MULTIPLE_HEAPS
                        {
                            assert (commit_failed_p);
                            soh_alloc_state = a_state_cant_allocate;
                            oom_r = oom_cant_commit;
                        }
                    }
                }
                break;
            }
            case a_state_check_and_wait_for_bgc:
            {
                BOOL bgc_in_progress_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc);
                soh_alloc_state = (did_full_compacting_gc ?
                                        a_state_try_fit_after_cg :
                                        a_state_try_fit_after_bgc);
                break;
            }
            case a_state_trigger_ephemeral_gc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;
                BOOL bgc_in_progress_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;

                did_full_compacting_gc = trigger_ephemeral_gc (gr);
                if (did_full_compacting_gc)
                {
                    // We asked for gen1 but got a full compacting GC.
                    soh_alloc_state = a_state_try_fit_after_cg;
                }
                else
                {
                    can_use_existing_p = soh_try_fit (gen_number, size, acontext,
                                                      align_const, &commit_failed_p,
                                                      &short_seg_end_p);
#ifdef BACKGROUND_GC
                    bgc_in_progress_p = recursive_gc_sync::background_running_p();
#endif //BACKGROUND_GC

                    if (short_seg_end_p)
                    {
                        soh_alloc_state = (bgc_in_progress_p ?
                                                a_state_check_and_wait_for_bgc :
                                                a_state_trigger_full_compact_gc);

                        if (fgn_maxgen_percent)
                        {
                            // Full-GC-notification subscribers get a last
                            // warning before a potential OOM.
                            dprintf (2, ("FGN: doing last GC before we throw OOM"));
                            send_full_gc_notification (max_generation, FALSE);
                        }
                    }
                    else
                    {
                        if (can_use_existing_p)
                        {
                            soh_alloc_state = a_state_can_allocate;
                        }
                        else
                        {
#ifdef MULTIPLE_HEAPS
                            if (!commit_failed_p)
                            {
                                // some other threads already grabbed the more space lock and allocated
                                // so we should attempt an ephemeral GC again.
                                assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated);
                                soh_alloc_state = a_state_trigger_ephemeral_gc;
                            }
                            else
#endif //MULTIPLE_HEAPS
                            {
                                soh_alloc_state = a_state_trigger_full_compact_gc;
                                if (fgn_maxgen_percent)
                                {
                                    dprintf (2, ("FGN: failed to commit, doing full compacting GC"));
                                    send_full_gc_notification (max_generation, FALSE);
                                }
                            }
                        }
                    }
                }
                break;
            }
            case a_state_trigger_2nd_ephemeral_gc:
            {
                BOOL commit_failed_p = FALSE;
                BOOL can_use_existing_p = FALSE;
                BOOL short_seg_end_p = FALSE;
                BOOL did_full_compacting_gc = FALSE;


                did_full_compacting_gc = trigger_ephemeral_gc (gr);

                if (did_full_compacting_gc)
                {
                    soh_alloc_state = a_state_try_fit_after_cg;
                }
                else
                {
                    can_use_existing_p = soh_try_fit (gen_number, size, acontext,
                                                      align_const, &commit_failed_p,
                                                      &short_seg_end_p);
                    if (short_seg_end_p || commit_failed_p)
                    {
                        // Second ephemeral GC didn't help - escalate.
                        soh_alloc_state = a_state_trigger_full_compact_gc;
                    }
                    else
                    {
                        assert (can_use_existing_p);
                        soh_alloc_state = a_state_can_allocate;
                    }
                }
                break;
            }
            case a_state_trigger_full_compact_gc:
            {
                // Last resort before OOM. trigger_full_compact_gc sets oom_r
                // when it fails.
                BOOL got_full_compacting_gc = FALSE;

                got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r);
                soh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate);
                break;
            }
            default:
            {
                assert (!"Invalid state!");
                break;
            }
        }
    }

exit:
    if (soh_alloc_state == a_state_cant_allocate)
    {
        assert (oom_r != oom_no_failure);
        handle_oom (heap_number,
                    oom_r,
                    size,
                    heap_segment_allocated (ephemeral_heap_segment),
                    heap_segment_reserved (ephemeral_heap_segment));

        dprintf (SPINLOCK_LOG, ("[%d]Lmsl for oom", heap_number));
        add_saved_spinlock_info (me_release, mt_alloc_small_cant);
        leave_spin_lock (&more_space_lock);
    }

    return (soh_alloc_state == a_state_can_allocate);
}

#ifdef BACKGROUND_GC
// Spin until the background GC leaves its planning phase, releasing the
// more_space_lock around each wait so the BGC thread can proceed. On return
// the BGC is guaranteed to be in the free or marking state.
inline
void gc_heap::wait_for_background_planning (alloc_wait_reason awr)
{
    while (current_c_gc_state == c_gc_state_planning)
    {
        dprintf (3, ("lh state planning, cannot allocate"));

        dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc plan", heap_number));
        add_saved_spinlock_info (me_release, mt_wait_bgc_plan);
        leave_spin_lock (&more_space_lock);
        background_gc_wait_lh (awr);
        enter_spin_lock (&more_space_lock);
        add_saved_spinlock_info (me_acquire, mt_wait_bgc_plan);
        dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc plan", heap_number));
    }
    assert ((current_c_gc_state == c_gc_state_free) ||
            (current_c_gc_state == c_gc_state_marking));
}

// Heuristic: should LOH allocation be allowed to proceed (TRUE) or be
// throttled (FALSE) while a background GC is in flight? Allows allocation
// freely while total LOH size is small (< 10x the min gen2 GC size); refuses
// once LOH growth since BGC start is large relative to its starting size;
// otherwise permits allocation but scales up bgc_alloc_spin_loh with the
// growth ratio so allocating threads back off progressively.
BOOL gc_heap::bgc_loh_should_allocate()
{
    size_t min_gc_size = dd_min_gc_size(dynamic_data_of (max_generation + 1));

    if ((bgc_begin_loh_size + bgc_loh_size_increased) < (min_gc_size * 10))
    {
        return TRUE;
    }

    if (((bgc_begin_loh_size / end_loh_size) >= 2) || (bgc_loh_size_increased >= bgc_begin_loh_size))
    {
        if ((bgc_begin_loh_size / end_loh_size) > 2)
        {
            dprintf (3, ("alloc-ed too much before bgc started"));
        }
        else
        {
            dprintf (3, ("alloc-ed too much after bgc started"));
        }
        return FALSE;
    }
    else
    {
        bgc_alloc_spin_loh = (uint32_t)(((float)bgc_loh_size_increased / (float)bgc_begin_loh_size) * 10);
        return TRUE;
    }
}
#endif //BACKGROUND_GC

size_t
gc_heap::get_large_seg_size (size_t size) +{ + size_t default_seg_size = get_valid_segment_size(TRUE); +#ifdef SEG_MAPPING_TABLE + size_t align_size = default_seg_size; +#else //SEG_MAPPING_TABLE + size_t align_size = default_seg_size / 2; +#endif //SEG_MAPPING_TABLE + int align_const = get_alignment_constant (FALSE); + size_t large_seg_size = align_on_page ( + max (default_seg_size, + ((size + 2 * Align(min_obj_size, align_const) + OS_PAGE_SIZE + + align_size) / align_size * align_size))); + return large_seg_size; +} + +BOOL gc_heap::loh_get_new_seg (generation* gen, + size_t size, + int align_const, + BOOL* did_full_compact_gc, + oom_reason* oom_r) +{ + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(align_const); + + *did_full_compact_gc = FALSE; + + size_t seg_size = get_large_seg_size (size); + + heap_segment* new_seg = get_large_segment (seg_size, did_full_compact_gc); + + if (new_seg) + { + loh_alloc_since_cg += seg_size; + } + else + { + *oom_r = oom_loh; + } + + return (new_seg != 0); +} + +BOOL gc_heap::retry_full_compact_gc (size_t size) +{ + size_t seg_size = get_large_seg_size (size); + + if (loh_alloc_since_cg >= (2 * (uint64_t)seg_size)) + { + return TRUE; + } + +#ifdef MULTIPLE_HEAPS + uint64_t total_alloc_size = 0; + for (int i = 0; i < n_heaps; i++) + { + total_alloc_size += g_heaps[i]->loh_alloc_since_cg; + } + + if (total_alloc_size >= (2 * (uint64_t)seg_size)) + { + return TRUE; + } +#endif //MULTIPLE_HEAPS + + return FALSE; +} + +BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr, + BOOL* did_full_compact_gc) +{ + BOOL bgc_in_progress = FALSE; + *did_full_compact_gc = FALSE; +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + bgc_in_progress = TRUE; + size_t last_full_compact_gc_count = get_full_compact_gc_count(); + wait_for_background (awr); + size_t current_full_compact_gc_count = get_full_compact_gc_count(); + if (current_full_compact_gc_count > last_full_compact_gc_count) + { + *did_full_compact_gc 
= TRUE; + } + } +#endif //BACKGROUND_GC + + return bgc_in_progress; +} + +BOOL gc_heap::loh_try_fit (int gen_number, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p, + oom_reason* oom_r) +{ + BOOL can_allocate = TRUE; + + if (!a_fit_free_list_large_p (size, acontext, align_const)) + { + can_allocate = loh_a_fit_segment_end_p (gen_number, size, + acontext, align_const, + commit_failed_p, oom_r); + +#ifdef BACKGROUND_GC + if (can_allocate && recursive_gc_sync::background_running_p()) + { + bgc_loh_size_increased += size; + } +#endif //BACKGROUND_GC + } +#ifdef BACKGROUND_GC + else + { + if (recursive_gc_sync::background_running_p()) + { + bgc_loh_allocated_in_free += size; + } + } +#endif //BACKGROUND_GC + + return can_allocate; +} + +BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, + oom_reason* oom_r) +{ + BOOL did_full_compact_gc = FALSE; + + size_t last_full_compact_gc_count = get_full_compact_gc_count(); + + // Set this so the next GC will be a full compacting GC. + if (!last_gc_before_oom) + { + last_gc_before_oom = TRUE; + } + +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + wait_for_background ((gr == reason_oos_soh) ? 
awr_gen0_oos_bgc : awr_loh_oos_bgc); + dprintf (2, ("waited for BGC - done")); + } +#endif //BACKGROUND_GC + + size_t current_full_compact_gc_count = get_full_compact_gc_count(); + if (current_full_compact_gc_count > last_full_compact_gc_count) + { + dprintf (3, ("a full compacting GC triggered while waiting for BGC (%d->%d)", last_full_compact_gc_count, current_full_compact_gc_count)); + assert (current_full_compact_gc_count > last_full_compact_gc_count); + did_full_compact_gc = TRUE; + goto exit; + } + + dprintf (3, ("h%d full GC", heap_number)); + vm_heap->GarbageCollectGeneration(max_generation, gr); + +#ifdef MULTIPLE_HEAPS + enter_spin_lock (&more_space_lock); + dprintf (SPINLOCK_LOG, ("[%d]Emsl after full gc", heap_number)); + add_saved_spinlock_info (me_acquire, mt_t_full_gc); +#endif //MULTIPLE_HEAPS + + current_full_compact_gc_count = get_full_compact_gc_count(); + + if (current_full_compact_gc_count == last_full_compact_gc_count) + { + dprintf (2, ("attempted to trigger a full compacting GC but didn't get it")); + // We requested a full GC but didn't get because of the elevation logic + // which means we should fail. + *oom_r = oom_unproductive_full_gc; + } + else + { + dprintf (3, ("h%d: T full compacting GC (%d->%d)", + heap_number, + last_full_compact_gc_count, + current_full_compact_gc_count)); + + assert (current_full_compact_gc_count > last_full_compact_gc_count); + did_full_compact_gc = TRUE; + } + +exit: + return did_full_compact_gc; +} + +#ifdef RECORD_LOH_STATE +void gc_heap::add_saved_loh_state (allocation_state loh_state_to_save, EEThreadId thread_id) +{ + // When the state is can_allocate we already have released the more + // space lock. So we are not logging states here since this code + // is not thread safe. 
+ if (loh_state_to_save != a_state_can_allocate) + { + last_loh_states[loh_state_index].alloc_state = loh_state_to_save; + last_loh_states[loh_state_index].thread_id = thread_id; + loh_state_index++; + + if (loh_state_index == max_saved_loh_states) + { + loh_state_index = 0; + } + + assert (loh_state_index < max_saved_loh_states); + } +} +#endif //RECORD_LOH_STATE + +BOOL gc_heap::allocate_large (int gen_number, + size_t size, + alloc_context* acontext, + int align_const) +{ +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p() && (current_c_gc_state != c_gc_state_planning)) + { + background_loh_alloc_count++; + //if ((background_loh_alloc_count % bgc_alloc_spin_count_loh) == 0) + { + if (bgc_loh_should_allocate()) + { + if (!bgc_alloc_spin_loh) + { + Thread* current_thread = GetThread(); + add_saved_spinlock_info (me_release, mt_alloc_large); + dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl loh", heap_number)); + leave_spin_lock (&more_space_lock); + BOOL cooperative_mode = enable_preemptive (current_thread); + GCToOSInterface::YieldThread (bgc_alloc_spin_loh); + disable_preemptive (current_thread, cooperative_mode); + enter_spin_lock (&more_space_lock); + add_saved_spinlock_info (me_acquire, mt_alloc_large); + dprintf (SPINLOCK_LOG, ("[%d]spin Emsl loh", heap_number)); + } + } + else + { + wait_for_background (awr_loh_alloc_during_bgc); + } + } + } +#endif //BACKGROUND_GC + + gc_reason gr = reason_oos_loh; + generation* gen = generation_of (gen_number); + oom_reason oom_r = oom_no_failure; + size_t current_full_compact_gc_count = 0; + + // No variable values should be "carried over" from one state to the other. + // That's why there are local variable for each state + allocation_state loh_alloc_state = a_state_start; +#ifdef RECORD_LOH_STATE + EEThreadId current_thread_id; + current_thread_id.SetToCurrentThread(); +#endif //RECORD_LOH_STATE + + // If we can get a new seg it means allocation will succeed. 
+ while (1) + { + dprintf (3, ("[h%d]loh state is %s", heap_number, allocation_state_str[loh_alloc_state])); + +#ifdef RECORD_LOH_STATE + add_saved_loh_state (loh_alloc_state, current_thread_id); +#endif //RECORD_LOH_STATE + switch (loh_alloc_state) + { + case a_state_can_allocate: + case a_state_cant_allocate: + { + goto exit; + } + case a_state_start: + { + loh_alloc_state = a_state_try_fit; + break; + } + case a_state_try_fit: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + loh_alloc_state = (can_use_existing_p ? + a_state_can_allocate : + (commit_failed_p ? + a_state_trigger_full_compact_gc : + a_state_acquire_seg)); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + break; + } + case a_state_try_fit_new_seg: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + // Even after we got a new seg it doesn't necessarily mean we can allocate, + // another LOH allocating thread could have beat us to acquire the msl so + // we need to try again. + loh_alloc_state = (can_use_existing_p ? a_state_can_allocate : a_state_try_fit); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + break; + } + case a_state_try_fit_new_seg_after_cg: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + // Even after we got a new seg it doesn't necessarily mean we can allocate, + // another LOH allocating thread could have beat us to acquire the msl so + // we need to try again. However, if we failed to commit, which means we + // did have space on the seg, we bail right away 'cause we already did a + // full compacting GC. 
+ loh_alloc_state = (can_use_existing_p ? + a_state_can_allocate : + (commit_failed_p ? + a_state_cant_allocate : + a_state_acquire_seg_after_cg)); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + break; + } + case a_state_try_fit_no_seg: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + loh_alloc_state = (can_use_existing_p ? a_state_can_allocate : a_state_cant_allocate); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); + break; + } + case a_state_try_fit_after_cg: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + loh_alloc_state = (can_use_existing_p ? + a_state_can_allocate : + (commit_failed_p ? + a_state_cant_allocate : + a_state_acquire_seg_after_cg)); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + break; + } + case a_state_try_fit_after_bgc: + { + BOOL commit_failed_p = FALSE; + BOOL can_use_existing_p = FALSE; + + can_use_existing_p = loh_try_fit (gen_number, size, acontext, + align_const, &commit_failed_p, &oom_r); + loh_alloc_state = (can_use_existing_p ? + a_state_can_allocate : + (commit_failed_p ? + a_state_trigger_full_compact_gc : + a_state_acquire_seg_after_bgc)); + assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0)); + break; + } + case a_state_acquire_seg: + { + BOOL can_get_new_seg_p = FALSE; + BOOL did_full_compacting_gc = FALSE; + + current_full_compact_gc_count = get_full_compact_gc_count(); + + can_get_new_seg_p = loh_get_new_seg (gen, size, align_const, &did_full_compacting_gc, &oom_r); + loh_alloc_state = (can_get_new_seg_p ? 
+ a_state_try_fit_new_seg : + (did_full_compacting_gc ? + a_state_check_retry_seg : + a_state_check_and_wait_for_bgc)); + break; + } + case a_state_acquire_seg_after_cg: + { + BOOL can_get_new_seg_p = FALSE; + BOOL did_full_compacting_gc = FALSE; + + current_full_compact_gc_count = get_full_compact_gc_count(); + + can_get_new_seg_p = loh_get_new_seg (gen, size, align_const, &did_full_compacting_gc, &oom_r); + // Since we release the msl before we try to allocate a seg, other + // threads could have allocated a bunch of segments before us so + // we might need to retry. + loh_alloc_state = (can_get_new_seg_p ? + a_state_try_fit_new_seg_after_cg : + a_state_check_retry_seg); + break; + } + case a_state_acquire_seg_after_bgc: + { + BOOL can_get_new_seg_p = FALSE; + BOOL did_full_compacting_gc = FALSE; + + current_full_compact_gc_count = get_full_compact_gc_count(); + + can_get_new_seg_p = loh_get_new_seg (gen, size, align_const, &did_full_compacting_gc, &oom_r); + loh_alloc_state = (can_get_new_seg_p ? + a_state_try_fit_new_seg : + (did_full_compacting_gc ? + a_state_check_retry_seg : + a_state_trigger_full_compact_gc)); + assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); + break; + } + case a_state_check_and_wait_for_bgc: + { + BOOL bgc_in_progress_p = FALSE; + BOOL did_full_compacting_gc = FALSE; + + if (fgn_maxgen_percent) + { + dprintf (2, ("FGN: failed to acquire seg, may need to do a full blocking GC")); + send_full_gc_notification (max_generation, FALSE); + } + + bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc); + loh_alloc_state = (!bgc_in_progress_p ? + a_state_trigger_full_compact_gc : + (did_full_compacting_gc ? + a_state_try_fit_after_cg : + a_state_try_fit_after_bgc)); + break; + } + case a_state_trigger_full_compact_gc: + { + BOOL got_full_compacting_gc = FALSE; + + got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r); + loh_alloc_state = (got_full_compacting_gc ? 
a_state_try_fit_after_cg : a_state_cant_allocate); + assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); + break; + } + case a_state_check_retry_seg: + { + BOOL should_retry_gc = retry_full_compact_gc (size); + BOOL should_retry_get_seg = FALSE; + if (!should_retry_gc) + { + size_t last_full_compact_gc_count = current_full_compact_gc_count; + current_full_compact_gc_count = get_full_compact_gc_count(); + + if (current_full_compact_gc_count > (last_full_compact_gc_count + 1)) + { + should_retry_get_seg = TRUE; + } + } + + loh_alloc_state = (should_retry_gc ? + a_state_trigger_full_compact_gc : + (should_retry_get_seg ? + a_state_acquire_seg_after_cg : + a_state_cant_allocate)); + assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); + break; + } + default: + { + assert (!"Invalid state!"); + break; + } + } + } + +exit: + if (loh_alloc_state == a_state_cant_allocate) + { + assert (oom_r != oom_no_failure); + handle_oom (heap_number, + oom_r, + size, + 0, + 0); + + add_saved_spinlock_info (me_release, mt_alloc_large_cant); + dprintf (SPINLOCK_LOG, ("[%d]Lmsl for loh oom", heap_number)); + leave_spin_lock (&more_space_lock); + } + + return (loh_alloc_state == a_state_can_allocate); +} + +int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, + int gen_number) +{ + if (gc_heap::gc_started) + { + wait_for_gc_done(); + return -1; + } + +#ifdef SYNCHRONIZATION_STATS + unsigned int msl_acquire_start = GetCycleCount32(); +#endif //SYNCHRONIZATION_STATS + enter_spin_lock (&more_space_lock); + add_saved_spinlock_info (me_acquire, mt_try_alloc); + dprintf (SPINLOCK_LOG, ("[%d]Emsl for alloc", heap_number)); +#ifdef SYNCHRONIZATION_STATS + unsigned int msl_acquire = GetCycleCount32() - msl_acquire_start; + total_msl_acquire += msl_acquire; + num_msl_acquired++; + if (msl_acquire > 200) + { + num_high_msl_acquire++; + } + else + { + num_low_msl_acquire++; + } +#endif //SYNCHRONIZATION_STATS + + /* + // 
We are commenting this out 'cause we don't see the point - we already + // have checked gc_started when we were acquiring the msl - no need to check + // again. This complicates the logic in bgc_suspend_EE 'cause that one would + // need to release msl which causes all sorts of trouble. + if (gc_heap::gc_started) + { +#ifdef SYNCHRONIZATION_STATS + good_suspension++; +#endif //SYNCHRONIZATION_STATS + BOOL fStress = (g_pConfig->GetGCStressLevel() & EEConfig::GCSTRESS_TRANSITION) != 0; + if (!fStress) + { + //Rendez vous early (MP scaling issue) + //dprintf (1, ("[%d]waiting for gc", heap_number)); + wait_for_gc_done(); +#ifdef MULTIPLE_HEAPS + return -1; +#endif //MULTIPLE_HEAPS + } + } + */ + + dprintf (3, ("requested to allocate %d bytes on gen%d", size, gen_number)); + + int align_const = get_alignment_constant (gen_number != (max_generation+1)); + + if (fgn_maxgen_percent) + { + check_for_full_gc (gen_number, size); + } + + if (!(new_allocation_allowed (gen_number))) + { + if (fgn_maxgen_percent && (gen_number == 0)) + { + // We only check gen0 every so often, so take this opportunity to check again. + check_for_full_gc (gen_number, size); + } + +#ifdef BACKGROUND_GC + wait_for_bgc_high_memory (awr_gen0_alloc); +#endif //BACKGROUND_GC + +#ifdef SYNCHRONIZATION_STATS + bad_suspension++; +#endif //SYNCHRONIZATION_STATS + dprintf (/*100*/ 2, ("running out of budget on gen%d, gc", gen_number)); + + if (!settings.concurrent || (gen_number == 0)) + { + vm_heap->GarbageCollectGeneration (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh)); +#ifdef MULTIPLE_HEAPS + enter_spin_lock (&more_space_lock); + add_saved_spinlock_info (me_acquire, mt_try_budget); + dprintf (SPINLOCK_LOG, ("[%d]Emsl out budget", heap_number)); +#endif //MULTIPLE_HEAPS + } + } + + BOOL can_allocate = ((gen_number == 0) ? 
+ allocate_small (gen_number, size, acontext, align_const) : + allocate_large (gen_number, size, acontext, align_const)); + + if (can_allocate) + { + //ETW trace for allocation tick + size_t alloc_context_bytes = acontext->alloc_limit + Align (min_obj_size, align_const) - acontext->alloc_ptr; + int etw_allocation_index = ((gen_number == 0) ? 0 : 1); + + etw_allocation_running_amount[etw_allocation_index] += alloc_context_bytes; + + if (etw_allocation_running_amount[etw_allocation_index] > etw_allocation_tick) + { +#ifdef FEATURE_REDHAWK + FireEtwGCAllocationTick_V1((uint32_t)etw_allocation_running_amount[etw_allocation_index], + ((gen_number == 0) ? ETW::GCLog::ETW_GC_INFO::AllocationSmall : ETW::GCLog::ETW_GC_INFO::AllocationLarge), + GetClrInstanceId()); +#else + // Unfortunately some of the ETW macros do not check whether the ETW feature is enabled. + // The ones that do are much less efficient. +#if defined(FEATURE_EVENT_TRACE) + if (EventEnabledGCAllocationTick_V2()) + { + fire_etw_allocation_event (etw_allocation_running_amount[etw_allocation_index], gen_number, acontext->alloc_ptr); + } +#endif //FEATURE_EVENT_TRACE +#endif //FEATURE_REDHAWK + etw_allocation_running_amount[etw_allocation_index] = 0; + } + } + + return (int)can_allocate; +} + +#ifdef MULTIPLE_HEAPS +void gc_heap::balance_heaps (alloc_context* acontext) +{ + + if (acontext->alloc_count < 4) + { + if (acontext->alloc_count == 0) + { + acontext->home_heap = GCHeap::GetHeap( heap_select::select_heap(acontext, 0) ); + gc_heap* hp = acontext->home_heap->pGenGCHeap; + dprintf (3, ("First allocation for context %Ix on heap %d\n", (size_t)acontext, (size_t)hp->heap_number)); + acontext->alloc_heap = acontext->home_heap; + hp->alloc_context_count++; + } + } + else + { + BOOL set_home_heap = FALSE; + int hint = 0; + + if (heap_select::can_find_heap_fast()) + { + if (acontext->home_heap != NULL) + hint = acontext->home_heap->pGenGCHeap->heap_number; + if (acontext->home_heap != GCHeap::GetHeap(hint = 
heap_select::select_heap(acontext, hint)) || ((acontext->alloc_count & 15) == 0)) + { + set_home_heap = TRUE; + } + } + else + { + // can't use gdt + if ((acontext->alloc_count & 3) == 0) + set_home_heap = TRUE; + } + + if (set_home_heap) + { +/* + // Since we are balancing up to MAX_SUPPORTED_CPUS, no need for this. + if (n_heaps > MAX_SUPPORTED_CPUS) + { + // on machines with many processors cache affinity is really king, so don't even try + // to balance on these. + acontext->home_heap = GCHeap::GetHeap( heap_select::select_heap(acontext, hint) ); + acontext->alloc_heap = acontext->home_heap; + } + else +*/ + { + gc_heap* org_hp = acontext->alloc_heap->pGenGCHeap; + + dynamic_data* dd = org_hp->dynamic_data_of (0); + ptrdiff_t org_size = dd_new_allocation (dd); + int org_alloc_context_count; + int max_alloc_context_count; + gc_heap* max_hp; + ptrdiff_t max_size; + size_t delta = dd_min_size (dd)/4; + + int start, end, finish; + heap_select::get_heap_range_for_heap(org_hp->heap_number, &start, &end); + finish = start + n_heaps; + +try_again: + do + { + max_hp = org_hp; + max_size = org_size + delta; + acontext->home_heap = GCHeap::GetHeap( heap_select::select_heap(acontext, hint) ); + + if (org_hp == acontext->home_heap->pGenGCHeap) + max_size = max_size + delta; + + org_alloc_context_count = org_hp->alloc_context_count; + max_alloc_context_count = org_alloc_context_count; + if (max_alloc_context_count > 1) + max_size /= max_alloc_context_count; + + for (int i = start; i < end; i++) + { + gc_heap* hp = GCHeap::GetHeap(i%n_heaps)->pGenGCHeap; + dd = hp->dynamic_data_of (0); + ptrdiff_t size = dd_new_allocation (dd); + if (hp == acontext->home_heap->pGenGCHeap) + size = size + delta; + int hp_alloc_context_count = hp->alloc_context_count; + if (hp_alloc_context_count > 0) + size /= (hp_alloc_context_count + 1); + if (size > max_size) + { + max_hp = hp; + max_size = size; + max_alloc_context_count = hp_alloc_context_count; + } + } + } + while 
(org_alloc_context_count != org_hp->alloc_context_count || + max_alloc_context_count != max_hp->alloc_context_count); + + if ((max_hp == org_hp) && (end < finish)) + { + start = end; end = finish; + delta = dd_min_size(dd)/2; // Make it twice as hard to balance to remote nodes on NUMA. + goto try_again; + } + + if (max_hp != org_hp) + { + org_hp->alloc_context_count--; + max_hp->alloc_context_count++; + acontext->alloc_heap = GCHeap::GetHeap(max_hp->heap_number); +#if !defined(FEATURE_PAL) + if (CPUGroupInfo::CanEnableGCCPUGroups()) + { //only set ideal processor when max_hp and org_hp are in the same cpu + //group. DO NOT MOVE THREADS ACROSS CPU GROUPS + uint8_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number); + uint8_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number); + if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough + { + uint8_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number); + + GCThreadAffinity affinity; + affinity.Processor = group_proc_no; + affinity.Group = org_gn; + if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity)) + { + dprintf (3, ("Failed to set the ideal processor and group for heap %d.", + org_hp->heap_number)); + } + } + } + else + { + uint8_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number); + + GCThreadAffinity affinity; + affinity.Processor = proc_no; + affinity.Group = GCThreadAffinity::None; + + if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity)) + { + dprintf (3, ("Failed to set the ideal processor for heap %d.", + org_hp->heap_number)); + } + } +#endif // !FEATURE_PAL + dprintf (3, ("Switching context %p (home heap %d) ", + acontext, + acontext->home_heap->pGenGCHeap->heap_number)); + dprintf (3, (" from heap %d (%Id free bytes, %d contexts) ", + org_hp->heap_number, + org_size, + org_alloc_context_count)); + dprintf (3, (" to heap %d (%Id free bytes, %d contexts)\n", + 
max_hp->heap_number, + dd_new_allocation(max_hp->dynamic_data_of(0)), + max_alloc_context_count)); + } + } + } + } + acontext->alloc_count++; +} + +gc_heap* gc_heap::balance_heaps_loh (alloc_context* acontext, size_t /*size*/) +{ + gc_heap* org_hp = acontext->alloc_heap->pGenGCHeap; + //dprintf (1, ("LA: %Id", size)); + + //if (size > 128*1024) + if (1) + { + dynamic_data* dd = org_hp->dynamic_data_of (max_generation + 1); + + ptrdiff_t org_size = dd_new_allocation (dd); + gc_heap* max_hp; + ptrdiff_t max_size; + size_t delta = dd_min_size (dd) * 4; + + int start, end, finish; + heap_select::get_heap_range_for_heap(org_hp->heap_number, &start, &end); + finish = start + n_heaps; + +try_again: + { + max_hp = org_hp; + max_size = org_size + delta; + dprintf (3, ("orig hp: %d, max size: %d", + org_hp->heap_number, + max_size)); + + for (int i = start; i < end; i++) + { + gc_heap* hp = GCHeap::GetHeap(i%n_heaps)->pGenGCHeap; + dd = hp->dynamic_data_of (max_generation + 1); + ptrdiff_t size = dd_new_allocation (dd); + dprintf (3, ("hp: %d, size: %d", + hp->heap_number, + size)); + if (size > max_size) + { + max_hp = hp; + max_size = size; + dprintf (3, ("max hp: %d, max size: %d", + max_hp->heap_number, + max_size)); + } + } + } + + if ((max_hp == org_hp) && (end < finish)) + { + start = end; end = finish; + delta = dd_min_size(dd) * 4; // Need to tuning delta + goto try_again; + } + + if (max_hp != org_hp) + { + dprintf (3, ("loh: %d(%Id)->%d(%Id)", + org_hp->heap_number, dd_new_allocation (org_hp->dynamic_data_of (max_generation + 1)), + max_hp->heap_number, dd_new_allocation (max_hp->dynamic_data_of (max_generation + 1)))); + } + + return max_hp; + } + else + { + return org_hp; + } +} +#endif //MULTIPLE_HEAPS + +BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size, + int alloc_generation_number) +{ + int status; + do + { +#ifdef MULTIPLE_HEAPS + if (alloc_generation_number == 0) + { + balance_heaps (acontext); + status = 
acontext->alloc_heap->pGenGCHeap->try_allocate_more_space (acontext, size, alloc_generation_number); + } + else + { + gc_heap* alloc_heap = balance_heaps_loh (acontext, size); + status = alloc_heap->try_allocate_more_space (acontext, size, alloc_generation_number); + } +#else + status = try_allocate_more_space (acontext, size, alloc_generation_number); +#endif //MULTIPLE_HEAPS + } + while (status == -1); + + return (status != 0); +} + +inline +CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext) +{ + size_t size = Align (jsize); + assert (size >= Align (min_obj_size)); + { + retry: + uint8_t* result = acontext->alloc_ptr; + acontext->alloc_ptr+=size; + if (acontext->alloc_ptr <= acontext->alloc_limit) + { + CObjectHeader* obj = (CObjectHeader*)result; + assert (obj != 0); + return obj; + } + else + { + acontext->alloc_ptr -= size; + +#ifdef _MSC_VER +#pragma inline_depth(0) +#endif //_MSC_VER + + if (! allocate_more_space (acontext, size, 0)) + return 0; + +#ifdef _MSC_VER +#pragma inline_depth(20) +#endif //_MSC_VER + + goto retry; + } + } +} + +inline +CObjectHeader* gc_heap::try_fast_alloc (size_t jsize) +{ + size_t size = Align (jsize); + assert (size >= Align (min_obj_size)); + generation* gen = generation_of (0); + uint8_t* result = generation_allocation_pointer (gen); + generation_allocation_pointer (gen) += size; + if (generation_allocation_pointer (gen) <= + generation_allocation_limit (gen)) + { + return (CObjectHeader*)result; + } + else + { + generation_allocation_pointer (gen) -= size; + return 0; + } +} +void gc_heap::leave_allocation_segment (generation* gen) +{ + adjust_limit (0, 0, gen, max_generation); +} + +void gc_heap::init_free_and_plug() +{ +#ifdef FREE_USAGE_STATS + for (int i = 0; i <= settings.condemned_generation; i++) + { + generation* gen = generation_of (i); + memset (gen->gen_free_spaces, 0, sizeof (gen->gen_free_spaces)); + memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs)); + memset 
(gen->gen_current_pinned_free_spaces, 0, sizeof (gen->gen_current_pinned_free_spaces)); + } + + if (settings.condemned_generation != max_generation) + { + for (int i = (settings.condemned_generation + 1); i <= max_generation; i++) + { + generation* gen = generation_of (i); + memset (gen->gen_plugs, 0, sizeof (gen->gen_plugs)); + } + } +#endif //FREE_USAGE_STATS +} + +void gc_heap::print_free_and_plug (const char* msg) +{ +#if defined(FREE_USAGE_STATS) && defined(SIMPLE_DPRINTF) + int older_gen = ((settings.condemned_generation == max_generation) ? max_generation : (settings.condemned_generation + 1)); + for (int i = 0; i <= older_gen; i++) + { + generation* gen = generation_of (i); + for (int j = 0; j < NUM_GEN_POWER2; j++) + { + if ((gen->gen_free_spaces[j] != 0) || (gen->gen_plugs[j] != 0)) + { + dprintf (2, ("[%s][h%d][%s#%d]gen%d: 2^%d: F: %Id, P: %Id", + msg, + heap_number, + (settings.concurrent ? "BGC" : "GC"), + settings.gc_index, + i, + (j + 9), gen->gen_free_spaces[j], gen->gen_plugs[j])); + } + } + } +#else + UNREFERENCED_PARAMETER(msg); +#endif //FREE_USAGE_STATS && SIMPLE_DPRINTF +} + +void gc_heap::add_gen_plug (int gen_number, size_t plug_size) +{ +#ifdef FREE_USAGE_STATS + dprintf (3, ("adding plug size %Id to gen%d", plug_size, gen_number)); + generation* gen = generation_of (gen_number); + size_t sz = BASE_GEN_SIZE; + int i = 0; + + for (; i < NUM_GEN_POWER2; i++) + { + if (plug_size < sz) + { + break; + } + sz = sz * 2; + } + + (gen->gen_plugs[i])++; +#else + UNREFERENCED_PARAMETER(gen_number); + UNREFERENCED_PARAMETER(plug_size); +#endif //FREE_USAGE_STATS +} + +void gc_heap::add_item_to_current_pinned_free (int gen_number, size_t free_size) +{ +#ifdef FREE_USAGE_STATS + generation* gen = generation_of (gen_number); + size_t sz = BASE_GEN_SIZE; + int i = 0; + + for (; i < NUM_GEN_POWER2; i++) + { + if (free_size < sz) + { + break; + } + sz = sz * 2; + } + + (gen->gen_current_pinned_free_spaces[i])++; + generation_pinned_free_obj_space (gen) += 
free_size; + dprintf (3, ("left pin free %Id(2^%d) to gen%d, total %Id bytes (%Id)", + free_size, (i + 10), gen_number, + generation_pinned_free_obj_space (gen), + gen->gen_current_pinned_free_spaces[i])); +#else + UNREFERENCED_PARAMETER(gen_number); + UNREFERENCED_PARAMETER(free_size); +#endif //FREE_USAGE_STATS +} + +void gc_heap::add_gen_free (int gen_number, size_t free_size) +{ +#ifdef FREE_USAGE_STATS + dprintf (3, ("adding free size %Id to gen%d", free_size, gen_number)); + generation* gen = generation_of (gen_number); + size_t sz = BASE_GEN_SIZE; + int i = 0; + + for (; i < NUM_GEN_POWER2; i++) + { + if (free_size < sz) + { + break; + } + sz = sz * 2; + } + + (gen->gen_free_spaces[i])++; +#else + UNREFERENCED_PARAMETER(gen_number); + UNREFERENCED_PARAMETER(free_size); +#endif //FREE_USAGE_STATS +} + +void gc_heap::remove_gen_free (int gen_number, size_t free_size) +{ +#ifdef FREE_USAGE_STATS + dprintf (3, ("removing free %Id from gen%d", free_size, gen_number)); + generation* gen = generation_of (gen_number); + size_t sz = BASE_GEN_SIZE; + int i = 0; + + for (; i < NUM_GEN_POWER2; i++) + { + if (free_size < sz) + { + break; + } + sz = sz * 2; + } + + (gen->gen_free_spaces[i])--; +#else + UNREFERENCED_PARAMETER(gen_number); + UNREFERENCED_PARAMETER(free_size); +#endif //FREE_USAGE_STATS +} + +uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, + int from_gen_number, + uint8_t* old_loc REQD_ALIGN_AND_OFFSET_DCL) +{ + size = Align (size); + assert (size >= Align (min_obj_size)); + assert (from_gen_number < max_generation); + assert (from_gen_number >= 0); + assert (generation_of (from_gen_number + 1) == gen); + + allocator* gen_allocator = generation_allocator (gen); + BOOL discard_p = gen_allocator->discard_if_no_fit_p (); + int pad_in_front = (old_loc != 0)? USE_PADDING_FRONT : 0; + + size_t real_size = size + Align (min_obj_size); + if (pad_in_front) + real_size += Align (min_obj_size); + + if (! 
(size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), + generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front))) + { + size_t sz_list = gen_allocator->first_bucket_size(); + for (unsigned int a_l_idx = 0; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++) + { + if ((real_size < (sz_list / 2)) || (a_l_idx == (gen_allocator->number_of_buckets()-1))) + { + uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx); + uint8_t* prev_free_item = 0; + while (free_list != 0) + { + dprintf (3, ("considering free list %Ix", (size_t)free_list)); + + size_t free_list_size = unused_array_size (free_list); + + if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size), + old_loc, USE_PADDING_TAIL | pad_in_front)) + { + dprintf (4, ("F:%Ix-%Id", + (size_t)free_list, free_list_size)); + + gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, !discard_p); + generation_free_list_space (gen) -= free_list_size; + remove_gen_free (gen->gen_num, free_list_size); + + adjust_limit (free_list, free_list_size, gen, from_gen_number+1); + goto finished; + } + // We do first fit on bucket 0 because we are not guaranteed to find a fit there. 
+ else if (discard_p || (a_l_idx == 0)) + { + dprintf (3, ("couldn't use this free area, discarding")); + generation_free_obj_space (gen) += free_list_size; + + gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE); + generation_free_list_space (gen) -= free_list_size; + remove_gen_free (gen->gen_num, free_list_size); + } + else + { + prev_free_item = free_list; + } + free_list = free_list_slot (free_list); + } + } + sz_list = sz_list * 2; + } + //go back to the beginning of the segment list + generation_allocate_end_seg_p (gen) = TRUE; + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + if (seg != generation_allocation_segment (gen)) + { + leave_allocation_segment (gen); + generation_allocation_segment (gen) = seg; + } + while (seg != ephemeral_heap_segment) + { + if (size_fit_p(size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg), + heap_segment_committed (seg), old_loc, USE_PADDING_TAIL | pad_in_front)) + { + dprintf (3, ("using what's left in committed")); + adjust_limit (heap_segment_plan_allocated (seg), + heap_segment_committed (seg) - + heap_segment_plan_allocated (seg), + gen, from_gen_number+1); + // dformat (t, 3, "Expanding segment allocation"); + heap_segment_plan_allocated (seg) = + heap_segment_committed (seg); + goto finished; + } + else + { + if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, heap_segment_plan_allocated (seg), + heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) && + grow_heap_segment (seg, heap_segment_plan_allocated (seg), old_loc, size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG)) + { + dprintf (3, ("using what's left in reserved")); + adjust_limit (heap_segment_plan_allocated (seg), + heap_segment_committed (seg) - + heap_segment_plan_allocated (seg), + gen, from_gen_number+1); + heap_segment_plan_allocated (seg) = + heap_segment_committed (seg); + + goto finished; + } + else + { + leave_allocation_segment (gen); + heap_segment* next_seg = heap_segment_next_rw 
(seg); + if (next_seg) + { + dprintf (3, ("getting next segment")); + generation_allocation_segment (gen) = next_seg; + generation_allocation_pointer (gen) = heap_segment_mem (next_seg); + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + } + else + { + size = 0; + goto finished; + } + } + } + seg = generation_allocation_segment (gen); + } + //No need to fix the last region. Will be done later + size = 0; + goto finished; + } + finished: + if (0 == size) + { + return 0; + } + else + { + uint8_t* result = generation_allocation_pointer (gen); + size_t pad = 0; + +#ifdef SHORT_PLUGS + if ((pad_in_front & USE_PADDING_FRONT) && + (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || + ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) + { + pad = Align (min_obj_size); + set_plug_padded (old_loc); + } +#endif //SHORT_PLUGS + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(!old_loc || alignmentOffset != 0); + _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT); + if (old_loc != 0) + { + size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset); + set_node_aligninfo (old_loc, requiredAlignment, pad1); + pad += pad1; + } +#else // FEATURE_STRUCTALIGN + if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad))) + { + pad += switch_alignment_size (is_plug_padded (old_loc)); + set_node_realigned (old_loc); + dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix", + (size_t)old_loc, (size_t)(result+pad))); + assert (same_large_alignment_p (result + pad, old_loc)); + } +#endif // FEATURE_STRUCTALIGN + dprintf (3, ("Allocate %Id bytes", size)); + + if ((old_loc == 0) || (pad != 0)) + { + //allocating a non plug or a gap, so reset the start region + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + } + + generation_allocation_pointer (gen) += size + pad; + assert 
(generation_allocation_pointer (gen) <= generation_allocation_limit (gen));
        // Attribute the allocation to end-of-segment vs free-list usage for stats.
        if (generation_allocate_end_seg_p (gen))
        {
            generation_end_seg_allocated (gen) += size;
        }
        else
        {
            generation_free_list_allocated (gen) += size;
        }
        generation_allocation_size (gen) += size;

        dprintf (3, ("aio: ptr: %Ix, limit: %Ix, sr: %Ix",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen)));

        // NOTE(review): stray second ';' after the return — a harmless empty
        // statement; kept as-is here.
        return result + pad;;
    }
}

// After heap expansion, closes out the consing generation's last allocation
// context: gives unused tail space back to the segment, or records it as the
// free gap in front of the matching pinned plug.
void gc_heap::repair_allocation_in_expanded_heap (generation* consing_gen)
{
    //make sure that every generation has a planned allocation start
    int gen_number = max_generation - 1;
    while (gen_number>= 0)
    {
        generation* gen = generation_of (gen_number);
        if (0 == generation_plan_allocation_start (gen))
        {
            realloc_plan_generation_start (gen, consing_gen);

            assert (generation_plan_allocation_start (gen));
        }
        gen_number--;
    }

    // now we know the planned allocation size
    size_t size = (generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    heap_segment* seg = generation_allocation_segment (consing_gen);
    if (generation_allocation_limit (consing_gen) == heap_segment_plan_allocated (seg))
    {
        // Context ends at the segment's planned end: shrink the plan back to
        // the actual allocation pointer.
        if (size != 0)
        {
            heap_segment_plan_allocated (seg) = generation_allocation_pointer (consing_gen);
        }
    }
    else
    {
        // Context ends at a pinned plug: the leftover becomes that plug's gap.
        assert (settings.condemned_generation == max_generation);
        uint8_t* first_address = generation_allocation_limit (consing_gen);
        //look through the pinned plugs for relevant ones.
        //Look for the right pinned plug to start from.
        size_t mi = 0;
        mark* m = 0;
        while (mi != mark_stack_tos)
        {
            m = pinned_plug_of (mi);
            if ((pinned_plug (m) == first_address))
                break;
            else
                mi++;
        }
        assert (mi != mark_stack_tos);
        pinned_len (m) = size;
    }
}

//tododefrag optimize for new segment (plan_allocated == mem)
// Plan-phase allocation of `size` bytes for plug old_loc when the heap has
// been expanded.  Uses the best-fit machinery when enabled, otherwise scans
// gaps in front of pinned plugs and finally the end of the segment.  Sets
// adjacentp to TRUE when the plug did not move relative to the previous one.
// (Definition continues past this span.)
uint8_t* gc_heap::allocate_in_expanded_heap (generation* gen,
                                             size_t size,
                                             BOOL& adjacentp,
                                             uint8_t* old_loc,
#ifdef SHORT_PLUGS
                                             BOOL set_padding_on_saved_p,
                                             mark* pinned_plug_entry,
#endif //SHORT_PLUGS
                                             BOOL consider_bestfit,
                                             int active_new_gen_number
                                             REQD_ALIGN_AND_OFFSET_DCL)
{
    UNREFERENCED_PARAMETER(active_new_gen_number);
    dprintf (3, ("aie: P: %Ix, size: %Ix", old_loc, size));

    size = Align (size);
    assert (size >= Align (min_obj_size));
    int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;

    if (consider_bestfit && use_bestfit)
    {
        // Segment-reuse best-fit path: delegate placement entirely.
        assert (bestfit_seg);
        dprintf (SEG_REUSE_LOG_1, ("reallocating 0x%Ix in expanded heap, size: %Id",
            old_loc, size));
        return bestfit_seg->fit (old_loc,
#ifdef SHORT_PLUGS
                                 set_padding_on_saved_p,
                                 pinned_plug_entry,
#endif //SHORT_PLUGS
                                 size REQD_ALIGN_AND_OFFSET_ARG);
    }

    heap_segment* seg = generation_allocation_segment (gen);

    if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                       generation_allocation_limit (gen), old_loc,
                       ((generation_allocation_limit (gen) !=
                         heap_segment_plan_allocated (seg))? USE_PADDING_TAIL : 0) | pad_in_front)))
    {
        dprintf (3, ("aie: can't fit: ptr: %Ix, limit: %Ix", generation_allocation_pointer (gen),
            generation_allocation_limit (gen)));

        adjacentp = FALSE;
        uint8_t* first_address = (generation_allocation_limit (gen) ?
+ generation_allocation_limit (gen) : + heap_segment_mem (seg)); + assert (in_range_for_segment (first_address, seg)); + + uint8_t* end_address = heap_segment_reserved (seg); + + dprintf (3, ("aie: first_addr: %Ix, gen alloc limit: %Ix, end_address: %Ix", + first_address, generation_allocation_limit (gen), end_address)); + + size_t mi = 0; + mark* m = 0; + + if (heap_segment_allocated (seg) != heap_segment_mem (seg)) + { + assert (settings.condemned_generation == max_generation); + //look through the pinned plugs for relevant ones. + //Look for the right pinned plug to start from. + while (mi != mark_stack_tos) + { + m = pinned_plug_of (mi); + if ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address)) + { + dprintf (3, ("aie: found pin: %Ix", pinned_plug (m))); + break; + } + else + mi++; + } + if (mi != mark_stack_tos) + { + //fix old free list. + size_t hsize = (generation_allocation_limit (gen) - generation_allocation_pointer (gen)); + { + dprintf(3,("gc filling up hole")); + ptrdiff_t mi1 = (ptrdiff_t)mi; + while ((mi1 >= 0) && + (pinned_plug (pinned_plug_of(mi1)) != generation_allocation_limit (gen))) + { + dprintf (3, ("aie: checking pin %Ix", pinned_plug (pinned_plug_of(mi1)))); + mi1--; + } + if (mi1 >= 0) + { + size_t saved_pinned_len = pinned_len (pinned_plug_of(mi1)); + pinned_len (pinned_plug_of(mi1)) = hsize; + dprintf (3, ("changing %Ix len %Ix->%Ix", + pinned_plug (pinned_plug_of(mi1)), + saved_pinned_len, pinned_len (pinned_plug_of(mi1)))); + } + } + } + } + else + { + assert (generation_allocation_limit (gen) == + generation_allocation_pointer (gen)); + mi = mark_stack_tos; + } + + while ((mi != mark_stack_tos) && in_range_for_segment (pinned_plug (m), seg)) + { + size_t len = pinned_len (m); + uint8_t* free_list = (pinned_plug (m) - len); + dprintf (3, ("aie: testing free item: %Ix->%Ix(%Ix)", + free_list, (free_list + len), len)); + if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + len), old_loc, 
USE_PADDING_TAIL | pad_in_front)) + { + dprintf (3, ("aie: Found adequate unused area: %Ix, size: %Id", + (size_t)free_list, len)); + { + generation_allocation_pointer (gen) = free_list; + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + generation_allocation_limit (gen) = (free_list + len); + } + goto allocate_in_free; + } + mi++; + m = pinned_plug_of (mi); + } + + //switch to the end of the segment. + generation_allocation_pointer (gen) = heap_segment_plan_allocated (seg); + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + heap_segment_plan_allocated (seg) = heap_segment_committed (seg); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + dprintf (3, ("aie: switching to end of seg: %Ix->%Ix(%Ix)", + generation_allocation_pointer (gen), generation_allocation_limit (gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + + if (!size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), + generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front)) + { + dprintf (3, ("aie: ptr: %Ix, limit: %Ix, can't alloc", generation_allocation_pointer (gen), + generation_allocation_limit (gen))); + assert (!"Can't allocate if no free space"); + return 0; + } + } + else + { + adjacentp = TRUE; + } + +allocate_in_free: + { + uint8_t* result = generation_allocation_pointer (gen); + size_t pad = 0; + +#ifdef SHORT_PLUGS + if ((pad_in_front & USE_PADDING_FRONT) && + (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || + ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) + + { + pad = Align (min_obj_size); + set_padding_in_expand (old_loc, set_padding_on_saved_p, pinned_plug_entry); + } +#endif //SHORT_PLUGS + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(!old_loc || alignmentOffset != 0); + _ASSERTE(old_loc || 
requiredAlignment == DATA_ALIGNMENT);
        if (old_loc != 0)
        {
            size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset);
            set_node_aligninfo (old_loc, requiredAlignment, pad1);
            pad += pad1;
            adjacentp = FALSE;
        }
#else // FEATURE_STRUCTALIGN
        // Preserve the plug's large-object alignment relative to its old
        // location; any extra pad means the plug moved, so it is no longer
        // adjacent.
        if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad)))
        {
            pad += switch_alignment_size (is_plug_padded (old_loc));
            set_node_realigned (old_loc);
            dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix",
                (size_t)old_loc, (size_t)(result+pad)));
            assert (same_large_alignment_p (result + pad, old_loc));
            adjacentp = FALSE;
        }
#endif // FEATURE_STRUCTALIGN

        if ((old_loc == 0) || (pad != 0))
        {
            //allocating a non plug or a gap, so reset the start region
            generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen);
        }

        generation_allocation_pointer (gen) += size + pad;
        assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen));
        dprintf (3, ("Allocated in expanded heap %Ix:%Id", (size_t)(result+pad), size));

        dprintf (3, ("aie: ptr: %Ix, limit: %Ix, sr: %Ix",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen)));

        return result + pad;
    }
}

// Makes sure planning continues on the ephemeral heap segment.  If the
// consing generation is still on another segment, closes that segment's plan
// and returns gen (max_generation - 1) reset to the start of the ephemeral
// segment as the new consing generation; otherwise returns consing_gen
// unchanged.
generation* gc_heap::ensure_ephemeral_heap_segment (generation* consing_gen)
{
    heap_segment* seg = generation_allocation_segment (consing_gen);
    if (seg != ephemeral_heap_segment)
    {
        assert (generation_allocation_pointer (consing_gen)>= heap_segment_mem (seg));
        assert (generation_allocation_pointer (consing_gen)<= heap_segment_committed (seg));

        //fix the allocated size of the segment.
        heap_segment_plan_allocated (seg) = generation_allocation_pointer (consing_gen);

        generation* new_consing_gen = generation_of (max_generation - 1);
        generation_allocation_pointer (new_consing_gen) =
            heap_segment_mem (ephemeral_heap_segment);
        generation_allocation_limit (new_consing_gen) =
            generation_allocation_pointer (new_consing_gen);
        generation_allocation_context_start_region (new_consing_gen) =
            generation_allocation_pointer (new_consing_gen);
        generation_allocation_segment (new_consing_gen) = ephemeral_heap_segment;

        return new_consing_gen;
    }
    else
        return consing_gen;
}

// Plan-phase allocation of `size` bytes in the condemned generations for
// plug old_loc coming from from_gen_number.  Consumes pinned-plug gaps and
// segment tails as needed; returns the planned address, or 0 when a new
// segment is required (gen0 gap) or (SHORT_PLUGS) when the plug should be
// converted to pinned.  (Definition continues past this span.)
uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen,
                                                     size_t size,
                                                     int from_gen_number,
#ifdef SHORT_PLUGS
                                                     BOOL* convert_to_pinned_p,
                                                     uint8_t* next_pinned_plug,
                                                     heap_segment* current_seg,
#endif //SHORT_PLUGS
                                                     uint8_t* old_loc
                                                     REQD_ALIGN_AND_OFFSET_DCL)
{
    // Make sure that the youngest generation gap hasn't been allocated
    if (settings.promotion)
    {
        assert (generation_plan_allocation_start (youngest_generation) == 0);
    }

    size = Align (size);
    assert (size >= Align (min_obj_size));
    int to_gen_number = from_gen_number;
    // With promotion on, objects plan into the next older generation.
    if (from_gen_number != (int)max_generation)
    {
        to_gen_number = from_gen_number + (settings.promotion ? 1 : 0);
    }

    dprintf (3, ("aic gen%d: s: %Id", gen->gen_num, size));

    int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;

    if ((from_gen_number != -1) && (from_gen_number != (int)max_generation) && settings.promotion)
    {
        generation_condemned_allocated (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size;
        generation_allocation_size (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size;
    }
retry:
    {
        heap_segment* seg = generation_allocation_segment (gen);
        if (!
(size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), + generation_allocation_limit (gen), old_loc, + ((generation_allocation_limit (gen) != heap_segment_plan_allocated (seg))?USE_PADDING_TAIL:0)|pad_in_front))) + { + if ((! (pinned_plug_que_empty_p()) && + (generation_allocation_limit (gen) == + pinned_plug (oldest_pin())))) + { + size_t entry = deque_pinned_plug(); + mark* pinned_plug_entry = pinned_plug_of (entry); + size_t len = pinned_len (pinned_plug_entry); + uint8_t* plug = pinned_plug (pinned_plug_entry); + set_new_pin_info (pinned_plug_entry, generation_allocation_pointer (gen)); + +#ifdef FREE_USAGE_STATS + generation_allocated_in_pinned_free (gen) += generation_allocated_since_last_pin (gen); + dprintf (3, ("allocated %Id so far within pin %Ix, total->%Id", + generation_allocated_since_last_pin (gen), + plug, + generation_allocated_in_pinned_free (gen))); + generation_allocated_since_last_pin (gen) = 0; + + add_item_to_current_pinned_free (gen->gen_num, pinned_len (pinned_plug_of (entry))); +#endif //FREE_USAGE_STATS + + dprintf (3, ("mark stack bos: %Id, tos: %Id, aic: p %Ix len: %Ix->%Ix", + mark_stack_bos, mark_stack_tos, plug, len, pinned_len (pinned_plug_of (entry)))); + + assert(mark_stack_array[entry].len == 0 || + mark_stack_array[entry].len >= Align(min_obj_size)); + generation_allocation_pointer (gen) = plug + len; + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + set_allocator_next_pin (gen); + + //Add the size of the pinned plug to the right pinned allocations + //find out which gen this pinned plug came from + int frgn = object_gennum (plug); + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; + int togn = object_gennum_plan (plug); + if (frgn < togn) + { + generation_pinned_allocation_compact_size (generation_of 
(togn)) += len; + } + } + goto retry; + } + + if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg)) + { + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + dprintf (3, ("changed limit to plan alloc: %Ix", generation_allocation_limit (gen))); + } + else + { + if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg)) + { + heap_segment_plan_allocated (seg) = heap_segment_committed (seg); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + dprintf (3, ("changed limit to commit: %Ix", generation_allocation_limit (gen))); + } + else + { +#ifndef RESPECT_LARGE_ALIGNMENT + assert (gen != youngest_generation); +#endif //RESPECT_LARGE_ALIGNMENT + + if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen), + heap_segment_reserved (seg), old_loc, USE_PADDING_TAIL | pad_in_front) && + (grow_heap_segment (seg, generation_allocation_pointer (gen), old_loc, + size, pad_in_front REQD_ALIGN_AND_OFFSET_ARG))) + { + dprintf (3, ("Expanded segment allocation by committing more memory")); + heap_segment_plan_allocated (seg) = heap_segment_committed (seg); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + } + else + { + heap_segment* next_seg = heap_segment_next (seg); + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (seg)); + // Verify that all pinned plugs for this segment are consumed + if (!pinned_plug_que_empty_p() && + ((pinned_plug (oldest_pin()) < + heap_segment_allocated (seg)) && + (pinned_plug (oldest_pin()) >= + generation_allocation_pointer (gen)))) + { + LOG((LF_GC, LL_INFO10, "remaining pinned plug %Ix while leaving segment on allocation", + pinned_plug (oldest_pin()))); + FATAL_GC_ERROR(); + } + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (seg)); + assert (generation_allocation_pointer (gen)<= + heap_segment_committed (seg)); + heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen); + 
+ if (next_seg) + { + generation_allocation_segment (gen) = next_seg; + generation_allocation_pointer (gen) = heap_segment_mem (next_seg); + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + } + else + { + return 0; //should only happen during allocation of generation 0 gap + // in that case we are going to grow the heap anyway + } + } + } + } + set_allocator_next_pin (gen); + + goto retry; + } + } + + { + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (generation_allocation_segment (gen))); + uint8_t* result = generation_allocation_pointer (gen); + size_t pad = 0; +#ifdef SHORT_PLUGS + if ((pad_in_front & USE_PADDING_FRONT) && + (((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))==0) || + ((generation_allocation_pointer (gen) - generation_allocation_context_start_region (gen))>=DESIRED_PLUG_LENGTH))) + { + ptrdiff_t dist = old_loc - result; + if (dist == 0) + { + dprintf (3, ("old alloc: %Ix, same as new alloc, not padding", old_loc)); + pad = 0; + } + else + { + if ((dist > 0) && (dist < (ptrdiff_t)Align (min_obj_size))) + { + dprintf (3, ("old alloc: %Ix, only %d bytes > new alloc! 
Shouldn't happen", old_loc, dist)); + FATAL_GC_ERROR(); + } + + pad = Align (min_obj_size); + set_plug_padded (old_loc); + } + } +#endif //SHORT_PLUGS +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(!old_loc || alignmentOffset != 0); + _ASSERTE(old_loc || requiredAlignment == DATA_ALIGNMENT); + if ((old_loc != 0)) + { + size_t pad1 = ComputeStructAlignPad(result+pad, requiredAlignment, alignmentOffset); + set_node_aligninfo (old_loc, requiredAlignment, pad1); + pad += pad1; + } +#else // FEATURE_STRUCTALIGN + if (!((old_loc == 0) || same_large_alignment_p (old_loc, result+pad))) + { + pad += switch_alignment_size (is_plug_padded (old_loc)); + set_node_realigned(old_loc); + dprintf (3, ("Allocation realignment old_loc: %Ix, new_loc:%Ix", + (size_t)old_loc, (size_t)(result+pad))); + assert (same_large_alignment_p (result + pad, old_loc)); + } +#endif // FEATURE_STRUCTALIGN + +#ifdef SHORT_PLUGS + if ((next_pinned_plug != 0) && (pad != 0) && (generation_allocation_segment (gen) == current_seg)) + { + assert (old_loc != 0); + ptrdiff_t dist_to_next_pin = (ptrdiff_t)(next_pinned_plug - (generation_allocation_pointer (gen) + size + pad)); + assert (dist_to_next_pin >= 0); + + if ((dist_to_next_pin >= 0) && (dist_to_next_pin < (ptrdiff_t)Align (min_obj_size))) + { + dprintf (3, ("%Ix->(%Ix,%Ix),%Ix(%Ix)(%Ix),NP->PP", + old_loc, + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + next_pinned_plug, + size, + dist_to_next_pin)); + clear_plug_padded (old_loc); + pad = 0; + *convert_to_pinned_p = TRUE; + record_interesting_data_point (idp_converted_pin); + + return 0; + } + } +#endif //SHORT_PLUGS + + if ((old_loc == 0) || (pad != 0)) + { + //allocating a non plug or a gap, so reset the start region + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); + } + + generation_allocation_pointer (gen) += size + pad; + assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); + +#ifdef FREE_USAGE_STATS + 
generation_allocated_since_last_pin (gen) += size;
#endif //FREE_USAGE_STATS

        dprintf (3, ("aic: ptr: %Ix, limit: %Ix, sr: %Ix",
            generation_allocation_pointer (gen), generation_allocation_limit (gen),
            generation_allocation_context_start_region (gen)));

        assert (result + pad);
        return result + pad;
    }
}

// x raised to the non-negative integer power y by repeated multiplication.
// No overflow checking; intended for small tuning values (e.g. power (10, i)).
inline int power (int x, int y)
{
    int z = 1;
    for (int i = 0; i < y; i++)
    {
        z = z*x;
    }
    return z;
}

// Final (post-join) decision on which generation to condemn, starting from
// n_initial.  Applies cross-heap OOM blocking, the elevation lock (which can
// demote a gen2 to gen1 for up to 5 consecutive GCs), and GC-stress
// escalation.  May set *blocking_collection_p.  Returns the generation to
// condemn.
int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
                                           int n_initial,
                                           BOOL* blocking_collection_p
                                           STRESS_HEAP_ARG(int n_original))
{
    int n = n_initial;
#ifdef MULTIPLE_HEAPS
    BOOL blocking_p = *blocking_collection_p;
    if (!blocking_p)
    {
        // If any heap's last GC ran before an OOM, force this one to block.
        for (int i = 0; i < n_heaps; i++)
        {
            if (g_heaps[i]->last_gc_before_oom)
            {
                dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i));
                *blocking_collection_p = TRUE;
                break;
            }
        }
    }
#endif //MULTIPLE_HEAPS

    if (should_evaluate_elevation && (n == max_generation))
    {
        dprintf (GTC_LOG, ("lock: %d(%d)",
            (settings.should_lock_elevation ? 1 : 0),
            settings.elevation_locked_count));

        if (settings.should_lock_elevation)
        {
            settings.elevation_locked_count++;
            // Every 6th locked GC is allowed to stay a full (gen2) collection;
            // otherwise demote to gen1.
            if (settings.elevation_locked_count == 6)
            {
                settings.elevation_locked_count = 0;
            }
            else
            {
                n = max_generation - 1;
                settings.elevation_reduced = TRUE;
            }
        }
        else
        {
            settings.elevation_locked_count = 0;
        }
    }
    else
    {
        settings.should_lock_elevation = FALSE;
        settings.elevation_locked_count = 0;
    }

#ifdef STRESS_HEAP
#ifdef BACKGROUND_GC
    // We can only do Concurrent GC Stress if the caller did not explicitly ask for all
    // generations to be collected,

    if (n_original != max_generation &&
        g_pConfig->GetGCStressLevel() && gc_can_use_concurrent)
    {
#ifndef FEATURE_REDHAWK
        // for the GC stress mix mode throttle down gen2 collections
        if (g_pConfig->IsGCStressMix())
        {
            size_t current_gc_count = 0;

#ifdef MULTIPLE_HEAPS
            current_gc_count = (size_t)dd_collection_count (g_heaps[0]->dynamic_data_of (0));
#else
            current_gc_count = (size_t)dd_collection_count (dynamic_data_of (0));
#endif //MULTIPLE_HEAPS
            // in gc stress, only escalate every 10th non-gen2 collection to a gen2...
            if ((current_gc_count % 10) == 0)
            {
                n = max_generation;
            }
        }
        // for traditional GC stress
        else
#endif // !FEATURE_REDHAWK
        if (*blocking_collection_p)
        {
            // We call StressHeap() a lot for Concurrent GC Stress. However,
            // if we can not do a concurrent collection, no need to stress anymore.
            // @TODO: Enable stress when the memory pressure goes down again
            GCStressPolicy::GlobalDisable();
        }
        else
        {
            n = max_generation;
        }
    }
#endif //BACKGROUND_GC
#endif //STRESS_HEAP

    return n;
}

// Sums the surviving bytes recorded per generation (including LOH, hence
// max_generation + 1) in one heap's GC history: size after the GC minus the
// free-list and free-object space left in it.
inline
size_t get_survived_size (gc_history_per_heap* hist)
{
    size_t surv_size = 0;
    gc_generation_data* gen_data;

    for (int gen_number = 0; gen_number <= (max_generation + 1); gen_number++)
    {
        gen_data = &(hist->gen_data[gen_number]);
        surv_size += (gen_data->size_after -
                      gen_data->free_list_space_after -
                      gen_data->free_obj_space_after);
    }

    return surv_size;
}

// Total survived size across all heaps (single heap in non-server builds).
size_t gc_heap::get_total_survived_size()
{
    size_t total_surv_size = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap();
        total_surv_size += get_survived_size (current_gc_data_per_heap);
    }
#else
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
    total_surv_size = get_survived_size (current_gc_data_per_heap);
#endif //MULTIPLE_HEAPS
    return total_surv_size;
}

// Gets what's allocated on both SOH and LOH that hasn't been collected.
size_t gc_heap::get_current_allocated()
{
    // Allocated-but-not-yet-collected = desired budget minus the budget still
    // unused, summed for gen0 (SOH) and the large object generation (LOH).
    dynamic_data* dd = dynamic_data_of (0);
    size_t current_alloc = dd_desired_allocation (dd) - dd_new_allocation (dd);
    dd = dynamic_data_of (max_generation + 1);
    current_alloc += dd_desired_allocation (dd) - dd_new_allocation (dd);

    return current_alloc;
}

// Sums get_current_allocated over all heaps (single heap otherwise).
size_t gc_heap::get_total_allocated()
{
    size_t total_current_allocated = 0;
#ifdef MULTIPLE_HEAPS
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        total_current_allocated += hp->get_current_allocated();
    }
#else
    total_current_allocated = get_current_allocated();
#endif //MULTIPLE_HEAPS
    return total_current_allocated;
}

// Estimated size of a generation: its current size plus the portion of its
// allocation budget already consumed.
size_t gc_heap::current_generation_size (int gen_number)
{
    dynamic_data* dd = dynamic_data_of (gen_number);
    size_t gen_size = (dd_current_size (dd) + dd_desired_allocation (dd)
                       - dd_new_allocation (dd));

    return gen_size;
}

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable:6326) // "Potential comparison of a constant with another constant" is intentional in this function.
#endif //_PREFAST_

/*
    This is called by when we are actually doing a GC, or when we are just checking whether
    we would do a full blocking GC, in which case check_only_p is TRUE.

    The difference between calling this with check_only_p TRUE and FALSE is that when it's
    TRUE:
            settings.reason is ignored
            budgets are not checked (since they are checked before this is called)
            it doesn't change anything non local like generation_skip_ratio
*/
// (Definition continues past this span.)
int gc_heap::generation_to_condemn (int n_initial,
                                    BOOL* blocking_collection_p,
                                    BOOL* elevation_requested_p,
                                    BOOL check_only_p)
{
    // In check-only mode, operate on local copies so nothing global mutates.
    gc_mechanisms temp_settings = settings;
    gen_to_condemn_tuning temp_condemn_reasons;
    gc_mechanisms* local_settings = (check_only_p ? &temp_settings : &settings);
    gen_to_condemn_tuning* local_condemn_reasons = (check_only_p ?
&temp_condemn_reasons : &gen_to_condemn_reasons); + if (!check_only_p) + { + if ((local_settings->reason == reason_oos_soh) || (local_settings->reason == reason_oos_loh)) + { + assert (n_initial >= 1); + } + + assert (settings.reason != reason_empty); + } + + local_condemn_reasons->init(); + + int n = n_initial; + int n_alloc = n; + if (heap_number == 0) + { + dprintf (GTC_LOG, ("init: %d(%d)", n_initial, settings.reason)); + } + int i = 0; + int temp_gen = 0; + BOOL low_memory_detected = g_low_memory_status; + uint32_t memory_load = 0; + uint64_t available_physical = 0; + uint64_t available_page_file = 0; + BOOL check_memory = FALSE; + BOOL high_fragmentation = FALSE; + BOOL v_high_memory_load = FALSE; + BOOL high_memory_load = FALSE; + BOOL low_ephemeral_space = FALSE; + BOOL evaluate_elevation = TRUE; + *elevation_requested_p = FALSE; + *blocking_collection_p = FALSE; + + BOOL check_max_gen_alloc = TRUE; + +#ifdef STRESS_HEAP + int orig_gen = n; +#endif //STRESS_HEAP + + if (!check_only_p) + { + dd_fragmentation (dynamic_data_of (0)) = + generation_free_list_space (youngest_generation) + + generation_free_obj_space (youngest_generation); + + dd_fragmentation (dynamic_data_of (max_generation + 1)) = + generation_free_list_space (large_object_generation) + + generation_free_obj_space (large_object_generation); + + //save new_allocation + for (i = 0; i <= max_generation+1; i++) + { + dynamic_data* dd = dynamic_data_of (i); + dprintf (GTC_LOG, ("h%d: g%d: l: %Id (%Id)", + heap_number, i, + dd_new_allocation (dd), + dd_desired_allocation (dd))); + dd_gc_new_allocation (dd) = dd_new_allocation (dd); + } + + local_condemn_reasons->set_gen (gen_initial, n); + temp_gen = n; + +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + dprintf (GTC_LOG, ("bgc in prog, 1")); + check_max_gen_alloc = FALSE; + } +#endif //BACKGROUND_GC + + if (check_max_gen_alloc) + { + //figure out if large objects need to be collected. 
+ if (get_new_allocation (max_generation+1) <= 0) + { + n = max_generation; + local_condemn_reasons->set_gen (gen_alloc_budget, n); + } + } + + //figure out which generation ran out of allocation + for (i = n+1; i <= (check_max_gen_alloc ? max_generation : (max_generation - 1)); i++) + { + if (get_new_allocation (i) <= 0) + { + n = i; + } + else + break; + } + } + + if (n > temp_gen) + { + local_condemn_reasons->set_gen (gen_alloc_budget, n); + } + + dprintf (GTC_LOG, ("h%d: g%d budget", heap_number, ((get_new_allocation (max_generation+1) <= 0) ? 3 : n))); + + n_alloc = n; + +#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) + //time based tuning + // if enough time has elapsed since the last gc + // and the number of gc is too low (1/10 of lower gen) then collect + // This should also be enabled if we have memory concerns + int n_time_max = max_generation; + + if (!check_only_p) + { + if (recursive_gc_sync::background_running_p()) + { + n_time_max = max_generation - 1; + } + } + + if ((local_settings->pause_mode == pause_interactive) || + (local_settings->pause_mode == pause_sustained_low_latency)) + { + dynamic_data* dd0 = dynamic_data_of (0); + size_t now = GetHighPrecisionTimeStamp(); + temp_gen = n; + for (i = (temp_gen+1); i <= n_time_max; i++) + { + dynamic_data* dd = dynamic_data_of (i); + if ((now > dd_time_clock(dd) + power (10, i)*1000) && + (dd_gc_clock (dd0) > (dd_gc_clock (dd) + (power (10, i)))) && + ((n < max_generation) || ((dd_current_size (dd) < dd_max_size (dd0))))) + { + n = min (i, n_time_max); + dprintf (GTC_LOG, ("time %d", n)); + } + } + if (n > temp_gen) + { + local_condemn_reasons->set_gen (gen_time_tuning, n); + } + } + + if (n != n_alloc) + { + dprintf (GTC_LOG, ("Condemning %d based on time tuning and fragmentation", n)); + } +#endif //BACKGROUND_GC && !MULTIPLE_HEAPS + + if (n < (max_generation - 1)) + { + if (dt_low_card_table_efficiency_p (tuning_deciding_condemned_gen)) + { + n = max (n, max_generation - 1); + 
local_settings->promotion = TRUE; + dprintf (GTC_LOG, ("h%d: skip %d, c %d", + heap_number, generation_skip_ratio, n)); + local_condemn_reasons->set_condition (gen_low_card_p); + } + } + + if (!check_only_p) + { + generation_skip_ratio = 100; + } + + if (dt_low_ephemeral_space_p (check_only_p ? + tuning_deciding_full_gc : + tuning_deciding_condemned_gen)) + { + low_ephemeral_space = TRUE; + + n = max (n, max_generation - 1); + local_condemn_reasons->set_condition (gen_low_ephemeral_p); + dprintf (GTC_LOG, ("h%d: low eph", heap_number)); + +#ifdef BACKGROUND_GC + if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0)) +#endif //BACKGROUND_GC + { + //It is better to defragment first if we are running out of space for + //the ephemeral generation but we have enough fragmentation to make up for it + //in the non ephemeral generation. Essentially we are trading a gen2 for + // having to expand heap in ephemeral collections. + if (dt_high_frag_p (tuning_deciding_condemned_gen, + max_generation - 1, + TRUE)) + { + high_fragmentation = TRUE; + local_condemn_reasons->set_condition (gen_max_high_frag_e_p); + dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number)); + } + } + } + + //figure out which ephemeral generation is too fragramented + temp_gen = n; + for (i = n+1; i < max_generation; i++) + { + if (dt_high_frag_p (tuning_deciding_condemned_gen, i)) + { + dprintf (GTC_LOG, ("h%d g%d too frag", heap_number, i)); + n = i; + } + else + break; + } + + if (low_ephemeral_space) + { + //enable promotion + local_settings->promotion = TRUE; + } + + if (n > temp_gen) + { + local_condemn_reasons->set_condition (gen_eph_high_frag_p); + } + + if (!check_only_p) + { + if (settings.pause_mode == pause_low_latency) + { + if (!is_induced (settings.reason)) + { + n = min (n, max_generation - 1); + dprintf (GTC_LOG, ("low latency mode is enabled, condemning %d", n)); + evaluate_elevation = FALSE; + goto exit; + } + } + } + + // It's hard to catch 
when we get to the point that the memory load is so high + // we get an induced GC from the finalizer thread so we are checking the memory load + // for every gen0 GC. + check_memory = (check_only_p ? + (n >= 0) : + ((n >= 1) || low_memory_detected)); + + if (check_memory) + { + //find out if we are short on memory + get_memory_info (&memory_load, &available_physical, &available_page_file); + if (heap_number == 0) + { + dprintf (GTC_LOG, ("ml: %d", memory_load)); + } + + // Need to get it early enough for all heaps to use. + entry_available_physical_mem = available_physical; + local_settings->entry_memory_load = memory_load; + + // @TODO: Force compaction more often under GCSTRESS + if (memory_load >= high_memory_load_th || low_memory_detected) + { +#ifdef SIMPLE_DPRINTF + // stress log can't handle any parameter that's bigger than a void*. + if (heap_number == 0) + { + dprintf (GTC_LOG, ("tp: %I64d, ap: %I64d", total_physical_mem, available_physical)); + } +#endif //SIMPLE_DPRINTF + + high_memory_load = TRUE; + + if (memory_load >= v_high_memory_load_th || low_memory_detected) + { + // TODO: Perhaps in 64-bit we should be estimating gen1's fragmentation as well since + // gen1/gen0 may take a lot more memory than gen2. 
+ if (!high_fragmentation) + { + high_fragmentation = dt_estimate_reclaim_space_p (tuning_deciding_condemned_gen, max_generation); + } + v_high_memory_load = TRUE; + } + else + { + if (!high_fragmentation) + { + high_fragmentation = dt_estimate_high_frag_p (tuning_deciding_condemned_gen, max_generation, available_physical); + } + } + + if (high_fragmentation) + { + if (high_memory_load) + { + local_condemn_reasons->set_condition (gen_max_high_frag_m_p); + } + else if (v_high_memory_load) + { + local_condemn_reasons->set_condition (gen_max_high_frag_vm_p); + } + } + } + } + + dprintf (GTC_LOG, ("h%d: le: %d, hm: %d, vm: %d, f: %d", + heap_number, low_ephemeral_space, high_memory_load, v_high_memory_load, + high_fragmentation)); + + if (should_expand_in_full_gc) + { + dprintf (GTC_LOG, ("h%d: expand_in_full - BLOCK", heap_number)); + *blocking_collection_p = TRUE; + if (!check_only_p) + { + should_expand_in_full_gc = FALSE; + } + evaluate_elevation = FALSE; + n = max_generation; + local_condemn_reasons->set_condition (gen_expand_fullgc_p); + } + + if (last_gc_before_oom) + { + dprintf (GTC_LOG, ("h%d: alloc full - BLOCK", heap_number)); + n = max_generation; + *blocking_collection_p = TRUE; + + local_condemn_reasons->set_condition (gen_before_oom); + } + + if (!check_only_p) + { + if (is_induced_blocking (settings.reason) && + n_initial == max_generation + IN_STRESS_HEAP( && !settings.stress_induced )) + { + if (heap_number == 0) + { + dprintf (GTC_LOG, ("induced - BLOCK")); + } + + *blocking_collection_p = TRUE; + local_condemn_reasons->set_condition (gen_induced_fullgc_p); + evaluate_elevation = FALSE; + } + + if (settings.reason == reason_induced_noforce) + { + local_condemn_reasons->set_condition (gen_induced_noforce_p); + evaluate_elevation = FALSE; + } + } + + if (evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load)) + { + *elevation_requested_p = TRUE; +#ifdef BIT64 + // if we are in high memory load and have consumed 10% of 
the gen2 budget, do a gen2 now. + if (high_memory_load || v_high_memory_load) + { + dynamic_data* dd_max = dynamic_data_of (max_generation); + if (((float)dd_new_allocation (dd_max) / (float)dd_desired_allocation (dd_max)) < 0.9) + { + dprintf (GTC_LOG, ("%Id left in gen2 alloc (%Id)", + dd_new_allocation (dd_max), dd_desired_allocation (dd_max))); + n = max_generation; + local_condemn_reasons->set_condition (gen_almost_max_alloc); + } + } + + if (n <= max_generation) + { +#endif // BIT64 + if (high_fragmentation) + { + //elevate to max_generation + n = max_generation; + dprintf (GTC_LOG, ("h%d: f full", heap_number)); + +#ifdef BACKGROUND_GC + if (high_memory_load || v_high_memory_load) + { + // For background GC we want to do blocking collections more eagerly because we don't + // want to get into the situation where the memory load becomes high while we are in + // a background GC and we'd have to wait for the background GC to finish to start + // a blocking collection (right now the implemenation doesn't handle converting + // a background GC to a blocking collection midway. 
+ dprintf (GTC_LOG, ("h%d: bgc - BLOCK", heap_number)); + *blocking_collection_p = TRUE; + } +#else + if (v_high_memory_load) + { + dprintf (GTC_LOG, ("h%d: - BLOCK", heap_number)); + *blocking_collection_p = TRUE; + } +#endif //BACKGROUND_GC + } + else + { + n = max (n, max_generation - 1); + dprintf (GTC_LOG, ("h%d: nf c %d", heap_number, n)); + } +#ifdef BIT64 + } +#endif // BIT64 + } + + if ((n == (max_generation - 1)) && (n_alloc < (max_generation -1))) + { + dprintf (GTC_LOG, ("h%d: budget %d, check 2", + heap_number, n_alloc)); + if (get_new_allocation (max_generation) <= 0) + { + dprintf (GTC_LOG, ("h%d: budget alloc", heap_number)); + n = max_generation; + local_condemn_reasons->set_condition (gen_max_gen1); + } + } + + //figure out if max_generation is too fragmented -> blocking collection + if (n == max_generation) + { + if (dt_high_frag_p (tuning_deciding_condemned_gen, n)) + { + dprintf (GTC_LOG, ("h%d: g%d too frag", heap_number, n)); + local_condemn_reasons->set_condition (gen_max_high_frag_p); + if (local_settings->pause_mode != pause_sustained_low_latency) + { + *blocking_collection_p = TRUE; + } + } + } + +#ifdef BACKGROUND_GC + if (n == max_generation) + { + if (heap_number == 0) + { + BOOL bgc_heap_too_small = TRUE; + size_t gen2size = 0; + size_t gen3size = 0; +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) || + ((g_heaps[i]->current_generation_size (max_generation + 1)) > bgc_min_per_heap)) + { + bgc_heap_too_small = FALSE; + break; + } + } +#else //MULTIPLE_HEAPS + if ((current_generation_size (max_generation) > bgc_min_per_heap) || + (current_generation_size (max_generation + 1) > bgc_min_per_heap)) + { + bgc_heap_too_small = FALSE; + } +#endif //MULTIPLE_HEAPS + + if (bgc_heap_too_small) + { + dprintf (GTC_LOG, ("gen2 and gen3 too small")); + +#ifdef STRESS_HEAP + // do not turn stress-induced collections into blocking GCs + if 
(!settings.stress_induced) +#endif //STRESS_HEAP + { + *blocking_collection_p = TRUE; + } + + local_condemn_reasons->set_condition (gen_gen2_too_small); + } + } + } +#endif //BACKGROUND_GC + +exit: + if (!check_only_p) + { +#ifdef STRESS_HEAP +#ifdef BACKGROUND_GC + // We can only do Concurrent GC Stress if the caller did not explicitly ask for all + // generations to be collected, + + if (orig_gen != max_generation && + g_pConfig->GetGCStressLevel() && gc_can_use_concurrent) + { + *elevation_requested_p = FALSE; + } +#endif //BACKGROUND_GC +#endif //STRESS_HEAP + + if (check_memory) + { + fgm_result.available_pagefile_mb = (size_t)(available_page_file / (1024 * 1024)); + } + + local_condemn_reasons->set_gen (gen_final_per_heap, n); + get_gc_data_per_heap()->gen_to_condemn_reasons.init (local_condemn_reasons); + +#ifdef DT_LOG + local_condemn_reasons->print (heap_number); +#endif //DT_LOG + + if ((local_settings->reason == reason_oos_soh) || + (local_settings->reason == reason_oos_loh)) + { + assert (n >= 1); + } + } + +#ifndef FEATURE_REDHAWK + if (n == max_generation) + { + if (SystemDomain::System()->RequireAppDomainCleanup()) + { +#ifdef BACKGROUND_GC + // do not turn stress-induced collections into blocking GCs, unless there + // have already been more full BGCs than full NGCs +#if 0 + // This exposes DevDiv 94129, so we'll leave this out for now + if (!settings.stress_induced || + full_gc_counts[gc_type_blocking] <= full_gc_counts[gc_type_background]) +#endif // 0 +#endif // BACKGROUND_GC + { + *blocking_collection_p = TRUE; + } + } + } +#endif //!FEATURE_REDHAWK + + return n; +} + +#ifdef _PREFAST_ +#pragma warning(pop) +#endif //_PREFAST_ + +inline +size_t gc_heap::min_reclaim_fragmentation_threshold (uint32_t num_heaps) +{ + // if the memory load is higher, the threshold we'd want to collect gets lower. 
+ size_t min_mem_based_on_available = + (500 - (settings.entry_memory_load - high_memory_load_th) * 40) * 1024 * 1024 / num_heaps; + size_t ten_percent_size = (size_t)((float)generation_size (max_generation) * 0.10); + uint64_t three_percent_mem = mem_one_percent * 3 / num_heaps; + +#ifdef SIMPLE_DPRINTF + dprintf (GTC_LOG, ("min av: %Id, 10%% gen2: %Id, 3%% mem: %I64d", + min_mem_based_on_available, ten_percent_size, three_percent_mem)); +#endif //SIMPLE_DPRINTF + return (size_t)(min (min_mem_based_on_available, min (ten_percent_size, three_percent_mem))); +} + +inline +uint64_t gc_heap::min_high_fragmentation_threshold(uint64_t available_mem, uint32_t num_heaps) +{ + return min (available_mem, (256*1024*1024)) / num_heaps; +} + +enum { +CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC' +}; + + +#ifdef BACKGROUND_GC +void gc_heap::init_background_gc () +{ + //reset the allocation so foreground gc can allocate into older (max_generation) generation + generation* gen = generation_of (max_generation); + generation_allocation_pointer (gen)= 0; + generation_allocation_limit (gen) = 0; + generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(generation_allocation_segment(gen) != NULL); + + //reset the plan allocation for each segment + for (heap_segment* seg = generation_allocation_segment (gen); seg != ephemeral_heap_segment; + seg = heap_segment_next_rw (seg)) + { + heap_segment_plan_allocated (seg) = heap_segment_allocated (seg); + } + + if (heap_number == 0) + { + dprintf (2, ("heap%d: bgc lowest: %Ix, highest: %Ix", + heap_number, + background_saved_lowest_address, + background_saved_highest_address)); + } + + gc_lh_block_event.Reset(); +} + +#endif //BACKGROUND_GC + +#define fire_bgc_event(x) { FireEtw##x(GetClrInstanceId()); } + +inline +void fire_drain_mark_list_event (size_t mark_list_objects) +{ + FireEtwBGCDrainMark (mark_list_objects, GetClrInstanceId()); +} + +inline +void fire_revisit_event (size_t dirtied_pages, 
+ size_t marked_objects, + BOOL large_objects_p) +{ + FireEtwBGCRevisit (dirtied_pages, marked_objects, large_objects_p, GetClrInstanceId()); +} + +inline +void fire_overflow_event (uint8_t* overflow_min, + uint8_t* overflow_max, + size_t marked_objects, + int large_objects_p) +{ + FireEtwBGCOverflow ((uint64_t)overflow_min, (uint64_t)overflow_max, + marked_objects, large_objects_p, + GetClrInstanceId()); +} + +void gc_heap::concurrent_print_time_delta (const char* msg) +{ +#ifdef TRACE_GC + size_t current_time = GetHighPrecisionTimeStamp(); + size_t elapsed_time = current_time - time_bgc_last; + time_bgc_last = current_time; + + dprintf (2, ("h%d: %s T %Id ms", heap_number, msg, elapsed_time)); +#else + UNREFERENCED_PARAMETER(msg); +#endif //TRACE_GC +} + +void gc_heap::free_list_info (int gen_num, const char* msg) +{ + UNREFERENCED_PARAMETER(gen_num); +#if defined (BACKGROUND_GC) && defined (TRACE_GC) + dprintf (3, ("h%d: %s", heap_number, msg)); + for (int i = 0; i <= (max_generation + 1); i++) + { + generation* gen = generation_of (i); + if ((generation_allocation_size (gen) == 0) && + (generation_free_list_space (gen) == 0) && + (generation_free_obj_space (gen) == 0)) + { + // don't print if everything is 0. 
+ } + else + { + dprintf (3, ("h%d: g%d: a-%Id, fl-%Id, fo-%Id", + heap_number, i, + generation_allocation_size (gen), + generation_free_list_space (gen), + generation_free_obj_space (gen))); + } + } +#else + UNREFERENCED_PARAMETER(msg); +#endif // BACKGROUND_GC && TRACE_GC +} + +void gc_heap::update_collection_counts_for_no_gc() +{ + assert (settings.pause_mode == pause_no_gc); + + settings.condemned_generation = max_generation; +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + g_heaps[i]->update_collection_counts(); +#else //MULTIPLE_HEAPS + update_collection_counts(); +#endif //MULTIPLE_HEAPS + + full_gc_counts[gc_type_blocking]++; +} + +BOOL gc_heap::should_proceed_with_gc() +{ + if (gc_heap::settings.pause_mode == pause_no_gc) + { + if (current_no_gc_region_info.started) + { + // The no_gc mode was already in progress yet we triggered another GC, + // this effectively exits the no_gc mode. + restore_data_for_no_gc(); + } + else + return should_proceed_for_no_gc(); + } + + return TRUE; +} + +//internal part of gc used by the serial and concurrent version +void gc_heap::gc1() +{ +#ifdef BACKGROUND_GC + assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); +#endif //BACKGROUND_GC + +#ifdef TIME_GC + mark_time = plan_time = reloc_time = compact_time = sweep_time = 0; +#endif //TIME_GC + + verify_soh_segment_list(); + + int n = settings.condemned_generation; + + update_collection_counts (); + +#ifdef BACKGROUND_GC + bgc_alloc_lock->check(); +#endif //BACKGROUND_GC + + free_list_info (max_generation, "beginning"); + + vm_heap->GcCondemnedGeneration = settings.condemned_generation; + + assert (g_card_table == card_table); + + { + if (n == max_generation) + { + gc_low = lowest_address; + gc_high = highest_address; + } + else + { + gc_low = generation_allocation_start (generation_of (n)); + gc_high = heap_segment_reserved (ephemeral_heap_segment); + } +#ifdef BACKGROUND_GC + if (settings.concurrent) + { +#ifdef TRACE_GC + time_bgc_last 
= GetHighPrecisionTimeStamp(); +#endif //TRACE_GC + + fire_bgc_event (BGCBegin); + + concurrent_print_time_delta ("BGC"); + +//#ifdef WRITE_WATCH + //reset_write_watch (FALSE); +//#endif //WRITE_WATCH + + concurrent_print_time_delta ("RW"); + background_mark_phase(); + free_list_info (max_generation, "after mark phase"); + + background_sweep(); + free_list_info (max_generation, "after sweep phase"); + } + else +#endif //BACKGROUND_GC + { + mark_phase (n, FALSE); + + GCScan::GcRuntimeStructuresValid (FALSE); + plan_phase (n); + GCScan::GcRuntimeStructuresValid (TRUE); + } + } + + size_t end_gc_time = GetHighPrecisionTimeStamp(); +// printf ("generation: %d, elapsed time: %Id\n", n, end_gc_time - dd_time_clock (dynamic_data_of (0))); + + //adjust the allocation size from the pinned quantities. + for (int gen_number = 0; gen_number <= min (max_generation,n+1); gen_number++) + { + generation* gn = generation_of (gen_number); + if (settings.compaction) + { + generation_pinned_allocated (gn) += generation_pinned_allocation_compact_size (gn); + generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_compact_size (gn); + } + else + { + generation_pinned_allocated (gn) += generation_pinned_allocation_sweep_size (gn); + generation_allocation_size (generation_of (gen_number)) += generation_pinned_allocation_sweep_size (gn); + } + generation_pinned_allocation_sweep_size (gn) = 0; + generation_pinned_allocation_compact_size (gn) = 0; + } + +#ifdef BACKGROUND_GC + if (settings.concurrent) + { + dynamic_data* dd = dynamic_data_of (n); + dd_gc_elapsed_time (dd) = end_gc_time - dd_time_clock (dd); + + free_list_info (max_generation, "after computing new dynamic data"); + + gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); + + for (int gen_number = 0; gen_number < max_generation; gen_number++) + { + dprintf (2, ("end of BGC: gen%d new_alloc: %Id", + gen_number, dd_desired_allocation (dynamic_data_of (gen_number)))); + 
current_gc_data_per_heap->gen_data[gen_number].size_after = generation_size (gen_number); + current_gc_data_per_heap->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); + current_gc_data_per_heap->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number)); + } + } + else +#endif //BACKGROUND_GC + { + free_list_info (max_generation, "end"); + for (int gen_number = 0; gen_number <= n; gen_number++) + { + dynamic_data* dd = dynamic_data_of (gen_number); + dd_gc_elapsed_time (dd) = end_gc_time - dd_time_clock (dd); + compute_new_dynamic_data (gen_number); + } + + if (n != max_generation) + { + int gen_num_for_data = ((n < (max_generation - 1)) ? (n + 1) : (max_generation + 1)); + for (int gen_number = (n + 1); gen_number <= gen_num_for_data; gen_number++) + { + get_gc_data_per_heap()->gen_data[gen_number].size_after = generation_size (gen_number); + get_gc_data_per_heap()->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); + get_gc_data_per_heap()->gen_data[gen_number].free_obj_space_after = generation_free_obj_space (generation_of (gen_number)); + } + } + + get_gc_data_per_heap()->maxgen_size_info.running_free_list_efficiency = (uint32_t)(generation_allocator_efficiency (generation_of (max_generation)) * 100); + + free_list_info (max_generation, "after computing new dynamic data"); + + if (heap_number == 0) + { + dprintf (GTC_LOG, ("GC#%d(gen%d) took %Idms", + dd_collection_count (dynamic_data_of (0)), + settings.condemned_generation, + dd_gc_elapsed_time (dynamic_data_of (0)))); + } + + for (int gen_number = 0; gen_number <= (max_generation + 1); gen_number++) + { + dprintf (2, ("end of FGC/NGC: gen%d new_alloc: %Id", + gen_number, dd_desired_allocation (dynamic_data_of (gen_number)))); + } + } + + if (n < max_generation) + { + compute_promoted_allocation (1 + n); + + dynamic_data* dd = dynamic_data_of (1 + n); + size_t 
new_fragmentation = generation_free_list_space (generation_of (1 + n)) + + generation_free_obj_space (generation_of (1 + n)); + +#ifdef BACKGROUND_GC + if (current_c_gc_state != c_gc_state_planning) +#endif //BACKGROUND_GC + { + if (settings.promotion) + { + dd_fragmentation (dd) = new_fragmentation; + } + else + { + //assert (dd_fragmentation (dd) == new_fragmentation); + } + } + } + +#ifdef BACKGROUND_GC + if (!settings.concurrent) +#endif //BACKGROUND_GC + { + adjust_ephemeral_limits(!!IsGCThread()); + } + +#ifdef BACKGROUND_GC + assert (ephemeral_low == generation_allocation_start (generation_of ( max_generation -1))); + assert (ephemeral_high == heap_segment_reserved (ephemeral_heap_segment)); +#endif //BACKGROUND_GC + + if (fgn_maxgen_percent) + { + if (settings.condemned_generation == (max_generation - 1)) + { + check_for_full_gc (max_generation - 1, 0); + } + else if (settings.condemned_generation == max_generation) + { + if (full_gc_approach_event_set +#ifdef MULTIPLE_HEAPS + && (heap_number == 0) +#endif //MULTIPLE_HEAPS + ) + { + dprintf (2, ("FGN-GC: setting gen2 end event")); + + full_gc_approach_event.Reset(); +#ifdef BACKGROUND_GC + // By definition WaitForFullGCComplete only succeeds if it's full, *blocking* GC, otherwise need to return N/A + fgn_last_gc_was_concurrent = settings.concurrent ? 
TRUE : FALSE; +#endif //BACKGROUND_GC + full_gc_end_event.Set(); + full_gc_approach_event_set = false; + } + } + } + +#ifdef BACKGROUND_GC + if (!settings.concurrent) +#endif //BACKGROUND_GC + { + //decide on the next allocation quantum + if (alloc_contexts_used >= 1) + { + allocation_quantum = Align (min ((size_t)CLR_SIZE, + (size_t)max (1024, get_new_allocation (0) / (2 * alloc_contexts_used))), + get_alignment_constant(FALSE)); + dprintf (3, ("New allocation quantum: %d(0x%Ix)", allocation_quantum, allocation_quantum)); + } + } +#ifdef NO_WRITE_BARRIER + reset_write_watch(FALSE); +#endif //NO_WRITE_BARRIER + + descr_generations (FALSE); + descr_card_table(); + + verify_soh_segment_list(); + +#ifdef BACKGROUND_GC + add_to_history_per_heap(); + if (heap_number == 0) + { + add_to_history(); + } +#endif // BACKGROUND_GC + +#ifdef GC_STATS + if (GCStatistics::Enabled() && heap_number == 0) + g_GCStatistics.AddGCStats(settings, + dd_gc_elapsed_time(dynamic_data_of(settings.condemned_generation))); +#endif // GC_STATS + +#ifdef TIME_GC + fprintf (stdout, "%d,%d,%d,%d,%d,%d\n", + n, mark_time, plan_time, reloc_time, compact_time, sweep_time); +#endif //TIME_GC + +#ifdef BACKGROUND_GC + assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); +#endif //BACKGROUND_GC + +#if defined(VERIFY_HEAP) || (defined (FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC)) + if (FALSE +#ifdef VERIFY_HEAP + // Note that right now g_pConfig->GetHeapVerifyLevel always returns the same + // value. If we ever allow randomly adjusting this as the process runs, + // we cannot call it this way as joins need to match - we must have the same + // value for all heaps like we do with bgc_heap_walk_for_etw_p. 
+ || (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) +#endif +#if defined(FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC) + || (bgc_heap_walk_for_etw_p && settings.concurrent) +#endif + ) + { +#ifdef BACKGROUND_GC + Thread* current_thread = GetThread(); + BOOL cooperative_mode = TRUE; + + if (settings.concurrent) + { + cooperative_mode = enable_preemptive (current_thread); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_suspend_ee_verify); + if (bgc_t_join.joined()) + { + bgc_threads_sync_event.Reset(); + + dprintf(2, ("Joining BGC threads to suspend EE for verify heap")); + bgc_t_join.restart(); + } + if (heap_number == 0) + { + suspend_EE(); + bgc_threads_sync_event.Set(); + } + else + { + bgc_threads_sync_event.Wait(INFINITE, FALSE); + dprintf (2, ("bgc_threads_sync_event is signalled")); + } +#else + suspend_EE(); +#endif //MULTIPLE_HEAPS + + //fix the allocation area so verify_heap can proceed. + fix_allocation_contexts (FALSE); + } +#endif //BACKGROUND_GC + +#ifdef BACKGROUND_GC + assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); +#ifdef FEATURE_EVENT_TRACE + if (bgc_heap_walk_for_etw_p && settings.concurrent) + { + make_free_lists_for_profiler_for_bgc(); + } +#endif // FEATURE_EVENT_TRACE +#endif //BACKGROUND_GC + +#ifdef VERIFY_HEAP + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + verify_heap (FALSE); +#endif // VERIFY_HEAP + +#ifdef BACKGROUND_GC + if (settings.concurrent) + { + repair_allocation_contexts (TRUE); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_restart_ee_verify); + if (bgc_t_join.joined()) + { + bgc_threads_sync_event.Reset(); + + dprintf(2, ("Joining BGC threads to restart EE after verify heap")); + bgc_t_join.restart(); + } + if (heap_number == 0) + { + restart_EE(); + bgc_threads_sync_event.Set(); + } + else + { + bgc_threads_sync_event.Wait(INFINITE, FALSE); + dprintf (2, ("bgc_threads_sync_event is signalled")); + } +#else + restart_EE(); +#endif 
//MULTIPLE_HEAPS + + disable_preemptive (current_thread, cooperative_mode); + } +#endif //BACKGROUND_GC + } +#endif // defined(VERIFY_HEAP) || (defined(FEATURE_EVENT_TRACE) && defined(BACKGROUND_GC)) + +#ifdef MULTIPLE_HEAPS + if (!settings.concurrent) + { + gc_t_join.join(this, gc_join_done); + if (gc_t_join.joined ()) + { + gc_heap::internal_gc_done = false; + + //equalize the new desired size of the generations + int limit = settings.condemned_generation; + if (limit == max_generation) + { + limit = max_generation+1; + } + for (int gen = 0; gen <= limit; gen++) + { + size_t total_desired = 0; + + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + dynamic_data* dd = hp->dynamic_data_of (gen); + size_t temp_total_desired = total_desired + dd_desired_allocation (dd); + if (temp_total_desired < total_desired) + { + // we overflowed. + total_desired = (size_t)MAX_PTR; + break; + } + total_desired = temp_total_desired; + } + + size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps, + get_alignment_constant ((gen != (max_generation+1)))); + + if (gen == 0) + { +#if 1 //subsumed by the linear allocation model + // to avoid spikes in mem usage due to short terms fluctuations in survivorship, + // apply some smoothing. 
+ static size_t smoothed_desired_per_heap = 0; + size_t smoothing = 3; // exponential smoothing factor + if (smoothing > VolatileLoad(&settings.gc_index)) + smoothing = VolatileLoad(&settings.gc_index); + smoothed_desired_per_heap = desired_per_heap / smoothing + ((smoothed_desired_per_heap / smoothing) * (smoothing-1)); + dprintf (1, ("sn = %Id n = %Id", smoothed_desired_per_heap, desired_per_heap)); + desired_per_heap = Align(smoothed_desired_per_heap, get_alignment_constant (true)); +#endif //0 + + // if desired_per_heap is close to min_gc_size, trim it + // down to min_gc_size to stay in the cache + gc_heap* hp = gc_heap::g_heaps[0]; + dynamic_data* dd = hp->dynamic_data_of (gen); + size_t min_gc_size = dd_min_gc_size(dd); + // if min GC size larger than true on die cache, then don't bother + // limiting the desired size + if ((min_gc_size <= GCToOSInterface::GetLargestOnDieCacheSize(TRUE) / GCToOSInterface::GetLogicalCpuCount()) && + desired_per_heap <= 2*min_gc_size) + { + desired_per_heap = min_gc_size; + } +#ifdef BIT64 + desired_per_heap = joined_youngest_desired (desired_per_heap); + dprintf (2, ("final gen0 new_alloc: %Id", desired_per_heap)); +#endif // BIT64 + + gc_data_global.final_youngest_desired = desired_per_heap; + } +#if 1 //subsumed by the linear allocation model + if (gen == (max_generation + 1)) + { + // to avoid spikes in mem usage due to short terms fluctuations in survivorship, + // apply some smoothing. 
+ static size_t smoothed_desired_per_heap_loh = 0; + size_t smoothing = 3; // exponential smoothing factor + size_t loh_count = dd_collection_count (dynamic_data_of (max_generation)); + if (smoothing > loh_count) + smoothing = loh_count; + smoothed_desired_per_heap_loh = desired_per_heap / smoothing + ((smoothed_desired_per_heap_loh / smoothing) * (smoothing-1)); + dprintf( 2, ("smoothed_desired_per_heap_loh = %Id desired_per_heap = %Id", smoothed_desired_per_heap_loh, desired_per_heap)); + desired_per_heap = Align(smoothed_desired_per_heap_loh, get_alignment_constant (false)); + } +#endif //0 + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + dynamic_data* dd = hp->dynamic_data_of (gen); + dd_desired_allocation (dd) = desired_per_heap; + dd_gc_new_allocation (dd) = desired_per_heap; + dd_new_allocation (dd) = desired_per_heap; + + if (gen == 0) + { + hp->fgn_last_alloc = desired_per_heap; + } + } + } + +#ifdef FEATURE_LOH_COMPACTION + BOOL all_heaps_compacted_p = TRUE; +#endif //FEATURE_LOH_COMPACTION + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + hp->decommit_ephemeral_segment_pages(); + hp->rearrange_large_heap_segments(); +#ifdef FEATURE_LOH_COMPACTION + all_heaps_compacted_p &= hp->loh_compacted_p; +#endif //FEATURE_LOH_COMPACTION + } + +#ifdef FEATURE_LOH_COMPACTION + check_loh_compact_mode (all_heaps_compacted_p); +#endif //FEATURE_LOH_COMPACTION + + fire_pevents(); + + gc_t_join.restart(); + } + alloc_context_count = 0; + heap_select::mark_heap (heap_number); + } + +#else + gc_data_global.final_youngest_desired = + dd_desired_allocation (dynamic_data_of (0)); + + check_loh_compact_mode (loh_compacted_p); + + decommit_ephemeral_segment_pages(); + fire_pevents(); + + if (!(settings.concurrent)) + { + rearrange_large_heap_segments(); + do_post_gc(); + } + +#ifdef BACKGROUND_GC + recover_bgc_settings(); +#endif //BACKGROUND_GC +#endif //MULTIPLE_HEAPS +} + +void 
gc_heap::save_data_for_no_gc() +{ + current_no_gc_region_info.saved_pause_mode = settings.pause_mode; +#ifdef MULTIPLE_HEAPS + // This is to affect heap balancing. + for (int i = 0; i < n_heaps; i++) + { + current_no_gc_region_info.saved_gen0_min_size = dd_min_size (g_heaps[i]->dynamic_data_of (0)); + dd_min_size (g_heaps[i]->dynamic_data_of (0)) = min_balance_threshold; + current_no_gc_region_info.saved_gen3_min_size = dd_min_size (g_heaps[i]->dynamic_data_of (max_generation + 1)); + dd_min_size (g_heaps[i]->dynamic_data_of (max_generation + 1)) = 0; + } +#endif //MULTIPLE_HEAPS +} + +void gc_heap::restore_data_for_no_gc() +{ + gc_heap::settings.pause_mode = current_no_gc_region_info.saved_pause_mode; +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + dd_min_size (g_heaps[i]->dynamic_data_of (0)) = current_no_gc_region_info.saved_gen0_min_size; + dd_min_size (g_heaps[i]->dynamic_data_of (max_generation + 1)) = current_no_gc_region_info.saved_gen3_min_size; + } +#endif //MULTIPLE_HEAPS +} + +start_no_gc_region_status gc_heap::prepare_for_no_gc_region (uint64_t total_size, + BOOL loh_size_known, + uint64_t loh_size, + BOOL disallow_full_blocking) +{ + if (current_no_gc_region_info.started) + { + return start_no_gc_in_progress; + } + + start_no_gc_region_status status = start_no_gc_success; + + save_data_for_no_gc(); + settings.pause_mode = pause_no_gc; + current_no_gc_region_info.start_status = start_no_gc_success; + + size_t allocation_no_gc_loh = 0; + size_t allocation_no_gc_soh = 0; + size_t size_per_heap = 0; + + if (loh_size_known) + { + allocation_no_gc_loh = (size_t)loh_size; + allocation_no_gc_soh = (size_t)(total_size - loh_size); + } + else + { + allocation_no_gc_soh = (size_t)total_size; + allocation_no_gc_loh = (size_t)total_size; + } + + size_t soh_segment_size = get_valid_segment_size(); + + int num_heaps = 1; +#ifdef MULTIPLE_HEAPS + num_heaps = n_heaps; +#endif //MULTIPLE_HEAPS + size_t total_allowed_soh_allocation = (soh_segment_size - 
OS_PAGE_SIZE) * num_heaps; + + if (allocation_no_gc_soh > total_allowed_soh_allocation) + { + status = start_no_gc_too_large; + goto done; + } + + if (disallow_full_blocking) + current_no_gc_region_info.minimal_gc_p = TRUE; + + if (allocation_no_gc_soh != 0) + { + current_no_gc_region_info.soh_allocation_size = (size_t)((float)allocation_no_gc_soh * 1.05); + //current_no_gc_region_info.soh_allocation_size = allocation_no_gc_soh; + size_per_heap = current_no_gc_region_info.soh_allocation_size; +#ifdef MULTIPLE_HEAPS + size_per_heap /= n_heaps; + for (int i = 0; i < n_heaps; i++) + { + // due to heap balancing we need to allow some room before we even look to balance to another heap. + g_heaps[i]->soh_allocation_no_gc = min (Align (size_per_heap + min_balance_threshold, get_alignment_constant (TRUE)), (soh_segment_size - OS_PAGE_SIZE)); + } +#else //MULTIPLE_HEAPS + soh_allocation_no_gc = min (Align (size_per_heap, get_alignment_constant (TRUE)), (soh_segment_size - OS_PAGE_SIZE)); +#endif //MULTIPLE_HEAPS + } + + if (allocation_no_gc_loh != 0) + { + current_no_gc_region_info.loh_allocation_size = (size_t)((float)allocation_no_gc_loh * 1.05); + size_per_heap = current_no_gc_region_info.loh_allocation_size; +#ifdef MULTIPLE_HEAPS + size_per_heap /= n_heaps; + for (int i = 0; i < n_heaps; i++) + g_heaps[i]->loh_allocation_no_gc = Align (size_per_heap, get_alignment_constant (FALSE)); +#else //MULTIPLE_HEAPS + loh_allocation_no_gc = Align (size_per_heap, get_alignment_constant (FALSE)); +#endif //MULTIPLE_HEAPS + } + +done: + if (status != start_no_gc_success) + restore_data_for_no_gc(); + return status; +} + +void gc_heap::handle_failure_for_no_gc() +{ + gc_heap::restore_data_for_no_gc(); + // sets current_no_gc_region_info.started to FALSE here. 
+ memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); +} + +start_no_gc_region_status gc_heap::get_start_no_gc_region_status() +{ + return current_no_gc_region_info.start_status; +} + +void gc_heap::record_gcs_during_no_gc() +{ + if (current_no_gc_region_info.started) + { + current_no_gc_region_info.num_gcs++; + if (is_induced (settings.reason)) + current_no_gc_region_info.num_gcs_induced++; + } +} + +BOOL gc_heap::find_loh_free_for_no_gc() +{ + allocator* loh_allocator = generation_allocator (generation_of (max_generation + 1)); + size_t sz_list = loh_allocator->first_bucket_size(); + size_t size = loh_allocation_no_gc; + for (unsigned int a_l_idx = 0; a_l_idx < loh_allocator->number_of_buckets(); a_l_idx++) + { + if ((size < sz_list) || (a_l_idx == (loh_allocator->number_of_buckets()-1))) + { + uint8_t* free_list = loh_allocator->alloc_list_head_of (a_l_idx); + while (free_list) + { + size_t free_list_size = unused_array_size(free_list); + + if (free_list_size > loh_allocation_no_gc) + { + dprintf (3, ("free item %Ix(%Id) for no gc", (size_t)free_list, free_list_size)); + return TRUE; + } + + free_list = free_list_slot (free_list); + } + } + sz_list = sz_list * 2; + } + + return FALSE; +} + +BOOL gc_heap::find_loh_space_for_no_gc() +{ + saved_loh_segment_no_gc = 0; + + if (find_loh_free_for_no_gc()) + return TRUE; + + heap_segment* seg = generation_allocation_segment (generation_of (max_generation + 1)); + + while (seg) + { + size_t remaining = heap_segment_reserved (seg) - heap_segment_allocated (seg); + if (remaining >= loh_allocation_no_gc) + { + saved_loh_segment_no_gc = seg; + break; + } + seg = heap_segment_next (seg); + } + + if (!saved_loh_segment_no_gc && current_no_gc_region_info.minimal_gc_p) + { + // If no full GC is allowed, we try to get a new seg right away. 
+ saved_loh_segment_no_gc = get_segment_for_loh (get_large_seg_size (loh_allocation_no_gc) +#ifdef MULTIPLE_HEAPS + , this +#endif //MULTIPLE_HEAPS + ); + } + + return (saved_loh_segment_no_gc != 0); +} + +BOOL gc_heap::loh_allocated_for_no_gc() +{ + if (!saved_loh_segment_no_gc) + return FALSE; + + heap_segment* seg = generation_allocation_segment (generation_of (max_generation + 1)); + do + { + if (seg == saved_loh_segment_no_gc) + { + return FALSE; + } + seg = heap_segment_next (seg); + } while (seg); + + return TRUE; +} + +BOOL gc_heap::commit_loh_for_no_gc (heap_segment* seg) +{ + uint8_t* end_committed = heap_segment_allocated (seg) + loh_allocation_no_gc; + assert (end_committed <= heap_segment_reserved (seg)); + return (grow_heap_segment (seg, end_committed)); +} + +void gc_heap::thread_no_gc_loh_segments() +{ +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (hp->loh_allocated_for_no_gc()) + { + hp->thread_loh_segment (hp->saved_loh_segment_no_gc); + hp->saved_loh_segment_no_gc = 0; + } + } +#else //MULTIPLE_HEAPS + if (loh_allocated_for_no_gc()) + { + thread_loh_segment (saved_loh_segment_no_gc); + saved_loh_segment_no_gc = 0; + } +#endif //MULTIPLE_HEAPS +} + +void gc_heap::set_loh_allocations_for_no_gc() +{ + if (current_no_gc_region_info.loh_allocation_size != 0) + { + dynamic_data* dd = dynamic_data_of (max_generation + 1); + dd_new_allocation (dd) = loh_allocation_no_gc; + dd_gc_new_allocation (dd) = dd_new_allocation (dd); + } +} + +void gc_heap::set_soh_allocations_for_no_gc() +{ + if (current_no_gc_region_info.soh_allocation_size != 0) + { + dynamic_data* dd = dynamic_data_of (0); + dd_new_allocation (dd) = soh_allocation_no_gc; + dd_gc_new_allocation (dd) = dd_new_allocation (dd); +#ifdef MULTIPLE_HEAPS + alloc_context_count = 0; +#endif //MULTIPLE_HEAPS + } +} + +void gc_heap::set_allocations_for_no_gc() +{ +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + 
hp->set_loh_allocations_for_no_gc(); + hp->set_soh_allocations_for_no_gc(); + } +#else //MULTIPLE_HEAPS + set_loh_allocations_for_no_gc(); + set_soh_allocations_for_no_gc(); +#endif //MULTIPLE_HEAPS +} + +BOOL gc_heap::should_proceed_for_no_gc() +{ + BOOL gc_requested = FALSE; + BOOL loh_full_gc_requested = FALSE; + BOOL soh_full_gc_requested = FALSE; + BOOL no_gc_requested = FALSE; + BOOL get_new_loh_segments = FALSE; + + if (current_no_gc_region_info.soh_allocation_size) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if ((size_t)(heap_segment_reserved (hp->ephemeral_heap_segment) - hp->alloc_allocated) < hp->soh_allocation_no_gc) + { + gc_requested = TRUE; + break; + } + } +#else //MULTIPLE_HEAPS + if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - alloc_allocated) < soh_allocation_no_gc) + gc_requested = TRUE; +#endif //MULTIPLE_HEAPS + + if (!gc_requested) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (!(hp->grow_heap_segment (hp->ephemeral_heap_segment, (hp->alloc_allocated + hp->soh_allocation_no_gc)))) + { + soh_full_gc_requested = TRUE; + break; + } + } +#else //MULTIPLE_HEAPS + if (!grow_heap_segment (ephemeral_heap_segment, (alloc_allocated + soh_allocation_no_gc))) + soh_full_gc_requested = TRUE; +#endif //MULTIPLE_HEAPS + } + } + + if (!current_no_gc_region_info.minimal_gc_p && gc_requested) + { + soh_full_gc_requested = TRUE; + } + + no_gc_requested = !(soh_full_gc_requested || gc_requested); + + if (soh_full_gc_requested && current_no_gc_region_info.minimal_gc_p) + { + current_no_gc_region_info.start_status = start_no_gc_no_memory; + goto done; + } + + if (!soh_full_gc_requested && current_no_gc_region_info.loh_allocation_size) + { + // Check to see if we have enough reserved space. 
+#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (!hp->find_loh_space_for_no_gc()) + { + loh_full_gc_requested = TRUE; + break; + } + } +#else //MULTIPLE_HEAPS + if (!find_loh_space_for_no_gc()) + loh_full_gc_requested = TRUE; +#endif //MULTIPLE_HEAPS + + // Check to see if we have committed space. + if (!loh_full_gc_requested) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (hp->saved_loh_segment_no_gc &&!hp->commit_loh_for_no_gc (hp->saved_loh_segment_no_gc)) + { + loh_full_gc_requested = TRUE; + break; + } + } +#else //MULTIPLE_HEAPS + if (saved_loh_segment_no_gc && !commit_loh_for_no_gc (saved_loh_segment_no_gc)) + loh_full_gc_requested = TRUE; +#endif //MULTIPLE_HEAPS + } + } + + if (loh_full_gc_requested || soh_full_gc_requested) + { + if (current_no_gc_region_info.minimal_gc_p) + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } + + no_gc_requested = !(loh_full_gc_requested || soh_full_gc_requested || gc_requested); + + if (current_no_gc_region_info.start_status == start_no_gc_success) + { + if (no_gc_requested) + set_allocations_for_no_gc(); + } + +done: + + if ((current_no_gc_region_info.start_status == start_no_gc_success) && !no_gc_requested) + return TRUE; + else + { + // We are done with starting the no gc region. + current_no_gc_region_info.started = TRUE; + return FALSE; + } +} + +end_no_gc_region_status gc_heap::end_no_gc_region() +{ + dprintf (1, ("end no gc called")); + + end_no_gc_region_status status = end_no_gc_success; + + if (!(current_no_gc_region_info.started)) + status = end_no_gc_not_in_progress; + if (current_no_gc_region_info.num_gcs_induced) + status = end_no_gc_induced; + else if (current_no_gc_region_info.num_gcs) + status = end_no_gc_alloc_exceeded; + + if (settings.pause_mode == pause_no_gc) + restore_data_for_no_gc(); + + // sets current_no_gc_region_info.started to FALSE here. 
+ memset (&current_no_gc_region_info, 0, sizeof (current_no_gc_region_info)); + + return status; +} + +//update counters +void gc_heap::update_collection_counts () +{ + dynamic_data* dd0 = dynamic_data_of (0); + dd_gc_clock (dd0) += 1; + + size_t now = GetHighPrecisionTimeStamp(); + + for (int i = 0; i <= settings.condemned_generation;i++) + { + dynamic_data* dd = dynamic_data_of (i); + dd_collection_count (dd)++; + //this is needed by the linear allocation model + if (i == max_generation) + dd_collection_count (dynamic_data_of (max_generation+1))++; + dd_gc_clock (dd) = dd_gc_clock (dd0); + dd_time_clock (dd) = now; + } +} + +#ifdef HEAP_ANALYZE +inline +BOOL AnalyzeSurvivorsRequested(int condemnedGeneration) +{ + // Is the list active? + GcNotifications gn(g_pGcNotificationTable); + if (gn.IsActive()) + { + GcEvtArgs gea = { GC_MARK_END, { (1<<condemnedGeneration) } }; + if (gn.GetNotification(gea) != 0) + { + return TRUE; + } + } + return FALSE; +} + +void DACNotifyGcMarkEnd(int condemnedGeneration) +{ + // Is the list active? 
+ GcNotifications gn(g_pGcNotificationTable); + if (gn.IsActive()) + { + GcEvtArgs gea = { GC_MARK_END, { (1<<condemnedGeneration) } }; + if (gn.GetNotification(gea) != 0) + { + DACNotify::DoGCNotification(gea); + } + } +} +#endif // HEAP_ANALYZE + +BOOL gc_heap::expand_soh_with_minimal_gc() +{ + if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)) >= soh_allocation_no_gc) + return TRUE; + + heap_segment* new_seg = soh_get_segment_to_expand(); + if (new_seg) + { + settings.promotion = TRUE; + settings.demotion = FALSE; + ephemeral_promotion = TRUE; + save_ephemeral_generation_starts(); + size_t ephemeral_size = (heap_segment_allocated (ephemeral_heap_segment) - + generation_allocation_start (generation_of (max_generation - 1))); + heap_segment_next (ephemeral_heap_segment) = new_seg; + ephemeral_heap_segment = new_seg; + uint8_t* start = heap_segment_mem (ephemeral_heap_segment); + + for (int i = (max_generation - 1); i >= 0; i--) + { + generation* gen = generation_of (i); + size_t gen_start_size = Align (min_obj_size); + make_generation (generation_table[i], ephemeral_heap_segment, start, 0); + generation_plan_allocation_start (gen) = start; + generation_plan_allocation_start_size (gen) = gen_start_size; + start += gen_start_size; + } + heap_segment_used (ephemeral_heap_segment) = start - plug_skew; + heap_segment_plan_allocated (ephemeral_heap_segment) = start; + + fix_generation_bounds ((max_generation - 1), generation_of (0)); + + dd_gc_new_allocation (dynamic_data_of (max_generation)) -= ephemeral_size; + dd_new_allocation (dynamic_data_of (max_generation)) = dd_gc_new_allocation (dynamic_data_of (max_generation)); + + adjust_ephemeral_limits(!!IsGCThread()); + return TRUE; + } + else + return FALSE; +} + +void gc_heap::allocate_for_no_gc_after_gc() +{ + if (current_no_gc_region_info.minimal_gc_p) + repair_allocation_contexts (TRUE); + + if (current_no_gc_region_info.start_status != 
start_no_gc_no_memory) + { + if (current_no_gc_region_info.soh_allocation_size != 0) + { + if (((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)) < soh_allocation_no_gc) || + (!grow_heap_segment (ephemeral_heap_segment, (heap_segment_allocated (ephemeral_heap_segment) + soh_allocation_no_gc)))) + { + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } + +#ifdef MULTIPLE_HEAPS + if (!current_no_gc_region_info.minimal_gc_p && + (current_no_gc_region_info.loh_allocation_size != 0)) + { + gc_t_join.join(this, gc_join_after_commit_soh_no_gc); + if (gc_t_join.joined()) + { + gc_t_join.restart(); + } + } +#endif //MULTIPLE_HEAPS + } + + if ((current_no_gc_region_info.start_status == start_no_gc_success) && + !(current_no_gc_region_info.minimal_gc_p) && + (current_no_gc_region_info.loh_allocation_size != 0)) + { + gc_policy = policy_compact; + saved_loh_segment_no_gc = 0; + + if (!find_loh_free_for_no_gc()) + { + heap_segment* seg = generation_allocation_segment (generation_of (max_generation + 1)); + BOOL found_seg_p = FALSE; + while (seg) + { + if ((size_t)(heap_segment_reserved (seg) - heap_segment_allocated (seg)) >= loh_allocation_no_gc) + { + found_seg_p = TRUE; + if (!commit_loh_for_no_gc (seg)) + { + current_no_gc_region_info.start_status = start_no_gc_no_memory; + break; + } + } + seg = heap_segment_next (seg); + } + + if (!found_seg_p) + gc_policy = policy_expand; + } + +#ifdef MULTIPLE_HEAPS + gc_t_join.join(this, gc_join_expand_loh_no_gc); + if (gc_t_join.joined()) + { + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (hp->gc_policy == policy_expand) + { + hp->saved_loh_segment_no_gc = get_segment_for_loh (get_large_seg_size (loh_allocation_no_gc), hp); + if (!(hp->saved_loh_segment_no_gc)) + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } + } + gc_t_join.restart(); + } +#else //MULTIPLE_HEAPS + if (gc_policy == policy_expand) + { + 
saved_loh_segment_no_gc = get_segment_for_loh (get_large_seg_size (loh_allocation_no_gc)); + if (!saved_loh_segment_no_gc) + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } +#endif //MULTIPLE_HEAPS + + if ((current_no_gc_region_info.start_status == start_no_gc_success) && saved_loh_segment_no_gc) + { + if (!commit_loh_for_no_gc (saved_loh_segment_no_gc)) + { + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } + } + } + } + +#ifdef MULTIPLE_HEAPS + gc_t_join.join(this, gc_join_final_no_gc); + if (gc_t_join.joined()) + { +#endif //MULTIPLE_HEAPS + if (current_no_gc_region_info.start_status == start_no_gc_success) + { + set_allocations_for_no_gc(); + current_no_gc_region_info.started = TRUE; + } + +#ifdef MULTIPLE_HEAPS + gc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS +} + +void gc_heap::init_records() +{ + memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap)); + gc_data_per_heap.heap_index = heap_number; + if (heap_number == 0) + memset (&gc_data_global, 0, sizeof (gc_data_global)); + +#ifdef GC_CONFIG_DRIVEN + memset (interesting_data_per_gc, 0, sizeof (interesting_data_per_gc)); +#endif //GC_CONFIG_DRIVEN +} + +int gc_heap::garbage_collect (int n) +{ + //reset the number of alloc contexts + alloc_contexts_used = 0; + + fix_allocation_contexts (TRUE); +#ifdef MULTIPLE_HEAPS + clear_gen0_bricks(); +#endif //MULTIPLE_HEAPS + + if ((settings.pause_mode == pause_no_gc) && current_no_gc_region_info.minimal_gc_p) + { +#ifdef MULTIPLE_HEAPS + gc_t_join.join(this, gc_join_minimal_gc); + if (gc_t_join.joined()) + { +#endif //MULTIPLE_HEAPS + +#ifdef MULTIPLE_HEAPS + // this is serialized because we need to get a segment + for (int i = 0; i < n_heaps; i++) + { + if (!(g_heaps[i]->expand_soh_with_minimal_gc())) + current_no_gc_region_info.start_status = start_no_gc_no_memory; + } +#else + if (!expand_soh_with_minimal_gc()) + current_no_gc_region_info.start_status = start_no_gc_no_memory; +#endif //MULTIPLE_HEAPS + + 
update_collection_counts_for_no_gc(); + +#ifdef MULTIPLE_HEAPS + gc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + goto done; + } + + init_records(); + memset (&fgm_result, 0, sizeof (fgm_result)); + + settings.reason = gc_trigger_reason; + verify_pinned_queue_p = FALSE; + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + num_pinned_objects = 0; +#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef STRESS_HEAP + if (settings.reason == reason_gcstress) + { + settings.reason = reason_induced; + settings.stress_induced = TRUE; + } +#endif // STRESS_HEAP + +#ifdef MULTIPLE_HEAPS + //align all heaps on the max generation to condemn + dprintf (3, ("Joining for max generation to condemn")); + condemned_generation_num = generation_to_condemn (n, + &blocking_collection, + &elevation_requested, + FALSE); + gc_t_join.join(this, gc_join_generation_determined); + if (gc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { +#ifdef TRACE_GC + int gc_count = (int)dd_collection_count (dynamic_data_of (0)); + if (gc_count >= g_pConfig->GetGCtraceStart()) + trace_gc = 1; + if (gc_count >= g_pConfig->GetGCtraceEnd()) + trace_gc = 0; +#endif //TRACE_GC + +#ifdef MULTIPLE_HEAPS +#if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE) + //delete old slots from the segment table + seg_table->delete_old_slots(); +#endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE + for (int i = 0; i < n_heaps; i++) + { + //copy the card and brick tables + if (g_card_table != g_heaps[i]->card_table) + { + g_heaps[i]->copy_brick_card_table(); + } + + g_heaps[i]->rearrange_large_heap_segments(); + if (!recursive_gc_sync::background_running_p()) + { + g_heaps[i]->rearrange_small_heap_segments(); + } + } +#else //MULTIPLE_HEAPS +#ifdef BACKGROUND_GC + //delete old slots from the segment table +#if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE) + seg_table->delete_old_slots(); +#endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE + rearrange_large_heap_segments(); 
+ if (!recursive_gc_sync::background_running_p()) + { + rearrange_small_heap_segments(); + } +#endif //BACKGROUND_GC + // check for card table growth + if (g_card_table != card_table) + copy_brick_card_table(); + +#endif //MULTIPLE_HEAPS + + BOOL should_evaluate_elevation = FALSE; + BOOL should_do_blocking_collection = FALSE; + +#ifdef MULTIPLE_HEAPS + int gen_max = condemned_generation_num; + for (int i = 0; i < n_heaps; i++) + { + if (gen_max < g_heaps[i]->condemned_generation_num) + gen_max = g_heaps[i]->condemned_generation_num; + if ((!should_evaluate_elevation) && (g_heaps[i]->elevation_requested)) + should_evaluate_elevation = TRUE; + if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection)) + should_do_blocking_collection = TRUE; + } + + settings.condemned_generation = gen_max; +//logically continues after GC_PROFILING. +#else //MULTIPLE_HEAPS + settings.condemned_generation = generation_to_condemn (n, + &blocking_collection, + &elevation_requested, + FALSE); + should_evaluate_elevation = elevation_requested; + should_do_blocking_collection = blocking_collection; +#endif //MULTIPLE_HEAPS + + settings.condemned_generation = joined_generation_to_condemn ( + should_evaluate_elevation, + settings.condemned_generation, + &should_do_blocking_collection + STRESS_HEAP_ARG(n) + ); + + STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10, + "condemned generation num: %d\n", settings.condemned_generation); + + record_gcs_during_no_gc(); + + if (settings.condemned_generation > 1) + settings.promotion = TRUE; + +#ifdef HEAP_ANALYZE + // At this point we've decided what generation is condemned + // See if we've been requested to analyze survivors after the mark phase + if (AnalyzeSurvivorsRequested(settings.condemned_generation)) + { + heap_analyze_enabled = TRUE; + } +#endif // HEAP_ANALYZE + +#ifdef GC_PROFILING + + // If we're tracking GCs, then we need to walk the first generation + // before collection to track how many items of each class has been + 
// allocated. + UpdateGenerationBounds(); + GarbageCollectionStartedCallback(settings.condemned_generation, settings.reason == reason_induced); + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + size_t profiling_context = 0; + +#ifdef MULTIPLE_HEAPS + int hn = 0; + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + + // When we're walking objects allocated by class, then we don't want to walk the large + // object heap because then it would count things that may have been around for a while. + hp->walk_heap (&AllocByClassHelper, (void *)&profiling_context, 0, FALSE); + } +#else + // When we're walking objects allocated by class, then we don't want to walk the large + // object heap because then it would count things that may have been around for a while. + gc_heap::walk_heap (&AllocByClassHelper, (void *)&profiling_context, 0, FALSE); +#endif //MULTIPLE_HEAPS + + // Notify that we've reached the end of the Gen 0 scan + g_profControlBlock.pProfInterface->EndAllocByClass(&profiling_context); + END_PIN_PROFILER(); + } + +#endif // GC_PROFILING + +#ifdef BACKGROUND_GC + if ((settings.condemned_generation == max_generation) && + (recursive_gc_sync::background_running_p())) + { + //TODO BACKGROUND_GC If we just wait for the end of gc, it won't woork + // because we have to collect 0 and 1 properly + // in particular, the allocation contexts are gone. 
+ // For now, it is simpler to collect max_generation-1 + settings.condemned_generation = max_generation - 1; + dprintf (GTC_LOG, ("bgc - 1 instead of 2")); + } + + if ((settings.condemned_generation == max_generation) && + (should_do_blocking_collection == FALSE) && + gc_can_use_concurrent && + !temp_disable_concurrent_p && + ((settings.pause_mode == pause_interactive) || (settings.pause_mode == pause_sustained_low_latency))) + { + keep_bgc_threads_p = TRUE; + c_write (settings.concurrent, TRUE); + } +#endif //BACKGROUND_GC + + settings.gc_index = (uint32_t)dd_collection_count (dynamic_data_of (0)) + 1; + + // Call the EE for start of GC work + // just one thread for MP GC + GCToEEInterface::GcStartWork (settings.condemned_generation, + max_generation); + + // TODO: we could fire an ETW event to say this GC as a concurrent GC but later on due to not being able to + // create threads or whatever, this could be a non concurrent GC. Maybe for concurrent GC we should fire + // it in do_background_gc and if it failed to be a CGC we fire it in gc1... in other words, this should be + // fired in gc1. + do_pre_gc(); + +#ifdef MULTIPLE_HEAPS + gc_start_event.Reset(); + //start all threads on the roots. 
+ dprintf(3, ("Starting all gc threads for gc")); + gc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + { + int gen_num_for_data = max_generation + 1; + for (int i = 0; i <= gen_num_for_data; i++) + { + gc_data_per_heap.gen_data[i].size_before = generation_size (i); + generation* gen = generation_of (i); + gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); + gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); + } + } + descr_generations (TRUE); +// descr_card_table(); + +#ifdef NO_WRITE_BARRIER + fix_card_table(); +#endif //NO_WRITE_BARRIER + +#ifdef VERIFY_HEAP + if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) && + !(g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_POST_GC_ONLY)) + { + verify_heap (TRUE); + } + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) + checkGCWriteBarrier(); + +#endif // VERIFY_HEAP + +#ifdef BACKGROUND_GC + if (settings.concurrent) + { + // We need to save the settings because we'll need to restore it after each FGC. 
+ assert (settings.condemned_generation == max_generation); + settings.compaction = FALSE; + saved_bgc_settings = settings; + +#ifdef MULTIPLE_HEAPS + if (heap_number == 0) + { + for (int i = 0; i < n_heaps; i++) + { + prepare_bgc_thread (g_heaps[i]); + } + dprintf (2, ("setting bgc_threads_sync_event")); + bgc_threads_sync_event.Set(); + } + else + { + bgc_threads_sync_event.Wait(INFINITE, FALSE); + dprintf (2, ("bgc_threads_sync_event is signalled")); + } +#else + prepare_bgc_thread(0); +#endif //MULTIPLE_HEAPS + +#ifdef MULTIPLE_HEAPS + gc_t_join.join(this, gc_join_start_bgc); + if (gc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + do_concurrent_p = TRUE; + do_ephemeral_gc_p = FALSE; +#ifdef MULTIPLE_HEAPS + dprintf(2, ("Joined to perform a background GC")); + + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + if (!(hp->bgc_thread) || !hp->commit_mark_array_bgc_init (hp->mark_array)) + { + do_concurrent_p = FALSE; + break; + } + else + { + hp->background_saved_lowest_address = hp->lowest_address; + hp->background_saved_highest_address = hp->highest_address; + } + } +#else + do_concurrent_p = (!!bgc_thread && commit_mark_array_bgc_init (mark_array)); + if (do_concurrent_p) + { + background_saved_lowest_address = lowest_address; + background_saved_highest_address = highest_address; + } +#endif //MULTIPLE_HEAPS + + if (do_concurrent_p) + { +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::EnableForGCHeap(); +#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + g_heaps[i]->current_bgc_state = bgc_initialized; +#else + current_bgc_state = bgc_initialized; +#endif //MULTIPLE_HEAPS + + int gen = check_for_ephemeral_alloc(); + // always do a gen1 GC before we start BGC. + // This is temporary for testing purpose. 
+ //int gen = max_generation - 1; + dont_restart_ee_p = TRUE; + if (gen == -1) + { + // If we decide to not do a GC before the BGC we need to + // restore the gen0 alloc context. +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + generation_allocation_pointer (g_heaps[i]->generation_of (0)) = 0; + generation_allocation_limit (g_heaps[i]->generation_of (0)) = 0; + } +#else + generation_allocation_pointer (youngest_generation) = 0; + generation_allocation_limit (youngest_generation) = 0; +#endif //MULTIPLE_HEAPS + } + else + { + do_ephemeral_gc_p = TRUE; + + settings.init_mechanisms(); + settings.condemned_generation = gen; + settings.gc_index = (size_t)dd_collection_count (dynamic_data_of (0)) + 2; + do_pre_gc(); + + // TODO BACKGROUND_GC need to add the profiling stuff here. + dprintf (GTC_LOG, ("doing gen%d before doing a bgc", gen)); + } + + //clear the cards so they don't bleed in gen 1 during collection + // shouldn't this always be done at the beginning of any GC? + //clear_card_for_addresses ( + // generation_allocation_start (generation_of (0)), + // heap_segment_allocated (ephemeral_heap_segment)); + + if (!do_ephemeral_gc_p) + { + do_background_gc(); + } + } + else + { + settings.compaction = TRUE; + c_write (settings.concurrent, FALSE); + } + +#ifdef MULTIPLE_HEAPS + gc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + if (do_concurrent_p) + { + // At this point we are sure we'll be starting a BGC, so save its per heap data here. + // global data is only calculated at the end of the GC so we don't need to worry about + // FGCs overwriting it. 
+ memset (&bgc_data_per_heap, 0, sizeof (bgc_data_per_heap)); + memcpy (&bgc_data_per_heap, &gc_data_per_heap, sizeof(gc_data_per_heap)); + + if (do_ephemeral_gc_p) + { + dprintf (2, ("GC threads running, doing gen%d GC", settings.condemned_generation)); + + gen_to_condemn_reasons.init(); + gen_to_condemn_reasons.set_condition (gen_before_bgc); + gc_data_per_heap.gen_to_condemn_reasons.init (&gen_to_condemn_reasons); + gc1(); +#ifdef MULTIPLE_HEAPS + gc_t_join.join(this, gc_join_bgc_after_ephemeral); + if (gc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { +#ifdef MULTIPLE_HEAPS + do_post_gc(); +#endif //MULTIPLE_HEAPS + settings = saved_bgc_settings; + assert (settings.concurrent); + + do_background_gc(); + +#ifdef MULTIPLE_HEAPS + gc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + } + } + else + { + dprintf (2, ("couldn't create BGC threads, reverting to doing a blocking GC")); + gc1(); + } + } + else +#endif //BACKGROUND_GC + { + gc1(); + } +#ifndef MULTIPLE_HEAPS + allocation_running_time = (size_t)GCToOSInterface::GetLowPrecisionTimeStamp(); + allocation_running_amount = dd_new_allocation (dynamic_data_of (0)); + fgn_last_alloc = dd_new_allocation (dynamic_data_of (0)); +#endif //MULTIPLE_HEAPS + +done: + if (settings.pause_mode == pause_no_gc) + allocate_for_no_gc_after_gc(); + + int gn = settings.condemned_generation; + return gn; +} + +#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) + +inline +size_t& gc_heap::promoted_bytes(int thread) +{ +#ifdef MULTIPLE_HEAPS + return g_promoted [thread*16]; +#else //MULTIPLE_HEAPS + UNREFERENCED_PARAMETER(thread); + return g_promoted; +#endif //MULTIPLE_HEAPS +} + +#ifdef INTERIOR_POINTERS +heap_segment* gc_heap::find_segment (uint8_t* interior, BOOL small_segment_only_p) +{ +#ifdef SEG_MAPPING_TABLE + heap_segment* seg = seg_mapping_table_segment_of (interior); + if (seg) + { + if (small_segment_only_p && heap_segment_loh_p (seg)) + return 0; + } + return seg; +#else //SEG_MAPPING_TABLE +#ifdef 
MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* h = gc_heap::g_heaps [i]; + hs = h->find_segment_per_heap (o, small_segment_only_p); + if (hs) + { + break; + } + } +#else + { + gc_heap* h = pGenGCHeap; + hs = h->find_segment_per_heap (o, small_segment_only_p); + } +#endif //MULTIPLE_HEAPS +#endif //SEG_MAPPING_TABLE +} + +heap_segment* gc_heap::find_segment_per_heap (uint8_t* interior, BOOL small_segment_only_p) +{ +#ifdef SEG_MAPPING_TABLE + return find_segment (interior, small_segment_only_p); +#else //SEG_MAPPING_TABLE + if (in_range_for_segment (interior, ephemeral_heap_segment)) + { + return ephemeral_heap_segment; + } + else + { + heap_segment* found_seg = 0; + + { + heap_segment* seg = generation_start_segment (generation_of (max_generation)); + do + { + if (in_range_for_segment (interior, seg)) + { + found_seg = seg; + goto end_find_segment; + } + + } while ((seg = heap_segment_next (seg)) != 0); + } + if (!small_segment_only_p) + { +#ifdef BACKGROUND_GC + { + ptrdiff_t delta = 0; + heap_segment* seg = segment_of (interior, delta); + if (seg && in_range_for_segment (interior, seg)) + { + found_seg = seg; + } + goto end_find_segment; + } +#else //BACKGROUND_GC + heap_segment* seg = generation_start_segment (generation_of (max_generation+1)); + do + { + if (in_range_for_segment(interior, seg)) + { + found_seg = seg; + goto end_find_segment; + } + + } while ((seg = heap_segment_next (seg)) != 0); +#endif //BACKGROUND_GC + } +end_find_segment: + + return found_seg; + } +#endif //SEG_MAPPING_TABLE +} +#endif //INTERIOR_POINTERS + +#if !defined(_DEBUG) && !defined(__GNUC__) +inline // This causes link errors if global optimization is off +#endif //!_DEBUG && !__GNUC__ +gc_heap* gc_heap::heap_of (uint8_t* o) +{ +#ifdef MULTIPLE_HEAPS + if (o == 0) + return g_heaps [0]; +#ifdef SEG_MAPPING_TABLE + gc_heap* hp = seg_mapping_table_heap_of (o); + return (hp ? 
                      hp : g_heaps[0]);
#else //SEG_MAPPING_TABLE
    ptrdiff_t delta = 0;
    heap_segment* seg = segment_of (o, delta);
    return (seg ? heap_segment_heap (seg) : g_heaps [0]);
#endif //SEG_MAPPING_TABLE
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#endif //MULTIPLE_HEAPS
}

// Maps an object address to its owning heap while a GC is in progress.
// Null or unmapped addresses fall back to g_heaps[0]; with a single heap
// this is trivially the current heap (__this).
inline
gc_heap* gc_heap::heap_of_gc (uint8_t* o)
{
#ifdef MULTIPLE_HEAPS
    if (o == 0)
        return g_heaps [0];
#ifdef SEG_MAPPING_TABLE
    gc_heap* hp = seg_mapping_table_heap_of_gc (o);
    return (hp ? hp : g_heaps[0]);
#else //SEG_MAPPING_TABLE
    ptrdiff_t delta = 0;
    heap_segment* seg = segment_of (o, delta);
    return (seg ? heap_segment_heap (seg) : g_heaps [0]);
#endif //SEG_MAPPING_TABLE
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#endif //MULTIPLE_HEAPS
}

#ifdef INTERIOR_POINTERS
// will find all heap objects (large and small)
//
// Given an interior pointer, returns the start of the object containing it,
// or 0 when the pointer is not inside an object this heap knows about.
// A zero brick entry means the address is not covered by the brick table
// (treated as the large object path: the segment is walked linearly);
// otherwise the brick table drives find_first_object.
uint8_t* gc_heap::find_object (uint8_t* interior, uint8_t* low)
{
    if (!gen0_bricks_cleared)
    {
#ifdef MULTIPLE_HEAPS
        assert (!"Should have already been done in server GC");
#endif //MULTIPLE_HEAPS
        gen0_bricks_cleared = TRUE;
        //initialize brick table for gen 0
        for (size_t b = brick_of (generation_allocation_start (generation_of (0)));
             b < brick_of (align_on_brick
                           (heap_segment_allocated (ephemeral_heap_segment)));
             b++)
        {
            set_brick (b, -1);
        }
    }
#ifdef FFIND_OBJECT
    //indicate that in the future this needs to be done during allocation
#ifdef MULTIPLE_HEAPS
    gen0_must_clear_bricks = FFIND_DECAY*gc_heap::n_heaps;
#else
    gen0_must_clear_bricks = FFIND_DECAY;
#endif //MULTIPLE_HEAPS
#endif //FFIND_OBJECT

    int brick_entry = brick_table [brick_of (interior)];
    if (brick_entry == 0)
    {
        // this is a pointer to a large object
        heap_segment* seg = find_segment_per_heap (interior, FALSE);
        if (seg
#ifdef FEATURE_CONSERVATIVE_GC
            && (!g_pConfig->GetGCConservative() || interior <= heap_segment_allocated(seg))
#endif
            )
        {
            // If interior falls within the first free object at the beginning of a generation,
            // we don't have brick entry for it, and we may incorrectly treat it as on large object heap.
            int align_const = get_alignment_constant (heap_segment_read_only_p (seg)
#ifdef FEATURE_CONSERVATIVE_GC
                || (g_pConfig->GetGCConservative() && !heap_segment_loh_p (seg))
#endif
                                                      );
            //int align_const = get_alignment_constant (heap_segment_read_only_p (seg));
            assert (interior < heap_segment_allocated (seg));

            // Linear walk of the segment, object by object, until the one
            // containing 'interior' is found.
            uint8_t* o = heap_segment_mem (seg);
            while (o < heap_segment_allocated (seg))
            {
                uint8_t* next_o = o + Align (size (o), align_const);
                assert (next_o > o);
                if ((o <= interior) && (interior < next_o))
                    return o;
                o = next_o;
            }
            return 0;
        }
        else
        {
            return 0;
        }
    }
    else if (interior >= low)
    {
        heap_segment* seg = find_segment_per_heap (interior, TRUE);
        if (seg)
        {
#ifdef FEATURE_CONSERVATIVE_GC
            if (interior >= heap_segment_allocated (seg))
                return 0;
#else
            assert (interior < heap_segment_allocated (seg));
#endif
            uint8_t* o = find_first_object (interior, heap_segment_mem (seg));
            return o;
        }
        else
            return 0;
    }
    else
        return 0;
}

// Like find_object, but used during the relocation phase: only addresses in
// [low, high[ are considered, and the containing object is located via the
// plug tree recorded in the brick table.
uint8_t*
gc_heap::find_object_for_relocation (uint8_t* interior, uint8_t* low, uint8_t* high)
{
    uint8_t* old_address = interior;
    if (!((old_address >= low) && (old_address < high)))
        return 0;
    uint8_t* plug = 0;
    size_t brick = brick_of (old_address);
    int brick_entry = brick_table [ brick ];
    if (brick_entry != 0)
    {
    retry:
        {
            // Negative entries chain backwards to the brick holding the tree root.
            while (brick_entry < 0)
            {
                brick = (brick + brick_entry);
                brick_entry = brick_table [ brick ];
            }
            uint8_t* old_loc = old_address;
            uint8_t* node = tree_search ((brick_address (brick) + brick_entry-1),
                                         old_loc);
            if (node <= old_loc)
                plug = node;
            else
            {
                // The containing plug starts in an earlier brick; back up and retry.
                brick = brick - 1;
                brick_entry = brick_table [ brick ];
                goto retry;
            }

        }
        assert (plug);
        //find the object by going along the plug
        uint8_t* o = plug;
        while (o <= interior)
        {
            uint8_t* next_o = o + Align (size (o));
            assert (next_o > o);
            if (next_o > interior)
            {
                break;
            }
            o = next_o;
        }
        assert ((o <= interior) && ((o + Align (size (o))) > interior));
        return o;
    }
    else
    {
        // this is a pointer to a large object
        heap_segment* seg = find_segment_per_heap (interior, FALSE);
        if (seg)
        {
            assert (interior < heap_segment_allocated (seg));

            uint8_t* o = heap_segment_mem (seg);
            while (o < heap_segment_allocated (seg))
            {
                uint8_t* next_o = o + Align (size (o));
                assert (next_o > o);
                // NOTE(review): this uses (o < interior) where find_object's
                // equivalent walk uses (o <= interior), so a pointer exactly at
                // an object's start is not matched here - confirm intentional.
                if ((o < interior) && (interior < next_o))
                    return o;
                o = next_o;
            }
            return 0;
        }
        else
        {
            return 0;
        }
    }
}
#else //INTERIOR_POINTERS
// Without interior pointer support the address must already be an object start.
inline
uint8_t* gc_heap::find_object (uint8_t* o, uint8_t* low)
{
    return o;
}
#endif //INTERIOR_POINTERS

#ifdef MARK_LIST
// m_boundary: record a newly marked object in the mark list (when there is
// room) and keep the slow/shigh bounds of marked objects up to date.
#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;}
#else
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;}
#endif //GC_CONFIG_DRIVEN
#else //MARK_LIST
#define m_boundary(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}
#endif //MARK_LIST

// Full GCs do not use the mark list, so only the bounds are tracked.
#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}

#define method_table(o) ((CObjectHeader*)(o))->GetMethodTable()

// Marks o unconditionally; returns TRUE only when this call did the marking,
// so each object is processed exactly once by callers that test the result.
inline
BOOL gc_heap::gc_mark1 (uint8_t* o)
{
    BOOL marked = !marked (o);
    set_marked (o);
    dprintf (3, ("*%Ix*, newly marked: %d", (size_t)o, marked));
    return marked;
}

// Marks o if it lies in [low, high[; with multiple heaps, falls back to
// locating the owning heap and checking against that heap's gc_low/gc_high.
inline
BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high)
{
    BOOL marked = FALSE;
    if ((o >= low) && (o < high))
        marked = gc_mark1 (o);
#ifdef MULTIPLE_HEAPS
    else if (o)
    {
        //find the heap
        gc_heap* hp = heap_of_gc (o);
        assert (hp);
        if ((o >= hp->gc_low) && (o < hp->gc_high))
            marked = gc_mark1 (o);
    }
#ifdef SNOOP_STATS
    snoop_stat.objects_checked_count++;

    if (marked)
    {
        snoop_stat.objects_marked_count++;
    }
    if (!o)
    {
        snoop_stat.zero_ref_count++;
    }

#endif //SNOOP_STATS
#endif //MULTIPLE_HEAPS
    return marked;
}

#ifdef BACKGROUND_GC

// Background GC keeps mark state in the mark array rather than in object headers.
inline
BOOL gc_heap::background_marked (uint8_t* o)
{
    return mark_array_marked (o);
}
// Sets o's bit in the mark array; returns TRUE only when this call set it.
inline
BOOL gc_heap::background_mark1 (uint8_t* o)
{
    BOOL to_mark = !mark_array_marked (o);

    dprintf (3, ("b*%Ix*b(%d)", (size_t)o, (to_mark ? 1 : 0)));
    if (to_mark)
    {
        mark_array_set_marked (o);
        dprintf (4, ("n*%Ix*n", (size_t)o));
        return TRUE;
    }
    else
        return FALSE;
}

// TODO: we could consider filtering out NULL's here instead of going to
// look for it on other heaps
//
// Background-GC analogue of gc_mark: range-check against the saved
// lowest/highest addresses, consulting the owning heap when out of range.
inline
BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high)
{
    BOOL marked = FALSE;
    if ((o >= low) && (o < high))
        marked = background_mark1 (o);
#ifdef MULTIPLE_HEAPS
    else if (o)
    {
        //find the heap
        gc_heap* hp = heap_of (o);
        assert (hp);
        if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address))
            marked = background_mark1 (o);
    }
#endif //MULTIPLE_HEAPS
    return marked;
}

#endif //BACKGROUND_GC

// End of the walkable region of a segment: the ephemeral segment is walked
// only up to the caller-supplied limit f; other segments to their allocated end.
inline
uint8_t* gc_heap::next_end (heap_segment* seg, uint8_t* f)
{
    if (seg == ephemeral_heap_segment)
        return f;
    else
        return heap_segment_allocated (seg);
}

#define new_start() {if (ppstop <= start) {break;} else {parm = start}}
#define ignore_start 0
#define use_start 1

// go_through_object: evaluate exp for every pointer slot of object o (method
// table mt, total size bytes), with parm naming the current slot. When
// start_useful is set, slots below 'start' are skipped (used to resume a
// partially scanned object). The first branch handles objects described by
// ordinary GC desc series; the else branch (negative series count) handles
// arrays of valuetypes, where each val_serie entry gives a (nptrs, skip) run
// repeated per array element.
#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp)      \
{                                                                           \
    CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt));           \
    CGCDescSeries* cur = map->GetHighestSeries();                           \
    ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries();                        \
                                                                            \
    if (cnt >= 0)                                                           \
    {                                                                       \
        CGCDescSeries* last = map->GetLowestSeries();                       \
        uint8_t** parm = 0;                                                 \
        do                                                                  \
        {                                                                   \
            assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset()));     \
            parm = (uint8_t**)((o) + cur->GetSeriesOffset());               \
            uint8_t** ppstop =                                              \
                (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\
            if (!start_useful || (uint8_t*)ppstop > (start))                \
            {                                                               \
                if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\
                while (parm < ppstop)                                       \
                {                                                           \
                    {exp}                                                   \
                    parm++;                                                 \
                }                                                           \
            }                                                               \
            cur--;                                                          \
                                                                            \
        } while (cur >= last);                                              \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* Handle the repeating case - array of valuetypes */               \
        uint8_t** parm = (uint8_t**)((o) + cur->startoffset);               \
        if (start_useful && start > (uint8_t*)parm)                         \
        {                                                                   \
            ptrdiff_t cs = mt->RawGetComponentSize();                       \
            parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \
        }                                                                   \
        while ((uint8_t*)parm < ((o)+(size)-plug_skew))                     \
        {                                                                   \
            /* cnt is negative here, so this runs |cnt| iterations */       \
            for (ptrdiff_t __i = 0; __i > cnt; __i--)                       \
            {                                                               \
                HALF_SIZE_T skip = cur->val_serie[__i].skip;                \
                HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs;              \
                uint8_t** ppstop = parm + nptrs;                            \
                if (!start_useful || (uint8_t*)ppstop > (start))            \
                {                                                           \
                    if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);  \
                    do                                                      \
                    {                                                       \
                        {exp}                                               \
                        parm++;                                             \
                    } while (parm < ppstop);                                \
                }                                                           \
                parm = (uint8_t**)((uint8_t*)ppstop + skip);                \
            }                                                               \
        }                                                                   \
    }                                                                       \
}

#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); }

// 1 thing to note about this macro:
// 1) you can use *parm safely but in general you don't want to use parm
// because for the collectible types it's not an address on the managed heap.
#ifndef COLLECTIBLE_CLASS
// go_through_object_cl: like go_through_object_nostart but only for objects
// that actually contain pointers.
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->ContainsPointers())                                      \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#else //COLLECTIBLE_CLASS
// Collectible build: additionally visit the type's class object, since the
// loader allocation it lives in can itself be collected.
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->Collectible())                                           \
    {                                                                       \
        uint8_t* class_obj = get_class_object (o);                          \
        uint8_t** parm = &class_obj;                                        \
        do {exp} while (false);                                             \
    }                                                                       \
    if (header(o)->ContainsPointers())                                      \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#endif //COLLECTIBLE_CLASS

// This starts a plug. But mark_stack_tos isn't increased until set_pinned_info is called.
//
// Records a pinned plug in the pinned-plug queue (mark_stack_array), saving
// the pre-plug gap/reloc info when requested, and flagging "short" preceding
// objects that cannot be walked during relocation.
void gc_heap::enque_pinned_plug (uint8_t* plug,
                                 BOOL save_pre_plug_info_p,
                                 uint8_t* last_object_in_last_plug)
{
    if (mark_stack_array_length <= mark_stack_tos)
    {
        if (!grow_mark_stack (mark_stack_array, mark_stack_array_length, MARK_STACK_INITIAL_LENGTH))
        {
            // we don't want to continue here due to security
            // risks. This happens very rarely and fixing it in the
            // way so that we can continue is a bit involved and will
            // not be done in Dev10.
            EEPOLICY_HANDLE_FATAL_ERROR(CORINFO_EXCEPTION_GC);
        }
    }

    dprintf (3, ("enquing P #%Id(%Ix): %Ix. oldest: %Id, LO: %Ix, pre: %d",
        mark_stack_tos, &mark_stack_array[mark_stack_tos], plug, mark_stack_bos, last_object_in_last_plug, (save_pre_plug_info_p ? 1 : 0)));
    mark& m = mark_stack_array[mark_stack_tos];
    m.first = plug;
    // Must be set now because if we have a short object we'll need the value of saved_pre_p.
    m.saved_pre_p = save_pre_plug_info_p;

    if (save_pre_plug_info_p)
    {
#ifdef SHORT_PLUGS
        // Temporarily clear the padding bit so the saved copy reflects the
        // unpadded object; restore it right after.
        BOOL is_padded = is_plug_padded (last_object_in_last_plug);
        if (is_padded)
            clear_plug_padded (last_object_in_last_plug);
#endif //SHORT_PLUGS
        memcpy (&(m.saved_pre_plug), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair));
#ifdef SHORT_PLUGS
        if (is_padded)
            set_plug_padded (last_object_in_last_plug);
#endif //SHORT_PLUGS

        memcpy (&(m.saved_pre_plug_reloc), &(((plug_and_gap*)plug)[-1]), sizeof (gap_reloc_pair));

        // If the last object in the last plug is too short, it requires special handling.
        size_t last_obj_size = plug - last_object_in_last_plug;
        if (last_obj_size < min_pre_pin_obj_size)
        {
            record_interesting_data_point (idp_pre_short);
#ifdef SHORT_PLUGS
            if (is_padded)
                record_interesting_data_point (idp_pre_short_padded);
#endif //SHORT_PLUGS
            dprintf (3, ("encountered a short object %Ix right before pinned plug %Ix!",
                         last_object_in_last_plug, plug));
            // Need to set the short bit regardless of having refs or not because we need to
            // indicate that this object is not walkable.
            m.set_pre_short();

#ifdef COLLECTIBLE_CLASS
            if (is_collectible (last_object_in_last_plug))
            {
                m.set_pre_short_collectible();
            }
#endif //COLLECTIBLE_CLASS

            if (contain_pointers (last_object_in_last_plug))
            {
                dprintf (3, ("short object: %Ix(%Ix)", last_object_in_last_plug, last_obj_size));

                // Remember, per slot, where the short object's refs fall
                // relative to the gap so relocation can fix them up later.
                go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval,
                    {
                        size_t gap_offset = (((size_t)pval - (size_t)(plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*);
                        dprintf (3, ("member: %Ix->%Ix, %Id ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset));
                        m.set_pre_short_bit (gap_offset);
                    }
                );
            }
        }
    }

    m.saved_post_p = FALSE;
}

// Saves the gap/reloc info for the object immediately following the most
// recently enqueued pinned plug (mark_stack_array[mark_stack_tos - 1]),
// mirroring the pre-plug handling in enque_pinned_plug.
void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_object_in_last_plug, uint8_t* post_plug)
{
    UNREFERENCED_PARAMETER(last_pinned_plug);

    mark& m = mark_stack_array[mark_stack_tos - 1];
    assert (last_pinned_plug == m.first);
    m.saved_post_plug_info_start = (uint8_t*)&(((plug_and_gap*)post_plug)[-1]);

#ifdef SHORT_PLUGS
    // As in enque_pinned_plug: save the unpadded view, then restore the bit.
    BOOL is_padded = is_plug_padded (last_object_in_last_plug);
    if (is_padded)
        clear_plug_padded (last_object_in_last_plug);
#endif //SHORT_PLUGS
    memcpy (&(m.saved_post_plug), m.saved_post_plug_info_start, sizeof (gap_reloc_pair));
#ifdef SHORT_PLUGS
    if (is_padded)
        set_plug_padded (last_object_in_last_plug);
#endif //SHORT_PLUGS

    memcpy (&(m.saved_post_plug_reloc), m.saved_post_plug_info_start, sizeof (gap_reloc_pair));

    // This is important - we need to clear all bits here except the last one.
    m.saved_post_p = TRUE;

#ifdef _DEBUG
    m.saved_post_plug_debug.gap = 1;
#endif //_DEBUG

    dprintf (3, ("PP %Ix has NP %Ix right after", last_pinned_plug, post_plug));

    size_t last_obj_size = post_plug - last_object_in_last_plug;
    if (last_obj_size < min_pre_pin_obj_size)
    {
        dprintf (3, ("PP %Ix last obj %Ix is too short", last_pinned_plug, last_object_in_last_plug));
        record_interesting_data_point (idp_post_short);
#ifdef SHORT_PLUGS
        if (is_padded)
            record_interesting_data_point (idp_post_short_padded);
#endif //SHORT_PLUGS
        m.set_post_short();
        verify_pinned_queue_p = TRUE;

#ifdef COLLECTIBLE_CLASS
        if (is_collectible (last_object_in_last_plug))
        {
            m.set_post_short_collectible();
        }
#endif //COLLECTIBLE_CLASS

        if (contain_pointers (last_object_in_last_plug))
        {
            dprintf (3, ("short object: %Ix(%Ix)", last_object_in_last_plug, last_obj_size));

            // TODO: since we won't be able to walk this object in relocation, we still need to
            // take care of collectible assemblies here.
            go_through_object_nostart (method_table(last_object_in_last_plug), last_object_in_last_plug, last_obj_size, pval,
                {
                    size_t gap_offset = (((size_t)pval - (size_t)(post_plug - sizeof (gap_reloc_pair) - plug_skew))) / sizeof (uint8_t*);
                    dprintf (3, ("member: %Ix->%Ix, %Id ptrs from beginning of gap", (uint8_t*)pval, *pval, gap_offset));
                    m.set_post_short_bit (gap_offset);
                }
            );
        }
    }
}

//#define PREFETCH
#ifdef PREFETCH
// x86-only inline-asm prefetch of addr into the cache (PREFETCHT0).
__declspec(naked) void __fastcall Prefetch(void* addr)
{
   __asm {
       PREFETCHT0 [ECX]
        ret
    };
}
#else //PREFETCH
// No-op placeholder when PREFETCH is not enabled.
inline void Prefetch (void* addr)
{
    UNREFERENCED_PARAMETER(addr);
}
#endif //PREFETCH

#ifdef MH_SC_MARK
// Volatile view of a slot in hp's mark stack, used by the work-stealing code.
inline
VOLATILE(uint8_t*)& gc_heap::ref_mark_stack (gc_heap* hp, int index)
{
    return ((VOLATILE(uint8_t*)*)(hp->mark_stack_array))[index];
}

#endif //MH_SC_MARK

// Tag bits stored in the low bits of mark-stack entries for work stealing.
#define stolen 2
#define partial 1
#define partial_object 3
// Strips the tag bits, yielding the real object/slot address.
inline
uint8_t* ref_from_slot (uint8_t* r)
{
    return (uint8_t*)((size_t)r & ~(stolen | partial));
}
inline
BOOL stolen_p (uint8_t* r)
{
    return (((size_t)r&2) && !((size_t)r&1));
}
inline
BOOL ready_p (uint8_t* r)
{
    return ((size_t)r != 1);
}
inline
BOOL partial_p (uint8_t* r)
{
    return (((size_t)r&1) && !((size_t)r&2));
}
inline
BOOL straight_ref_p (uint8_t* r)
{
    return (!stolen_p (r) && !partial_p (r));
}
inline
BOOL partial_object_p (uint8_t* r)
{
    return (((size_t)r & partial_object) == partial_object);
}
inline
BOOL ref_p (uint8_t* r)
{
    return (straight_ref_p (r) || partial_object_p (r));
}

// Iteratively marks oo and everything transitively reachable from it, using
// mark_stack_array as an explicit stack. 'start' is where scanning resumes
// within oo (for partially scanned large objects). Large objects are pushed
// as (parent, partial-tagged slot) pairs so other heaps can steal work
// (MH_SC_MARK); overflowing objects are recorded in min/max_overflow_address
// for a later rescan.
void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL)
{
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_tos = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)mark_stack_array;
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_limit = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)&mark_stack_array[mark_stack_array_length];
    SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_base = mark_stack_tos;
#ifdef SORT_MARK_STACK
    SERVER_SC_MARK_VOLATILE(uint8_t*)* sorted_tos = mark_stack_base;
#endif //SORT_MARK_STACK

    // If we are doing a full GC we don't use mark list anyway so use m_boundary_fullgc that doesn't
    // update mark list.
    BOOL full_p = (settings.condemned_generation == max_generation);

    assert ((start >= oo) && (start < oo+size(oo)));

#ifndef MH_SC_MARK
    *mark_stack_tos = oo;
#endif //!MH_SC_MARK

    while (1)
    {
#ifdef MULTIPLE_HEAPS
#else //MULTIPLE_HEAPS
        const int thread = 0;
#endif //MULTIPLE_HEAPS

        // (size_t)oo == 4 is the sentinel a stealing heap leaves behind.
        if (oo && ((size_t)oo != 4))
        {
            size_t s = 0;
            if (stolen_p (oo))
            {
                --mark_stack_tos;
                goto next_level;
            }
            else if (!partial_p (oo) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*))))
            {
                // Small object: push all of its refs at once if they fit.
                BOOL overflow_p = FALSE;

                if (mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit - 1))
                {
                    size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0);
                    if (mark_stack_tos + CGCDesc::GetNumPointers(method_table(oo), s, num_components) >= (mark_stack_limit - 1))
                    {
                        overflow_p = TRUE;
                    }
                }

                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %Ix ", (size_t)oo));

                    go_through_object_cl (method_table(oo), oo, s, ppslot,
                                          {
                                              uint8_t* o = *ppslot;
                                              Prefetch(o);
                                              if (gc_mark (o, gc_low, gc_high))
                                              {
                                                  if (full_p)
                                                  {
                                                      m_boundary_fullgc (o);
                                                  }
                                                  else
                                                  {
                                                      m_boundary (o);
                                                  }
                                                  size_t obj_size = size (o);
                                                  promoted_bytes (thread) += obj_size;
                                                  if (contain_pointers_or_collectible (o))
                                                  {
                                                      *(mark_stack_tos++) = o;
                                                  }
                                              }
                                          }
                        );
                }
                else
                {
                    dprintf(3,("mark stack overflow for object %Ix ", (size_t)oo));
                    min_overflow_address = min (min_overflow_address, oo);
                    max_overflow_address = max (max_overflow_address, oo);
                }
            }
            else
            {
                // Large object, or resumption of a partially scanned one.
                if (partial_p (oo))
                {
                    start = ref_from_slot (oo);
                    oo = ref_from_slot (*(--mark_stack_tos));
                    dprintf (4, ("oo: %Ix, start: %Ix\n", (size_t)oo, (size_t)start));
                    assert ((oo < start) && (start < (oo + size (oo))));
                }
#ifdef COLLECTIBLE_CLASS
                else
                {
                    // If there's a class object, push it now. We are guaranteed to have the slot since
                    // we just popped one object off.
                    if (is_collectible (oo))
                    {
                        uint8_t* class_obj = get_class_object (oo);
                        if (gc_mark (class_obj, gc_low, gc_high))
                        {
                            if (full_p)
                            {
                                m_boundary_fullgc (class_obj);
                            }
                            else
                            {
                                m_boundary (class_obj);
                            }

                            size_t obj_size = size (class_obj);
                            promoted_bytes (thread) += obj_size;
                            *(mark_stack_tos++) = class_obj;
                        }
                    }
                }
#endif //COLLECTIBLE_CLASS

                s = size (oo);

                BOOL overflow_p = FALSE;

                if (mark_stack_tos + (num_partial_refs + 2) >= mark_stack_limit)
                {
                    overflow_p = TRUE;
                }
                if (overflow_p == FALSE)
                {
                    dprintf(3,("pushing mark for %Ix ", (size_t)oo));

                    //push the object and its current
                    SERVER_SC_MARK_VOLATILE(uint8_t*)* place = ++mark_stack_tos;
                    mark_stack_tos++;
#ifdef MH_SC_MARK
                    *(place-1) = 0;
                    *(place) = (uint8_t*)partial;
#endif //MH_SC_MARK
                    int i = num_partial_refs;
                    uint8_t* ref_to_continue = 0;

                    // Scan at most num_partial_refs children, then record
                    // where to resume via a partial-tagged slot address.
                    go_through_object (method_table(oo), oo, s, ppslot,
                                       start, use_start, (oo + s),
                                       {
                                           uint8_t* o = *ppslot;
                                           Prefetch(o);
                                           if (gc_mark (o, gc_low, gc_high))
                                           {
                                               if (full_p)
                                               {
                                                   m_boundary_fullgc (o);
                                               }
                                               else
                                               {
                                                   m_boundary (o);
                                               }
                                               size_t obj_size = size (o);
                                               promoted_bytes (thread) += obj_size;
                                               if (contain_pointers_or_collectible (o))
                                               {
                                                   *(mark_stack_tos++) = o;
                                                   if (--i == 0)
                                                   {
                                                       ref_to_continue = (uint8_t*)((size_t)(ppslot+1) | partial);
                                                       goto more_to_do;
                                                   }

                                               }
                                           }

                                       }
                        );
                    //we are finished with this object
                    assert (ref_to_continue == 0);
#ifdef MH_SC_MARK
                    assert ((*(place-1)) == (uint8_t*)0);
#else //MH_SC_MARK
                    *(place-1) = 0;
#endif //MH_SC_MARK
                    *place = 0;
                    // shouldn't we decrease tos by 2 here??

more_to_do:
                    if (ref_to_continue)
                    {
                        //update the start
#ifdef MH_SC_MARK
                        assert ((*(place-1)) == (uint8_t*)0);
                        *(place-1) = (uint8_t*)((size_t)oo | partial_object);
                        assert (((*place) == (uint8_t*)1) || ((*place) == (uint8_t*)2));
#endif //MH_SC_MARK
                        *place = ref_to_continue;
                    }
                }
                else
                {
                    dprintf(3,("mark stack overflow for object %Ix ", (size_t)oo));
                    min_overflow_address = min (min_overflow_address, oo);
                    max_overflow_address = max (max_overflow_address, oo);
                }
            }
#ifdef SORT_MARK_STACK
            if (mark_stack_tos > sorted_tos + mark_stack_array_length/8)
            {
                rqsort1 (sorted_tos, mark_stack_tos-1);
                sorted_tos = mark_stack_tos-1;
            }
#endif //SORT_MARK_STACK
        }
    next_level:
        if (!(mark_stack_empty_p()))
        {
            oo = *(--mark_stack_tos);
            start = oo;

#ifdef SORT_MARK_STACK
            sorted_tos = min ((size_t)sorted_tos, (size_t)mark_stack_tos);
#endif //SORT_MARK_STACK
        }
        else
            break;
    }
}

#ifdef MH_SC_MARK
// True when both heap numbers map to the same NUMA node.
BOOL same_numa_node_p (int hn1, int hn2)
{
    return (heap_select::find_numa_node_from_heap_no (hn1) == heap_select::find_numa_node_from_heap_no (hn2));
}

// Picks the next heap (after current_buddy, wrapping) that is on the same
// NUMA node as this_heap_number; returns current_buddy if none qualifies.
int find_next_buddy_heap (int this_heap_number, int current_buddy, int n_heaps)
{
    int hn = (current_buddy+1)%n_heaps;
    while (hn != current_buddy)
    {
        if ((this_heap_number != hn) && (same_numa_node_p (this_heap_number, hn)))
            return hn;
        hn = (hn+1)%n_heaps;
    }
    return current_buddy;
}

// Work-stealing loop: when this heap runs out of marking work it snoops the
// bottom max_snoop_level entries of other heaps' mark stacks and steals
// objects (or partial-mark parents) via compare-exchange.
void
gc_heap::mark_steal()
{
    mark_stack_busy() = 0;
    //clear the mark stack in the snooping range
    for (int i = 0; i < max_snoop_level; i++)
    {
        ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0;
    }

    //pick the next heap as our buddy
    int thpn = find_next_buddy_heap (heap_number, heap_number, n_heaps);

#ifdef SNOOP_STATS
    dprintf (SNOOP_LOG, ("(GC%d)heap%d: start snooping %d", settings.gc_index, heap_number, (heap_number+1)%n_heaps));
    uint32_t begin_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //SNOOP_STATS

    int idle_loop_count = 0;
    int
        first_not_ready_level = 0;

    while (1)
    {
        gc_heap* hp = g_heaps [thpn];
        int level = first_not_ready_level;
        first_not_ready_level = 0;

        while (check_next_mark_stack (hp) && (level < (max_snoop_level-1)))
        {
            idle_loop_count = 0;
#ifdef SNOOP_STATS
            snoop_stat.busy_count++;
            dprintf (SNOOP_LOG, ("heap%d: looking at next heap level %d stack contents: %Ix",
                                 heap_number, level, (int)((uint8_t**)(hp->mark_stack_array))[level]));
#endif //SNOOP_STATS

            uint8_t* o = ref_mark_stack (hp, level);

            uint8_t* start = o;
            if (ref_p (o))
            {
                mark_stack_busy() = 1;

                BOOL success = TRUE;
                uint8_t* next = (ref_mark_stack (hp, level+1));
                if (ref_p (next))
                {
                    if (((size_t)o > 4) && !partial_object_p (o))
                    {
                        //this is a normal object, not a partial mark tuple
                        //success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), 0, o)==o);
                        // Leave the sentinel value 4 behind so the victim heap
                        // knows the entry was taken.
                        success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level), (uint8_t*)4, o)==o);
#ifdef SNOOP_STATS
                        snoop_stat.interlocked_count++;
                        if (success)
                            snoop_stat.normal_count++;
#endif //SNOOP_STATS
                    }
                    else
                    {
                        //it is a stolen entry, or beginning/ending of a partial mark
                        level++;
#ifdef SNOOP_STATS
                        snoop_stat.stolen_or_pm_count++;
#endif //SNOOP_STATS
                        success = FALSE;
                    }
                }
                else if (stolen_p (next))
                {
                    //ignore the stolen guy and go to the next level
                    success = FALSE;
                    level+=2;
#ifdef SNOOP_STATS
                    snoop_stat.stolen_entry_count++;
#endif //SNOOP_STATS
                }
                else
                {
                    assert (partial_p (next));
                    start = ref_from_slot (next);
                    //re-read the object
                    o = ref_from_slot (ref_mark_stack (hp, level));
                    if (o && start)
                    {
                        //steal the object
                        success = (Interlocked::CompareExchangePointer (&ref_mark_stack (hp, level+1), (uint8_t*)stolen, next)==next);
#ifdef SNOOP_STATS
                        snoop_stat.interlocked_count++;
                        if (success)
                        {
                            snoop_stat.partial_mark_parent_count++;
                        }
#endif //SNOOP_STATS
                    }
                    else
                    {
                        // stack is not ready, or o is completely different from the last time we read from this stack level.
                        // go up 2 levels to steal children or totally unrelated objects.
                        success = FALSE;
                        if (first_not_ready_level == 0)
                        {
                            first_not_ready_level = level;
                        }
                        level+=2;
#ifdef SNOOP_STATS
                        snoop_stat.pm_not_ready_count++;
#endif //SNOOP_STATS
                    }
                }
                if (success)
                {

#ifdef SNOOP_STATS
                    dprintf (SNOOP_LOG, ("heap%d: marking %Ix from %d [%d] tl:%dms",
                            heap_number, (size_t)o, (heap_number+1)%n_heaps, level,
                            (GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick)));
                    uint32_t start_tick = GCToOSInterface::GetLowPrecisionTimeStamp();
#endif //SNOOP_STATS

                    // Do the stolen work on our own mark stack.
                    mark_object_simple1 (o, start, heap_number);

#ifdef SNOOP_STATS
                    dprintf (SNOOP_LOG, ("heap%d: done marking %Ix from %d [%d] %dms tl:%dms",
                            heap_number, (size_t)o, (heap_number+1)%n_heaps, level,
                            (GCToOSInterface::GetLowPrecisionTimeStamp()-start_tick),(GCToOSInterface::GetLowPrecisionTimeStamp()-begin_tick)));
#endif //SNOOP_STATS

                    mark_stack_busy() = 0;

                    //clear the mark stack in snooping range
                    for (int i = 0; i < max_snoop_level; i++)
                    {
                        if (((uint8_t**)mark_stack_array)[i] != 0)
                        {
                            ((VOLATILE(uint8_t*)*)(mark_stack_array))[i] = 0;
#ifdef SNOOP_STATS
                            snoop_stat.stack_bottom_clear_count++;
#endif //SNOOP_STATS
                        }
                    }

                    level = 0;
                }
                mark_stack_busy() = 0;
            }
            else
            {
                //slot is either partial or stolen
                level++;
            }
        }
        if ((first_not_ready_level != 0) && hp->mark_stack_busy())
        {
            continue;
        }
        if (!hp->mark_stack_busy())
        {
            first_not_ready_level = 0;
            idle_loop_count++;

            if ((idle_loop_count % (6) )==1)
            {
#ifdef SNOOP_STATS
                snoop_stat.switch_to_thread_count++;
#endif //SNOOP_STATS
                GCToOSInterface::Sleep(1);
            }
            int free_count = 1;
#ifdef SNOOP_STATS
            snoop_stat.stack_idle_count++;
            //dprintf (SNOOP_LOG, ("heap%d: counting idle threads", heap_number));
#endif //SNOOP_STATS
            // Count idle heaps; if a busy heap is found (same NUMA node, or
            // any node after enough idle loops) make it the next victim.
            for (int hpn = (heap_number+1)%n_heaps; hpn != heap_number;)
            {
                if (!((g_heaps [hpn])->mark_stack_busy()))
                {
                    free_count++;
#ifdef SNOOP_STATS
                    dprintf (SNOOP_LOG, ("heap%d: %d idle", heap_number, free_count));
#endif //SNOOP_STATS
                }
                else if (same_numa_node_p (hpn, heap_number) || ((idle_loop_count%1000))==999)
                {
                    thpn = hpn;
                    break;
                }
                hpn = (hpn+1)%n_heaps;
                YieldProcessor();
            }
            // Everyone idle: marking is done.
            if (free_count == n_heaps)
            {
                break;
            }
        }
    }
}

// True when next_heap still has (potential) work to steal.
inline
BOOL gc_heap::check_next_mark_stack (gc_heap* next_heap)
{
#ifdef SNOOP_STATS
    snoop_stat.check_level_count++;
#endif //SNOOP_STATS
    return (next_heap->mark_stack_busy()>=1);
}
#endif //MH_SC_MARK

#ifdef SNOOP_STATS
// Dumps the per-heap work-stealing counters (dprintf tables plus a printf
// summary).
void gc_heap::print_snoop_stat()
{
    dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s",
        "heap", "check", "zero", "mark", "stole", "pstack", "nstack", "nonsk"));
    dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d",
        snoop_stat.heap_index,
        snoop_stat.objects_checked_count,
        snoop_stat.zero_ref_count,
        snoop_stat.objects_marked_count,
        snoop_stat.stolen_stack_count,
        snoop_stat.partial_stack_count,
        snoop_stat.normal_stack_count,
        snoop_stat.non_stack_count));
    dprintf (1234, ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s",
        "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "clear"));
    dprintf (1234, ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.check_level_count,
        snoop_stat.busy_count,
        snoop_stat.interlocked_count,
        snoop_stat.partial_mark_parent_count,
        snoop_stat.stolen_or_pm_count,
        snoop_stat.stolen_entry_count,
        snoop_stat.pm_not_ready_count,
        snoop_stat.normal_count,
        snoop_stat.stack_bottom_clear_count));

    printf ("\n%4s | %8s | %8s | %8s | %8s | %8s\n",
        "heap", "check", "zero", "mark", "idle", "switch");
    printf ("%4d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.objects_checked_count,
        snoop_stat.zero_ref_count,
        snoop_stat.objects_marked_count,
        snoop_stat.stack_idle_count,
        snoop_stat.switch_to_thread_count);
    printf ("%4s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n",
        "heap", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear");
    printf ("%4d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
        snoop_stat.heap_index,
        snoop_stat.check_level_count,
        snoop_stat.busy_count,
        snoop_stat.interlocked_count,
        snoop_stat.partial_mark_parent_count,
        snoop_stat.stolen_or_pm_count,
        snoop_stat.stolen_entry_count,
        snoop_stat.pm_not_ready_count,
        snoop_stat.normal_count,
        snoop_stat.stack_bottom_clear_count);
}
#endif //SNOOP_STATS

#ifdef HEAP_ANALYZE
// Heap-analyze wrapper around mark_object_simple: additionally records the
// object containing *po in internal_root_array (growing it on demand, and
// giving up - heap_analyze_success = FALSE - rather than exhausting memory).
void
gc_heap::ha_mark_object_simple (uint8_t** po THREAD_NUMBER_DCL)
{
    if (!internal_root_array)
    {
        internal_root_array = new (nothrow) uint8_t* [internal_root_array_length];
        if (!internal_root_array)
        {
            heap_analyze_success = FALSE;
        }
    }

    if (heap_analyze_success && (internal_root_array_length <= internal_root_array_index))
    {
        size_t new_size = 2*internal_root_array_length;

        uint64_t available_physical = 0;
        get_memory_info (NULL, &available_physical);
        // Cap the array at 10% of available physical memory.
        if (new_size > (size_t)(available_physical / 10))
        {
            heap_analyze_success = FALSE;
        }
        else
        {
            uint8_t** tmp = new (nothrow) uint8_t* [new_size];
            if (tmp)
            {
                memcpy (tmp, internal_root_array,
                        internal_root_array_length*sizeof (uint8_t*));
                delete[] internal_root_array;
                internal_root_array = tmp;
                internal_root_array_length = new_size;
            }
            else
            {
                heap_analyze_success = FALSE;
            }
        }
    }

    if (heap_analyze_success)
    {
        PREFIX_ASSUME(internal_root_array_index < internal_root_array_length);

        uint8_t* ref = (uint8_t*)po;
        // Only record a new root when ref is outside the object recorded last.
        if (!current_obj ||
            !((ref >= current_obj) && (ref < (current_obj + current_obj_size))))
        {
            gc_heap* hp = gc_heap::heap_of (ref);
            current_obj = hp->find_object (ref, hp->lowest_address);
            current_obj_size = size (current_obj);

            internal_root_array[internal_root_array_index] = current_obj;
internal_root_array_index++; + } + } + + mark_object_simple (po THREAD_NUMBER_ARG); +} +#endif //HEAP_ANALYZE + +//this method assumes that *po is in the [low. high[ range +void +gc_heap::mark_object_simple (uint8_t** po THREAD_NUMBER_DCL) +{ + uint8_t* o = *po; +#ifdef MULTIPLE_HEAPS +#else //MULTIPLE_HEAPS + const int thread = 0; +#endif //MULTIPLE_HEAPS + { +#ifdef SNOOP_STATS + snoop_stat.objects_checked_count++; +#endif //SNOOP_STATS + + if (gc_mark1 (o)) + { + m_boundary (o); + size_t s = size (o); + promoted_bytes (thread) += s; + { + go_through_object_cl (method_table(o), o, s, poo, + { + uint8_t* oo = *poo; + if (gc_mark (oo, gc_low, gc_high)) + { + m_boundary (oo); + size_t obj_size = size (oo); + promoted_bytes (thread) += obj_size; + + if (contain_pointers_or_collectible (oo)) + mark_object_simple1 (oo, oo THREAD_NUMBER_ARG); + } + } + ); + } + } + } +} + +inline +uint8_t* gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL) +{ + if ((o >= gc_low) && (o < gc_high)) + mark_object_simple (&o THREAD_NUMBER_ARG); +#ifdef MULTIPLE_HEAPS + else if (o) + { + //find the heap + gc_heap* hp = heap_of (o); + assert (hp); + if ((o >= hp->gc_low) && (o < hp->gc_high)) + mark_object_simple (&o THREAD_NUMBER_ARG); + } +#endif //MULTIPLE_HEAPS + + return o; +} + +#ifdef BACKGROUND_GC + +void gc_heap::background_mark_simple1 (uint8_t* oo THREAD_NUMBER_DCL) +{ + uint8_t** mark_stack_limit = &background_mark_stack_array[background_mark_stack_array_length]; + +#ifdef SORT_MARK_STACK + uint8_t** sorted_tos = background_mark_stack_array; +#endif //SORT_MARK_STACK + + background_mark_stack_tos = background_mark_stack_array; + + while (1) + { +#ifdef MULTIPLE_HEAPS +#else //MULTIPLE_HEAPS + const int thread = 0; +#endif //MULTIPLE_HEAPS + if (oo) + { + size_t s = 0; + if ((((size_t)oo & 1) == 0) && ((s = size (oo)) < (partial_size_th*sizeof (uint8_t*)))) + { + BOOL overflow_p = FALSE; + + if (background_mark_stack_tos + (s) /sizeof (uint8_t*) >= (mark_stack_limit - 1)) + { + 
size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0); + size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components); + if (background_mark_stack_tos + num_pointers >= (mark_stack_limit - 1)) + { + dprintf (2, ("h%d: %Id left, obj (mt: %Ix) %Id ptrs", + heap_number, + (size_t)(mark_stack_limit - 1 - background_mark_stack_tos), + method_table(oo), + num_pointers)); + + bgc_overflow_count++; + overflow_p = TRUE; + } + } + + if (overflow_p == FALSE) + { + dprintf(3,("pushing mark for %Ix ", (size_t)oo)); + + go_through_object_cl (method_table(oo), oo, s, ppslot, + { + uint8_t* o = *ppslot; + Prefetch(o); + if (background_mark (o, + background_saved_lowest_address, + background_saved_highest_address)) + { + //m_boundary (o); + size_t obj_size = size (o); + bpromoted_bytes (thread) += obj_size; + if (contain_pointers_or_collectible (o)) + { + *(background_mark_stack_tos++) = o; + + } + } + } + ); + } + else + { + dprintf (3,("mark stack overflow for object %Ix ", (size_t)oo)); + background_min_overflow_address = min (background_min_overflow_address, oo); + background_max_overflow_address = max (background_max_overflow_address, oo); + } + } + else + { + uint8_t* start = oo; + if ((size_t)oo & 1) + { + oo = (uint8_t*)((size_t)oo & ~1); + start = *(--background_mark_stack_tos); + dprintf (4, ("oo: %Ix, start: %Ix\n", (size_t)oo, (size_t)start)); + } +#ifdef COLLECTIBLE_CLASS + else + { + // If there's a class object, push it now. We are guaranteed to have the slot since + // we just popped one object off. 
+ if (is_collectible (oo)) + { + uint8_t* class_obj = get_class_object (oo); + if (background_mark (class_obj, + background_saved_lowest_address, + background_saved_highest_address)) + { + size_t obj_size = size (class_obj); + bpromoted_bytes (thread) += obj_size; + + *(background_mark_stack_tos++) = class_obj; + } + } + } +#endif //COLLECTIBLE_CLASS + + s = size (oo); + + BOOL overflow_p = FALSE; + + if (background_mark_stack_tos + (num_partial_refs + 2) >= mark_stack_limit) + { + size_t num_components = ((method_table(oo))->HasComponentSize() ? ((CObjectHeader*)oo)->GetNumComponents() : 0); + size_t num_pointers = CGCDesc::GetNumPointers(method_table(oo), s, num_components); + + dprintf (2, ("h%d: PM: %Id left, obj %Ix (mt: %Ix) start: %Ix, total: %Id", + heap_number, + (size_t)(mark_stack_limit - background_mark_stack_tos), + oo, + method_table(oo), + start, + num_pointers)); + + bgc_overflow_count++; + overflow_p = TRUE; + } + if (overflow_p == FALSE) + { + dprintf(3,("pushing mark for %Ix ", (size_t)oo)); + + //push the object and its current + uint8_t** place = background_mark_stack_tos++; + *(place) = start; + *(background_mark_stack_tos++) = (uint8_t*)((size_t)oo | 1); + + int i = num_partial_refs; + + go_through_object (method_table(oo), oo, s, ppslot, + start, use_start, (oo + s), + { + uint8_t* o = *ppslot; + Prefetch(o); + + if (background_mark (o, + background_saved_lowest_address, + background_saved_highest_address)) + { + //m_boundary (o); + size_t obj_size = size (o); + bpromoted_bytes (thread) += obj_size; + if (contain_pointers_or_collectible (o)) + { + *(background_mark_stack_tos++) = o; + if (--i == 0) + { + //update the start + *place = (uint8_t*)(ppslot+1); + goto more_to_do; + } + + } + } + + } + ); + //we are finished with this object + *place = 0; + *(place+1) = 0; + + more_to_do:; + } + else + { + dprintf (3,("mark stack overflow for object %Ix ", (size_t)oo)); + background_min_overflow_address = min (background_min_overflow_address, oo); 
                    background_max_overflow_address = max (background_max_overflow_address, oo);
                }
            }
        }
#ifdef SORT_MARK_STACK
        // Periodically sort the newly pushed portion of the mark stack so pops
        // proceed in address order (better locality during marking).
        if (background_mark_stack_tos > sorted_tos + mark_stack_array_length/8)
        {
            rqsort1 (sorted_tos, background_mark_stack_tos-1);
            sorted_tos = background_mark_stack_tos-1;
        }
#endif //SORT_MARK_STACK

        // Give a waiting foreground GC a chance to run between objects.
        allow_fgc();

        if (!(background_mark_stack_tos == background_mark_stack_array))
        {
            oo = *(--background_mark_stack_tos);

#ifdef SORT_MARK_STACK
            sorted_tos = (uint8_t**)min ((size_t)sorted_tos, (size_t)background_mark_stack_tos);
#endif //SORT_MARK_STACK
        }
        else
            break;
    }

    // The mark stack must be fully drained when we leave.
    assert (background_mark_stack_tos == background_mark_stack_array);


}

//this version is different than the foreground GC because
//it can't keep pointers to the inside of an object
//while calling background_mark_simple1. The object could be moved
//by an intervening foreground gc.
//this method assumes that o is in the [low, high[ range
void
gc_heap::background_mark_simple (uint8_t* o THREAD_NUMBER_DCL)
{
#ifdef MULTIPLE_HEAPS
#else  //MULTIPLE_HEAPS
    const int thread = 0;
#endif //MULTIPLE_HEAPS
    {
        dprintf (3, ("bmarking %Ix", o));

        // background_mark1 returns TRUE only when this call newly set the
        // mark bit, so size accounting and tracing happen exactly once.
        if (background_mark1 (o))
        {
            //m_boundary (o);
            size_t s = size (o);
            bpromoted_bytes (thread) += s;

            if (contain_pointers_or_collectible (o))
            {
                background_mark_simple1 (o THREAD_NUMBER_ARG);
            }
        }
    }
}

// Marks o if it falls inside the background GC's saved [lowest, highest[
// address range; pointers outside the range are ignored (logged at level 3).
// Returns o unchanged so it can be used in promote-style call chains.
inline
uint8_t* gc_heap::background_mark_object (uint8_t* o THREAD_NUMBER_DCL)
{
    if ((o >= background_saved_lowest_address) && (o < background_saved_highest_address))
    {
        background_mark_simple (o THREAD_NUMBER_ARG);
    }
    else
    {
        if (o)
        {
            dprintf (3, ("or-%Ix", o));
        }
    }
    return o;
}

// Verification callback: asserts that the referenced object was already
// marked by the concurrent background GC; FATAL_GC_ERROR() if not.
void gc_heap::background_verify_mark (Object*& object, ScanContext* sc, uint32_t flags)
{
    UNREFERENCED_PARAMETER(sc);

    assert (settings.concurrent);
    uint8_t* o = (uint8_t*)object;

    gc_heap* hp = gc_heap::heap_of (o);
#ifdef INTERIOR_POINTERS
    if (flags & GC_CALL_INTERIOR)
    {
        // Interior pointer: map it back to the start of the containing object.
        o = hp->find_object (o, background_saved_lowest_address);
    }
#endif //INTERIOR_POINTERS

    if (!background_object_marked (o, FALSE))
    {
        FATAL_GC_ERROR();
    }
}

// Promotion callback used while scanning roots for a background GC.
// Marks the object (it will be visited/traced later) rather than pushing it,
// to save space in the mark array. Silently ignores null pointers, pointers
// outside the background GC's saved address range, and (for conservative GC)
// pointers into free objects.
void gc_heap::background_promote (Object** ppObject, ScanContext* sc, uint32_t flags)
{
    UNREFERENCED_PARAMETER(sc);
    //in order to save space on the array, mark the object,
    //knowing that it will be visited later
    assert (settings.concurrent);

    THREAD_NUMBER_FROM_CONTEXT;
#ifndef MULTIPLE_HEAPS
    const int thread = 0;
#endif //!MULTIPLE_HEAPS

    uint8_t* o = (uint8_t*)*ppObject;

    if (o == 0)
        return;

#ifdef DEBUG_DestroyedHandleValue
    // we can race with destroy handle during concurrent scan
    if (o == (uint8_t*)DEBUG_DestroyedHandleValue)
        return;
#endif //DEBUG_DestroyedHandleValue

    HEAP_FROM_THREAD;

    gc_heap* hp = gc_heap::heap_of (o);

    if ((o < hp->background_saved_lowest_address) || (o >= hp->background_saved_highest_address))
    {
        return;
    }

#ifdef INTERIOR_POINTERS
    if (flags & GC_CALL_INTERIOR)
    {
        o = hp->find_object (o, hp->background_saved_lowest_address);
        if (o == 0)
            return;
    }
#endif //INTERIOR_POINTERS

#ifdef FEATURE_CONSERVATIVE_GC
    // For conservative GC, a value on stack may point to middle of a free object.
    // In this case, we don't need to promote the pointer.
    if (g_pConfig->GetGCConservative() && ((CObjectHeader*)o)->IsFree())
    {
        return;
    }
#endif //FEATURE_CONSERVATIVE_GC

#ifdef _DEBUG
    ((CObjectHeader*)o)->Validate();
#endif //_DEBUG

    dprintf (BGC_LOG, ("Background Promote %Ix", (size_t)o));

    //needs to be called before the marking because it is possible for a foreground
    //gc to take place during the mark and move the object
    STRESS_LOG3(LF_GC|LF_GCROOTS, LL_INFO1000000, "    GCHeap::Promote: Promote GC Root *%p = %p MT = %pT", ppObject, o, o ? ((Object*) o)->GetGCSafeMethodTable() : NULL);

    hpt->background_mark_simple (o THREAD_NUMBER_ARG);
}

//used by the ephemeral collection to scan the local background structures
//containing references.
void
gc_heap::scan_background_roots (promote_func* fn, int hn, ScanContext *pSC)
{
    ScanContext sc;
    if (pSC == 0)
        pSC = &sc;

    pSC->thread_number = hn;

#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
    pSC->pCurrentDomain = 0;
#endif

    // Relocation needs special handling below because the object payload (and
    // hence its size) may already have been overwritten.
    BOOL relocate_p = (fn == &GCHeap::Relocate);

    dprintf (3, ("Scanning background mark list"));

    //scan mark_list
    size_t mark_list_finger = 0;
    while (mark_list_finger < c_mark_list_index)
    {
        uint8_t** o = &c_mark_list [mark_list_finger];
        if (!relocate_p)
        {
            // We may not be able to calculate the size during relocate as POPO
            // may have written over the object.
            size_t s = size (*o);
            assert (Align (s) >= Align (min_obj_size));
            dprintf(3,("background root %Ix", (size_t)*o));
        }
        (*fn) ((Object**)o, pSC, 0);
        mark_list_finger++;
    }

    //scan the mark stack
    dprintf (3, ("Scanning background mark stack"));

    uint8_t** finger = background_mark_stack_array;
    while (finger < background_mark_stack_tos)
    {
        if ((finger + 1) < background_mark_stack_tos)
        {
            // We need to check for the partial mark case here.
            // A partially-scanned entry is a pair: *finger is the resume
            // position inside the object and *(finger+1) is the object with
            // its low bit set as a tag.
            uint8_t* parent_obj = *(finger + 1);
            if ((size_t)parent_obj & 1)
            {
                uint8_t* place = *finger;
                size_t place_offset = 0;
                uint8_t* real_parent_obj = (uint8_t*)((size_t)parent_obj & ~1);

                if (relocate_p)
                {
                    // Untag, relocate the parent, then recompute the resume
                    // position relative to the (possibly moved) parent and
                    // re-apply the tag bit.
                    *(finger + 1) = real_parent_obj;
                    place_offset = place - real_parent_obj;
                    dprintf(3,("relocating background root %Ix", (size_t)real_parent_obj));
                    (*fn) ((Object**)(finger + 1), pSC, 0);
                    real_parent_obj = *(finger + 1);
                    *finger = real_parent_obj + place_offset;
                    *(finger + 1) = (uint8_t*)((size_t)real_parent_obj | 1);
                    dprintf(3,("roots changed to %Ix, %Ix", *finger, *(finger + 1)));
                }
                else
                {
                    // Marking: promote via a temporary so the tagged slot on
                    // the stack is left untouched.
                    uint8_t** temp = &real_parent_obj;
                    dprintf(3,("marking background root %Ix", (size_t)real_parent_obj));
                    (*fn) ((Object**)temp, pSC, 0);
                }

                finger += 2;
                continue;
            }
        }
        dprintf(3,("background root %Ix", (size_t)*finger));
        (*fn) ((Object**)finger, pSC, 0);
        finger++;
    }
}

#endif //BACKGROUND_GC


// Translates the OS write-watch information into card-table state: every page
// written since the last reset gets all of its covering card words set.
// Walks the small-object segments first, then the large-object segments.
void gc_heap::fix_card_table ()
{
#ifdef WRITE_WATCH
    heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));

    PREFIX_ASSUME(seg != NULL);

#ifdef BACKGROUND_GC
    // During a concurrent GC the watch state is reset as part of this query;
    // the ephemeral tail is re-dirtied explicitly at the end of this method.
    bool reset_watch_state = !!settings.concurrent;
#else //BACKGROUND_GC
    bool reset_watch_state = false;
#endif //BACKGROUND_GC
    BOOL small_object_segments = TRUE;
    while (1)
    {
        if (seg == 0)
        {
            if (small_object_segments)
            {
                // Done with SOH segments; switch to the LOH segment chain.
                small_object_segments = FALSE;
                seg = heap_segment_rw (generation_start_segment (large_object_generation));

                PREFIX_ASSUME(seg != NULL);

                continue;
            }
            else
            {
                break;
            }
        }

        // For the ephemeral segment only the range below gen0's allocation
        // start is probed; gen0 itself doesn't need cards.
        uint8_t* base_address = align_lower_page (heap_segment_mem (seg));
        uint8_t* high_address = align_on_page (
            (seg != ephemeral_heap_segment) ?
            heap_segment_allocated (seg) :
            generation_allocation_start (generation_of (0))
            );
        uintptr_t bcount = array_size;
        do
        {
            if(high_address <= base_address)
                break;

            size_t region_size = high_address - base_address;
            assert (region_size > 0);
            dprintf (3,("Probing pages [%Ix, %Ix[", (size_t)base_address, (size_t)high_address));

#ifdef TIME_WRITE_WATCH
            unsigned int time_start = GetCycleCount32();
#endif //TIME_WRITE_WATCH
            get_write_watch_for_gc_heap(reset_watch_state, base_address, region_size,
                                        (void**)g_addresses,
                                        &bcount, true);

#ifdef TIME_WRITE_WATCH
            unsigned int time_stop = GetCycleCount32();
            tot_cycles += time_stop - time_start;
            printf ("get_write_watch_for_gc_heap Duration: %d, total: %d\n",
                    time_stop - time_start, tot_cycles);
#endif //TIME_WRITE_WATCH

            assert( ((card_size * card_word_width)&(OS_PAGE_SIZE-1))==0 );
            //printf ("%Ix written into\n", bcount);
            dprintf (3,("Found %Id pages written", bcount));
            for (unsigned i = 0; i < bcount; i++)
            {
                // Set every card word that covers this written page.
                for (unsigned j = 0; j< (card_size*card_word_width)/OS_PAGE_SIZE; j++)
                {
                    card_table [card_word (card_of (g_addresses [i]))+j] = ~0u;
                }
                dprintf (2,("Set Cards [%Ix:%Ix, %Ix:%Ix[",
                            card_of (g_addresses [i]), (size_t)g_addresses [i],
                            card_of (g_addresses [i]+OS_PAGE_SIZE), (size_t)g_addresses [i]+OS_PAGE_SIZE));
            }
            // A full buffer means there may be more written pages; continue
            // scanning from just past the last page returned.
            if (bcount >= array_size){
                base_address = g_addresses [array_size-1] + OS_PAGE_SIZE;
                bcount = array_size;
            }
        } while (bcount >= array_size);
        seg = heap_segment_next_rw (seg);
    }

#ifdef BACKGROUND_GC
    if (settings.concurrent)
    {
        //reset the ephemeral page allocated by generation_of (0)
        uint8_t* base_address =
            align_on_page (generation_allocation_start (generation_of (0)));
        size_t region_size =
            heap_segment_allocated (ephemeral_heap_segment) - base_address;
        reset_write_watch_for_gc_heap(base_address, region_size);
    }
#endif //BACKGROUND_GC
#endif //WRITE_WATCH
}

#ifdef BACKGROUND_GC
inline
void
// Traces all pointer fields of oo (which is already marked), background-
// marking each referenced object. Counts the references only for logging.
gc_heap::background_mark_through_object (uint8_t* oo THREAD_NUMBER_DCL)
{
    if (contain_pointers (oo))
    {
        size_t total_refs = 0;
        size_t s = size (oo);
        go_through_object_nostart (method_table(oo), oo, s, po,
                          {
                              uint8_t* o = *po;
                              total_refs++;
                              background_mark_object (o THREAD_NUMBER_ARG);
                          }
            );

        dprintf (3,("Background marking through %Ix went through %Id refs",
                    (size_t)oo,
                    total_refs));
    }
}

// Returns the end address for scanning seg during overflow processing. While
// concurrent, the ephemeral segment saved at overflow time is only scanned up
// to where gen1 started; the rest is handled in the final (non-concurrent) pass.
uint8_t* gc_heap::background_seg_end (heap_segment* seg, BOOL concurrent_p)
{
    if (concurrent_p && (seg == saved_overflow_ephemeral_seg))
    {
        // for now we stop at where gen1 started when we started processing
        return background_min_soh_overflow_address;
    }
    else
    {
        return heap_segment_allocated (seg);
    }
}

// Returns the first object at or after min_add on seg to start overflow
// processing from. For small-object segments that contain min_add this may
// need find_first_object; otherwise it's simply max(segment start, min_add).
uint8_t* gc_heap::background_first_overflow (uint8_t* min_add,
                                             heap_segment* seg,
                                             BOOL concurrent_p,
                                             BOOL small_object_p)
{
    uint8_t* o = 0;

    if (small_object_p)
    {
        if (in_range_for_segment (min_add, seg))
        {
            // min_add was the beginning of gen1 when we did the concurrent
            // overflow. Now we could be in a situation where min_add is
            // actually the same as allocated for that segment (because
            // we expanded heap), in which case we can not call
            // find first on this address or we will AV.
            if (min_add >= heap_segment_allocated (seg))
            {
                return min_add;
            }
            else
            {
                if (concurrent_p &&
                    ((seg == saved_overflow_ephemeral_seg) && (min_add >= background_min_soh_overflow_address)))
                {
                    return background_min_soh_overflow_address;
                }
                else
                {
                    o = find_first_object (min_add, heap_segment_mem (seg));
                    return o;
                }
            }
        }
    }

    o = max (heap_segment_mem (seg), min_add);
    return o;
}

// Walks every segment (SOH then LOH) re-marking through objects in the
// overflow range [min_add, max_add]. In the concurrent phase each heap only
// processes itself; in the blocking phase all heaps are shared among workers.
void gc_heap::background_process_mark_overflow_internal (int condemned_gen_number,
                                                         uint8_t* min_add, uint8_t* max_add,
                                                         BOOL concurrent_p)
{
    if (concurrent_p)
    {
        current_bgc_state = bgc_overflow_soh;
    }

    size_t total_marked_objects = 0;

#ifdef MULTIPLE_HEAPS
    int thread = heap_number;
#endif //MULTIPLE_HEAPS

    exclusive_sync* loh_alloc_lock = 0;

    dprintf (2,("Processing Mark overflow [%Ix %Ix]", (size_t)min_add, (size_t)max_add));
#ifdef MULTIPLE_HEAPS
    // We don't have each heap scan all heaps concurrently because we are worried about
    // multiple threads calling things like find_first_object.
    int h_start = (concurrent_p ? heap_number : 0);
    int h_end = (concurrent_p ? (heap_number + 1) : n_heaps);
    for (int hi = h_start; hi < h_end; hi++)
    {
        gc_heap* hp = (concurrent_p ?
 this : g_heaps [(heap_number + hi) % n_heaps]);

#else
    {
        gc_heap* hp = 0;

#endif //MULTIPLE_HEAPS
        BOOL small_object_segments = TRUE;
        int align_const = get_alignment_constant (small_object_segments);
        generation* gen = hp->generation_of (condemned_gen_number);
        heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
        PREFIX_ASSUME(seg != NULL);
        loh_alloc_lock = hp->bgc_alloc_lock;

        uint8_t* o = hp->background_first_overflow (min_add,
                                                    seg,
                                                    concurrent_p,
                                                    small_object_segments);

        while (1)
        {
            while ((o < hp->background_seg_end (seg, concurrent_p)) && (o <= max_add))
            {
                dprintf (3, ("considering %Ix", (size_t)o));

                size_t s;

                if (concurrent_p && !small_object_segments)
                {
                    // On LOH during concurrent marking we must coordinate with
                    // background allocation; a free entry's size comes from the
                    // free-object payload rather than the normal size().
                    loh_alloc_lock->bgc_mark_set (o);

                    if (((CObjectHeader*)o)->IsFree())
                    {
                        s = unused_array_size (o);
                    }
                    else
                    {
                        s = size (o);
                    }
                }
                else
                {
                    s = size (o);
                }

                if (background_object_marked (o, FALSE) && contain_pointers_or_collectible (o))
                {
                    total_marked_objects++;
                    go_through_object_cl (method_table(o), o, s, poo,
                                          uint8_t* oo = *poo;
                                          background_mark_object (oo THREAD_NUMBER_ARG);
                                         );
                }

                if (concurrent_p && !small_object_segments)
                {
                    loh_alloc_lock->bgc_mark_done ();
                }

                o = o + Align (s, align_const);

                if (concurrent_p)
                {
                    // Let a pending foreground GC run between objects.
                    allow_fgc();
                }
            }

            dprintf (2, ("went through overflow objects in segment %Ix (%d) (so far %Id marked)",
                         heap_segment_mem (seg), (small_object_segments ? 0 : 1), total_marked_objects));

            if ((concurrent_p && (seg == hp->saved_overflow_ephemeral_seg)) ||
                (seg = heap_segment_next_in_range (seg)) == 0)
            {
                if (small_object_segments)
                {
                    if (concurrent_p)
                    {
                        current_bgc_state = bgc_overflow_loh;
                    }

                    // Finished SOH; report it and switch to the LOH chain.
                    dprintf (2, ("h%d: SOH: ov-mo: %Id", heap_number, total_marked_objects));
                    fire_overflow_event (min_add, max_add, total_marked_objects, !small_object_segments);
                    concurrent_print_time_delta (concurrent_p ? "Cov SOH" : "Nov SOH");
                    total_marked_objects = 0;
                    small_object_segments = FALSE;
                    align_const = get_alignment_constant (small_object_segments);
                    seg = heap_segment_in_range (generation_start_segment (hp->generation_of (max_generation+1)));

                    PREFIX_ASSUME(seg != NULL);

                    o = max (heap_segment_mem (seg), min_add);
                    continue;
                }
                else
                {
                    dprintf (GTC_LOG, ("h%d: LOH: ov-mo: %Id", heap_number, total_marked_objects));
                    fire_overflow_event (min_add, max_add, total_marked_objects, !small_object_segments);
                    break;
                }
            }
            else
            {
                o = hp->background_first_overflow (min_add,
                                                   seg,
                                                   concurrent_p,
                                                   small_object_segments);
                continue;
            }
        }
    }
}

// Processes any recorded background mark-stack overflow range. Returns TRUE
// if there was overflow to process. Also grows the background mark stack
// (when worthwhile) to make a repeat overflow less likely.
BOOL gc_heap::background_process_mark_overflow (BOOL concurrent_p)
{
    BOOL grow_mark_array_p = TRUE;

    if (concurrent_p)
    {
        assert (!processed_soh_overflow_p);

        if ((background_max_overflow_address != 0) &&
            (background_min_overflow_address != MAX_PTR))
        {
            // We have overflow to process but we know we can't process the ephemeral generations
            // now (we actually could process till the current gen1 start but since we are going to
            // make overflow per segment, for now I'll just stop at the saved gen1 start.
            saved_overflow_ephemeral_seg = ephemeral_heap_segment;
            background_max_soh_overflow_address = heap_segment_reserved (saved_overflow_ephemeral_seg);
            background_min_soh_overflow_address = generation_allocation_start (generation_of (max_generation-1));
        }
    }
    else
    {
        assert ((saved_overflow_ephemeral_seg == 0) ||
                ((background_max_soh_overflow_address != 0) &&
                 (background_min_soh_overflow_address != MAX_PTR)));

        if (!processed_soh_overflow_p)
        {
            // if there was no more overflow we just need to process what we didn't process
            // on the saved ephemeral segment.
            if ((background_max_overflow_address == 0) && (background_min_overflow_address == MAX_PTR))
            {
                dprintf (2, ("final processing mark overflow - no more overflow since last time"));
                grow_mark_array_p = FALSE;
            }

            // Fold the deferred ephemeral-segment range into the range to
            // process now that we are in the blocking phase.
            background_min_overflow_address = min (background_min_overflow_address,
                                                background_min_soh_overflow_address);
            background_max_overflow_address = max (background_max_overflow_address,
                                                background_max_soh_overflow_address);
            processed_soh_overflow_p = TRUE;
        }
    }

    BOOL overflow_p = FALSE;
recheck:
    if ((! ((background_max_overflow_address == 0)) ||
         ! ((background_min_overflow_address == MAX_PTR))))
    {
        overflow_p = TRUE;

        if (grow_mark_array_p)
        {
            // Try to grow the array.
            size_t new_size = max (MARK_STACK_INITIAL_LENGTH, 2*background_mark_stack_array_length);

            if ((new_size * sizeof(mark)) > 100*1024)
            {
                // Cap the growth at a tenth of the total heap size.
                size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark);

                new_size = min(new_max_size, new_size);
            }

            // Only reallocate when the gain is significant (more than 50%).
            if ((background_mark_stack_array_length < new_size) &&
                ((new_size - background_mark_stack_array_length) > (background_mark_stack_array_length / 2)))
            {
                dprintf (2, ("h%d: ov grow to %Id", heap_number, new_size));

                uint8_t** tmp = new (nothrow) uint8_t* [new_size];
                if (tmp)
                {
                    delete background_mark_stack_array;
                    background_mark_stack_array = tmp;
                    background_mark_stack_array_length = new_size;
                    background_mark_stack_tos = background_mark_stack_array;
                }
            }
        }
        else
        {
            grow_mark_array_p = TRUE;
        }

        // Snapshot and reset the overflow range before processing, so a new
        // overflow during processing is recorded for the next iteration.
        uint8_t* min_add = background_min_overflow_address;
        uint8_t* max_add = background_max_overflow_address;

        background_max_overflow_address = 0;
        background_min_overflow_address = MAX_PTR;

        background_process_mark_overflow_internal (max_generation, min_add, max_add, concurrent_p);
        if (!concurrent_p)
        {
            // Blocking phase: loop until no further overflow is produced.
            goto recheck;
        }
    }

    return overflow_p;
}

#endif //BACKGROUND_GC

// Marks every object referenced by oo and, when requested (and the class is
// collectible), the class object as well.
inline
void gc_heap::mark_through_object (uint8_t* oo, BOOL mark_class_object_p
                                   THREAD_NUMBER_DCL)
{
#ifndef COLLECTIBLE_CLASS
    UNREFERENCED_PARAMETER(mark_class_object_p);
    BOOL to_mark_class_object = FALSE;
#else //COLLECTIBLE_CLASS
    BOOL to_mark_class_object = (mark_class_object_p && (is_collectible(oo)));
#endif //COLLECTIBLE_CLASS
    if (contain_pointers (oo) || to_mark_class_object)
    {
        dprintf(3,( "Marking through %Ix", (size_t)oo));
        size_t s = size (oo);

#ifdef COLLECTIBLE_CLASS
        if (to_mark_class_object)
        {
            uint8_t* class_obj = get_class_object (oo);
            mark_object (class_obj THREAD_NUMBER_ARG);
        }
#endif //COLLECTIBLE_CLASS

        if (contain_pointers (oo))
        {
            go_through_object_nostart (method_table(oo), oo, s, po,
                                       uint8_t* o = *po;
                                       mark_object (o THREAD_NUMBER_ARG);
                                      );
        }
    }
}

// Sum of LOH size plus all gen-2 generation sizes, across all heaps when
// MULTIPLE_HEAPS is defined.
size_t gc_heap::get_total_heap_size()
{
    size_t total_heap_size = 0;

#ifdef MULTIPLE_HEAPS
    int hn = 0;

    for (hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp2 = gc_heap::g_heaps [hn];
        total_heap_size += hp2->generation_size (max_generation + 1) + hp2->generation_sizes (hp2->generation_of (max_generation));
    }
#else
    total_heap_size = generation_size (max_generation + 1) + generation_sizes (generation_of (max_generation));
#endif //MULTIPLE_HEAPS

    return total_heap_size;
}

// Committed bytes for this heap: walks the gen-2 segment chain and then the
// LOH segment chain, summing committed-end minus segment-header start.
size_t gc_heap::committed_size()
{
    generation* gen = generation_of (max_generation);
    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
    size_t total_committed = 0;

    while (1)
    {
        total_committed += heap_segment_committed (seg) - (uint8_t*)seg;

        seg = heap_segment_next (seg);
        if (!seg)
        {
            if (gen != large_object_generation)
            {
                // Finished the SOH chain; continue with the LOH chain.
                gen = generation_of (max_generation + 1);
                seg = generation_start_segment (gen);
            }
            else
                break;
        }
    }

    return total_committed;
}

// Committed bytes summed across all heaps.
size_t gc_heap::get_total_committed_size()
{
    size_t total_committed = 0;

#ifdef MULTIPLE_HEAPS
    int hn = 0;

    for (hn = 0; hn < gc_heap::n_heaps; hn++)
    {
        gc_heap* hp = gc_heap::g_heaps [hn];
        total_committed +=
 hp->committed_size();
    }
#else
    total_committed = committed_size();
#endif //MULTIPLE_HEAPS

    return total_committed;
}

// Thin wrapper over the OS memory-status query.
void gc_heap::get_memory_info (uint32_t* memory_load,
                               uint64_t* available_physical,
                               uint64_t* available_page_file)
{
    GCToOSInterface::GetMemoryStatus(memory_load, available_physical, available_page_file);
}

// Fires the ETW mark event reporting how many bytes a given root kind promoted.
void fire_mark_event (int heap_num, int root_type, size_t bytes_marked)
{
    dprintf (DT_LOG_0, ("-----------[%d]mark %d: %Id", heap_num, root_type, bytes_marked));
    FireEtwGCMarkWithType (heap_num, GetClrInstanceId(), root_type, bytes_marked);
}

// Returns TRUE if an overflow happened.
// Processes the foreground mark-stack overflow range, growing the mark stack
// first when worthwhile, and loops until no new overflow is produced. Fires
// a mark event for any bytes promoted during overflow processing.
BOOL gc_heap::process_mark_overflow(int condemned_gen_number)
{
    size_t last_promoted_bytes = promoted_bytes (heap_number);
    BOOL overflow_p = FALSE;
recheck:
    if ((! (max_overflow_address == 0) ||
         ! (min_overflow_address == MAX_PTR)))
    {
        overflow_p = TRUE;
        // Try to grow the array.
        size_t new_size =
            max (MARK_STACK_INITIAL_LENGTH, 2*mark_stack_array_length);

        if ((new_size * sizeof(mark)) > 100*1024)
        {
            // Cap the growth at a tenth of the total heap size.
            size_t new_max_size = (get_total_heap_size() / 10) / sizeof(mark);

            new_size = min(new_max_size, new_size);
        }

        // Only reallocate when the gain is significant (more than 50%).
        if ((mark_stack_array_length < new_size) &&
            ((new_size - mark_stack_array_length) > (mark_stack_array_length / 2)))
        {
            mark* tmp = new (nothrow) mark [new_size];
            if (tmp)
            {
                delete mark_stack_array;
                mark_stack_array = tmp;
                mark_stack_array_length = new_size;
            }
        }

        // Snapshot and reset the overflow range before processing, so a new
        // overflow during processing is captured for the next iteration.
        uint8_t* min_add = min_overflow_address;
        uint8_t* max_add = max_overflow_address;
        max_overflow_address = 0;
        min_overflow_address = MAX_PTR;
        process_mark_overflow_internal (condemned_gen_number, min_add, max_add);
        goto recheck;
    }

    size_t current_promoted_bytes = promoted_bytes (heap_number);

    if (current_promoted_bytes != last_promoted_bytes)
        fire_mark_event (heap_number, ETW::GC_ROOT_OVERFLOW, (current_promoted_bytes - last_promoted_bytes));
    return overflow_p;
}

// Re-marks through every already-marked object in [min_add, max_add] on the
// condemned generation's segments (including LOH for a full GC). With
// MULTIPLE_HEAPS every worker walks all heaps, offset by its own heap number.
void
gc_heap::process_mark_overflow_internal (int condemned_gen_number,
                                         uint8_t* min_add, uint8_t* max_add)
{
#ifdef MULTIPLE_HEAPS
    int thread = heap_number;
#endif //MULTIPLE_HEAPS
    BOOL full_p = (condemned_gen_number == max_generation);

    dprintf(3,("Processing Mark overflow [%Ix %Ix]", (size_t)min_add, (size_t)max_add));
#ifdef MULTIPLE_HEAPS
    for (int hi = 0; hi < n_heaps; hi++)
    {
        gc_heap* hp = g_heaps [(heap_number + hi) % n_heaps];

#else
    {
        gc_heap* hp = 0;

#endif //MULTIPLE_HEAPS
        BOOL small_object_segments = TRUE;
        int align_const = get_alignment_constant (small_object_segments);
        generation* gen = hp->generation_of (condemned_gen_number);
        heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));

        PREFIX_ASSUME(seg != NULL);
        uint8_t* o = max (heap_segment_mem (seg), min_add);
        while (1)
        {
            uint8_t* end = heap_segment_allocated (seg);

            while ((o < end) && (o <= max_add))
            {
                assert ((min_add <= o) && (max_add >= o));
                dprintf (3, ("considering %Ix", (size_t)o));
                if (marked (o))
                {
                    mark_through_object (o, TRUE THREAD_NUMBER_ARG);
                }

                o = o + Align (size (o), align_const);
            }

            if (( seg = heap_segment_next_in_range (seg)) == 0)
            {
                if (small_object_segments && full_p)
                {
                    // Full GC: continue with the large-object segments.
                    small_object_segments = FALSE;
                    align_const = get_alignment_constant (small_object_segments);
                    seg = heap_segment_in_range (generation_start_segment (hp->generation_of (max_generation+1)));

                    PREFIX_ASSUME(seg != NULL);

                    o = max (heap_segment_mem (seg), min_add);
                    continue;
                }
                else
                {
                    break;
                }
            }
            else
            {
                o = max (heap_segment_mem (seg), min_add);
                continue;
            }
        }
    }
}

// Scanning for promotion for dependent handles need special handling.
// Because the primary holds a strong
// reference to the secondary (when the primary itself is reachable) and this can cause a cascading series of
// promotions (the secondary of one handle is or promotes the primary of another) we might need to perform the
// promotion scan multiple times.
// This helper encapsulates the logic to complete all dependent handle promotions when running a server GC. It
// also has the effect of processing any mark stack overflow.

#ifdef MULTIPLE_HEAPS
// When multiple heaps are enabled we must utilize a more complex algorithm in order to keep all the GC
// worker threads synchronized. The algorithms are sufficiently divergent that we have different
// implementations based on whether MULTIPLE_HEAPS is defined or not.
//
// Define some static variables used for synchronization in the method below. These should really be defined
// locally but MSVC complains when the VOLATILE macro is expanded into an instantiation of the Volatile class.
//
// A note about the synchronization used within this method. Communication between the worker threads is
// achieved via two shared booleans (defined below). These both act as latches that are transitioned only from
// false -> true by unsynchronized code. They are only read or reset to false by a single thread under the
// protection of a join.
static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE;
static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE;
static VOLATILE(BOOL) s_fScanRequired;
void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p)
{
    // Whenever we call this method there may have been preceding object promotions. So set
    // s_fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
    // based on the how the scanning proceeded).
    s_fUnscannedPromotions = TRUE;

    // We don't know how many times we need to loop yet. In particular we can't base the loop condition on
    // the state of this thread's portion of the dependent handle table. That's because promotions on other
    // threads could cause handle promotions to become necessary here. Even if there are definitely no more
    // promotions possible in this thread's handles, we still have to stay in lock-step with those worker
    // threads that haven't finished yet (each GC worker thread has to join exactly the same number of times
    // as all the others or they'll get out of step).
    while (true)
    {
        // The various worker threads are all currently racing in this code. We need to work out if at least
        // one of them think they have work to do this cycle. Each thread needs to rescan its portion of the
        // dependent handle table when both of the following conditions apply:
        //  1) At least one (arbitrary) object might have been promoted since the last scan (because if this
        //     object happens to correspond to a primary in one of our handles we might potentially have to
        //     promote the associated secondary).
        //  2) The table for this thread has at least one handle with a secondary that isn't promoted yet.
        //
        // The first condition is represented by s_fUnscannedPromotions. This is always non-zero for the first
        // iteration of this loop (see comment above) and in subsequent cycles each thread updates this
        // whenever a mark stack overflow occurs or scanning their dependent handles results in a secondary
        // being promoted. This value is cleared back to zero in a synchronized fashion in the join that
        // follows below. Note that we can't read this outside of the join since on any iteration apart from
        // the first threads will be racing between reading this value and completing their previous
        // iteration's table scan.
        //
        // The second condition is tracked by the dependent handle code itself on a per worker thread basis
        // (and updated by the GcDhReScan() method). We call GcDhUnpromotedHandlesExist() on each thread to
        // determine the local value and collect the results into the s_fUnpromotedHandles variable in what is
        // effectively an OR operation. As per s_fUnscannedPromotions we can't read the final result until
        // we're safely joined.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            s_fUnpromotedHandles = TRUE;

        // Synchronize all the threads so we can read our state variables safely. The shared variable
        // s_fScanRequired, indicating whether we should scan the tables or terminate the loop, will be set by
        // a single thread inside the join.
        gc_t_join.join(this, gc_join_scan_dependent_handles);
        if (gc_t_join.joined())
        {
            // We're synchronized so it's safe to read our shared state variables. We update another shared
            // variable to indicate to all threads whether we'll be scanning for another cycle or terminating
            // the loop. We scan if there has been at least one object promotion since last time and at least
            // one thread has a dependent handle table with a potential handle promotion possible.
            s_fScanRequired = s_fUnscannedPromotions && s_fUnpromotedHandles;

            // Reset our shared state variables (ready to be set again on this scan or with a good initial
            // value for the next call if we're terminating the loop).
            s_fUnscannedPromotions = FALSE;
            s_fUnpromotedHandles = FALSE;

            if (!s_fScanRequired)
            {
                // We're terminating the loop. Perform any last operations that require single threaded access.
                if (!initial_scan_p)
                {
                    // On the second invocation we reconcile all mark overflow ranges across the heaps. This can help
                    // load balance if some of the heaps have an abnormally large workload.
                    uint8_t* all_heaps_max = 0;
                    uint8_t* all_heaps_min = MAX_PTR;
                    int i;
                    for (i = 0; i < n_heaps; i++)
                    {
                        if (all_heaps_max < g_heaps[i]->max_overflow_address)
                            all_heaps_max = g_heaps[i]->max_overflow_address;
                        if (all_heaps_min > g_heaps[i]->min_overflow_address)
                            all_heaps_min = g_heaps[i]->min_overflow_address;
                    }
                    for (i = 0; i < n_heaps; i++)
                    {
                        g_heaps[i]->max_overflow_address = all_heaps_max;
                        g_heaps[i]->min_overflow_address = all_heaps_min;
                    }
                }
            }

            // Restart all the workers.
            dprintf(3, ("Starting all gc thread mark stack overflow processing"));
            gc_t_join.restart();
        }

        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
        // being visible. If there really was an overflow (process_mark_overflow returns true) then set the
        // global flag indicating that at least one object promotion may have occurred (the usual comment
        // about races applies). (Note it's OK to set this flag even if we're about to terminate the loop and
        // exit the method since we unconditionally set this variable on method entry anyway).
        if (process_mark_overflow(condemned_gen_number))
            s_fUnscannedPromotions = TRUE;

        // If we decided that no scan was required we can terminate the loop now.
        if (!s_fScanRequired)
            break;

        // Otherwise we must join with the other workers to ensure that all mark stack overflows have been
        // processed before we start scanning dependent handle tables (if overflows remain while we scan we
        // could miss noting the promotion of some primary objects).
        gc_t_join.join(this, gc_join_rescan_dependent_handles);
        if (gc_t_join.joined())
        {
            // Restart all the workers.
            dprintf(3, ("Starting all gc thread for dependent handle promotion"));
            gc_t_join.restart();
        }

        // If the portion of the dependent handle table managed by this worker has handles that could still be
        // promoted perform a rescan. If the rescan resulted in at least one promotion note this fact since it
        // could require a rescan of handles on this or other workers.
        if (GCScan::GcDhUnpromotedHandlesExist(sc))
            if (GCScan::GcDhReScan(sc))
                s_fUnscannedPromotions = TRUE;
    }
}
#else //MULTIPLE_HEAPS
// Non-multiple heap version of scan_dependent_handles: much simpler without the need to keep multiple worker
// threads synchronized.
void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p)
{
    UNREFERENCED_PARAMETER(initial_scan_p);

    // Whenever we call this method there may have been preceding object promotions. So set
    // fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
    // based on the how the scanning proceeded).
    bool fUnscannedPromotions = true;

    // Loop until there are either no more dependent handles that can have their secondary promoted or we've
    // managed to perform a scan without promoting anything new.
    while (GCScan::GcDhUnpromotedHandlesExist(sc) && fUnscannedPromotions)
    {
        // On each iteration of the loop start with the assumption that no further objects have been promoted.
        fUnscannedPromotions = false;

        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
        // being visible. If there was an overflow (process_mark_overflow returned true) then additional
        // objects now appear to be promoted and we should set the flag.
        if (process_mark_overflow(condemned_gen_number))
            fUnscannedPromotions = true;

        // Perform the scan and set the flag if any promotions resulted.
        if (GCScan::GcDhReScan(sc))
            fUnscannedPromotions = true;
    }

    // Process any mark stack overflow that may have resulted from scanning handles (or if we didn't need to
    // scan any handles at all this is the processing of overflows that may have occurred prior to this method
    // invocation).
    process_mark_overflow(condemned_gen_number);
}
#endif //MULTIPLE_HEAPS

// mark_phase: marking for a blocking (non-concurrent) GC.
// Visible order of operations in this body:
//   1. reset per-generation dynamic data for the condemned generations,
//   2. scan sized-ref handles, then stack/finalizer/handle-table roots,
//   3. for non-full GCs, find cross-generation pointers via the card table,
//   4. scan dependent handles to a fixed point (scan_dependent_handles),
//   5. null short weak refs, promote finalizable objects, rescan dependent
//      handles, then null long weak refs,
//   6. scan the syncblk cache and decide promotion from promoted byte counts.
// Under MULTIPLE_HEAPS, server GC workers rendezvous at each gc_t_join point.
void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
{
    assert (settings.concurrent == FALSE);

    ScanContext sc;
    sc.thread_number = heap_number;
    sc.promotion = TRUE;
    sc.concurrent = FALSE;

    dprintf(2,("---- Mark Phase condemning %d ----", condemned_gen_number));
    BOOL full_p = (condemned_gen_number == max_generation);

#ifdef TIME_GC
    unsigned start;
    unsigned finish;
    start = GetCycleCount32();
#endif //TIME_GC

    // For a full GC also reinitialize the large object "generation" (max_generation + 1).
    int gen_to_init = condemned_gen_number;
    if (condemned_gen_number == max_generation)
    {
        gen_to_init = max_generation + 1;
    }
    for (int gen_idx = 0; gen_idx <= gen_to_init; gen_idx++)
    {
        dynamic_data* dd = dynamic_data_of (gen_idx);
        dd_begin_data_size (dd) = generation_size (gen_idx) -
                                  dd_fragmentation (dd) -
                                  Align (size (generation_allocation_start (generation_of (gen_idx))));
        dprintf (2, ("begin data size for gen%d is %Id", gen_idx, dd_begin_data_size (dd)));
        dd_survived_size (dd) = 0;
        dd_pinned_survived_size (dd) = 0;
        dd_artificial_pinned_survived_size (dd) = 0;
        dd_added_pinned_size (dd) = 0;
#ifdef SHORT_PLUGS
        dd_padding_size (dd) = 0;
#endif //SHORT_PLUGS
#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
        dd_num_npinned_plugs (dd) = 0;
#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
    }

#ifdef FFIND_OBJECT
    if (gen0_must_clear_bricks > 0)
        gen0_must_clear_bricks--;
#endif //FFIND_OBJECT

    size_t last_promoted_bytes = 0;

    promoted_bytes (heap_number) = 0;
    reset_mark_stack();

#ifdef SNOOP_STATS
    memset (&snoop_stat, 0, sizeof(snoop_stat));
    snoop_stat.heap_index = heap_number;
#endif //SNOOP_STATS

#ifdef MH_SC_MARK
    if (full_p)
    {
        //initialize the mark stack
        for (int i = 0; i < max_snoop_level; i++)
        {
            ((uint8_t**)(mark_stack_array))[i] = 0;
        }

        mark_stack_busy() = 1;
    }
#endif //MH_SC_MARK

    // Function-scope statics: written only by the single thread that wins the
    // gc_t_join below, then read by all workers after the restart.
    static uint32_t num_sizedrefs = 0;

#ifdef MH_SC_MARK
    static BOOL do_mark_steal_p = FALSE;
#endif //MH_SC_MARK

#ifdef MULTIPLE_HEAPS
    gc_t_join.join(this, gc_join_begin_mark_phase);
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS

        num_sizedrefs = SystemDomain::System()->GetTotalNumSizedRefHandles();

#ifdef MULTIPLE_HEAPS

#ifdef MH_SC_MARK
        // Only enable mark stealing for full GCs on sufficiently large heaps
        // (> 100MB total); smaller heaps don't amortize the stealing overhead.
        if (full_p)
        {
            size_t total_heap_size = get_total_heap_size();

            if (total_heap_size > (100 * 1024 * 1024))
            {
                do_mark_steal_p = TRUE;
            }
            else
            {
                do_mark_steal_p = FALSE;
            }
        }
        else
        {
            do_mark_steal_p = FALSE;
        }
#endif //MH_SC_MARK

        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    {

#ifdef MARK_LIST
        //set up the mark lists from g_mark_list
        assert (g_mark_list);
#ifdef MULTIPLE_HEAPS
        mark_list = &g_mark_list [heap_number*mark_list_size];
#else
        mark_list = g_mark_list;
#endif //MULTIPLE_HEAPS
        //dont use the mark list for full gc
        //because multiple segments are more complex to handle and the list
        //is likely to overflow
        if (condemned_gen_number != max_generation)
            mark_list_end = &mark_list [mark_list_size-1];
        else
            mark_list_end = &mark_list [0];
        mark_list_index = &mark_list [0];
#endif //MARK_LIST

        // shigh/slow track the highest/lowest marked addresses seen during this phase.
        shigh = (uint8_t*) 0;
        slow  = MAX_PTR;

        //%type%  category = quote (mark);

        // Sized-ref handles are scanned first, for a full GC only, so their
        // referents are promoted before ordinary strong roots are walked.
        if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0))
        {
            GCScan::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
            fire_mark_event (heap_number, ETW::GC_ROOT_SIZEDREF, (promoted_bytes (heap_number) - last_promoted_bytes));
            last_promoted_bytes = promoted_bytes (heap_number);

#ifdef MULTIPLE_HEAPS
            gc_t_join.join(this, gc_join_scan_sizedref_done);
            if (gc_t_join.joined())
            {
                dprintf(3, ("Done with marking all sized refs. Starting all gc thread for marking other strong roots"));
                gc_t_join.restart();
            }
#endif //MULTIPLE_HEAPS
        }

        dprintf(3,("Marking Roots"));

        GCScan::GcScanRoots(GCHeap::Promote,
                            condemned_gen_number, max_generation,
                            &sc);

        fire_mark_event (heap_number, ETW::GC_ROOT_STACK, (promoted_bytes (heap_number) - last_promoted_bytes));
        last_promoted_bytes = promoted_bytes (heap_number);

#ifdef BACKGROUND_GC
        if (recursive_gc_sync::background_running_p())
        {
            scan_background_roots (GCHeap::Promote, heap_number, &sc);
        }
#endif //BACKGROUND_GC

#ifdef FEATURE_PREMORTEM_FINALIZATION
        dprintf(3, ("Marking finalization data"));
        finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0);
#endif // FEATURE_PREMORTEM_FINALIZATION

        fire_mark_event (heap_number, ETW::GC_ROOT_FQ, (promoted_bytes (heap_number) - last_promoted_bytes));
        last_promoted_bytes = promoted_bytes (heap_number);

// MTHTS
        {

            dprintf(3,("Marking handle table"));
            GCScan::GcScanHandles(GCHeap::Promote,
                                  condemned_gen_number, max_generation,
                                  &sc);
            fire_mark_event (heap_number, ETW::GC_ROOT_HANDLES, (promoted_bytes (heap_number) - last_promoted_bytes));
            last_promoted_bytes = promoted_bytes (heap_number);
        }

#ifdef TRACE_GC
        size_t promoted_before_cards = promoted_bytes (heap_number);
#endif //TRACE_GC

        dprintf (3, ("before cards: %Id", promoted_before_cards));
        // For ephemeral (non-full) GCs the card table supplies the pointers from
        // older generations into the condemned generations.
        if (!full_p)
        {
#ifdef CARD_BUNDLE
#ifdef MULTIPLE_HEAPS
            // r_join: exactly one thread does the global card bundle update.
            if (gc_t_join.r_join(this, gc_r_join_update_card_bundle))
            {
#endif //MULTIPLE_HEAPS

                update_card_table_bundle ();

#ifdef MULTIPLE_HEAPS
                gc_t_join.r_restart();
            }
#endif //MULTIPLE_HEAPS
#endif //CARD_BUNDLE

            card_fn mark_object_fn = &gc_heap::mark_object_simple;
#ifdef HEAP_ANALYZE
            heap_analyze_success = TRUE;
            if (heap_analyze_enabled)
            {
                internal_root_array_index = 0;
                current_obj = 0;
                current_obj_size = 0;
                mark_object_fn = &gc_heap::ha_mark_object_simple;
            }
#endif //HEAP_ANALYZE

            dprintf(3,("Marking cross generation pointers"));
            mark_through_cards_for_segments (mark_object_fn, FALSE);

            dprintf(3,("Marking cross generation pointers for large objects"));
            mark_through_cards_for_large_objects (mark_object_fn, FALSE);

            dprintf (3, ("marked by cards: %Id",
                (promoted_bytes (heap_number) - promoted_before_cards)));
            fire_mark_event (heap_number, ETW::GC_ROOT_OLDER, (promoted_bytes (heap_number) - last_promoted_bytes));
            last_promoted_bytes = promoted_bytes (heap_number);
        }
    }

#ifdef MH_SC_MARK
    if (do_mark_steal_p)
    {
        mark_steal();
    }
#endif //MH_SC_MARK

    // Dependent handles need to be scanned with a special algorithm (see the header comment on
    // scan_dependent_handles for more detail). We perform an initial scan without synchronizing with other
    // worker threads or processing any mark stack overflow. This is not guaranteed to complete the operation
    // but in a common case (where there are no dependent handles that are due to be collected) it allows us
    // to optimize away further scans. The call to scan_dependent_handles is what will cycle through more
    // iterations if required and will also perform processing of any mark stack overflow once the dependent
    // handle table has been fully promoted.
    GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
    scan_dependent_handles(condemned_gen_number, &sc, true);

#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for short weak handle scan"));
    gc_t_join.join(this, gc_join_null_dead_short_weak);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef HEAP_ANALYZE
        heap_analyze_enabled = FALSE;
        DACNotifyGcMarkEnd(condemned_gen_number);
#endif // HEAP_ANALYZE
        GCToEEInterface::AfterGcScanRoots (condemned_gen_number, max_generation, &sc);

#ifdef MULTIPLE_HEAPS
        if (!full_p)
        {
            // we used r_join and need to reinitialize states for it here.
            gc_t_join.r_init();
        }

        //start all threads on the roots.
        dprintf(3, ("Starting all gc thread for short weak handle scan"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS

    }

    // null out the target of short weakref that were not promoted.
    GCScan::GcShortWeakPtrScan(GCHeap::Promote, condemned_gen_number, max_generation,&sc);

// MTHTS: keep by single thread
#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for finalization"));
    gc_t_join.join(this, gc_join_scan_finalization);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS

    {
#ifdef MULTIPLE_HEAPS
        //start all threads on the roots.
        dprintf(3, ("Starting all gc thread for Finalization"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    //Handle finalization.
    // Snapshot the "live" promoted count so that bytes promoted purely for
    // finalization can be subtracted back out at the end of this function.
    size_t promoted_bytes_live = promoted_bytes (heap_number);

#ifdef FEATURE_PREMORTEM_FINALIZATION
    dprintf (3, ("Finalize marking"));
    finalize_queue->ScanForFinalization (GCHeap::Promote, condemned_gen_number, mark_only_p, __this);

#ifdef GC_PROFILING
    if (CORProfilerTrackGC())
    {
        finalize_queue->WalkFReachableObjects (__this);
    }
#endif //GC_PROFILING
#endif // FEATURE_PREMORTEM_FINALIZATION

    // Scan dependent handles again to promote any secondaries associated with primaries that were promoted
    // for finalization. As before scan_dependent_handles will also process any mark stack overflow.
    scan_dependent_handles(condemned_gen_number, &sc, false);

#ifdef MULTIPLE_HEAPS
    dprintf(3, ("Joining for weak pointer deletion"));
    gc_t_join.join(this, gc_join_null_dead_long_weak);
    if (gc_t_join.joined())
    {
        //start all threads on the roots.
        dprintf(3, ("Starting all gc thread for weak pointer deletion"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    // null out the target of long weakref that were not promoted.
    GCScan::GcWeakPtrScan (GCHeap::Promote, condemned_gen_number, max_generation, &sc);

// MTHTS: keep by single thread
#ifdef MULTIPLE_HEAPS
#ifdef MARK_LIST
#ifdef PARALLEL_MARK_LIST_SORT
//    unsigned long start = GetCycleCount32();
    sort_mark_list();
//    printf("sort_mark_list took %u cycles\n", GetCycleCount32() - start);
#endif //PARALLEL_MARK_LIST_SORT
#endif //MARK_LIST

    dprintf (3, ("Joining for sync block cache entry scanning"));
    gc_t_join.join(this, gc_join_null_dead_syncblk);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        // scan for deleted entries in the syncblk cache
        GCScan::GcWeakPtrScanBySingleThread (condemned_gen_number, max_generation, &sc);

#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
        if (g_fEnableARM)
        {
            size_t promoted_all_heaps = 0;
#ifdef MULTIPLE_HEAPS
            for (int i = 0; i < n_heaps; i++)
            {
                promoted_all_heaps += promoted_bytes (i);
            }
#else
            promoted_all_heaps = promoted_bytes (heap_number);
#endif //MULTIPLE_HEAPS
            SystemDomain::RecordTotalSurvivedBytes (promoted_all_heaps);
        }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING

#ifdef MULTIPLE_HEAPS

#ifdef MARK_LIST
#ifndef PARALLEL_MARK_LIST_SORT
        //compact g_mark_list and sort it.
        combine_mark_lists();
#endif //PARALLEL_MARK_LIST_SORT
#endif //MARK_LIST

        //decide on promotion
        // Promote if the promoted volume on any heap exceeds a threshold m that
        // scales with the condemned generations' minimum GC sizes, or if m
        // exceeds the size of the next older generation.
        if (!settings.promotion)
        {
            size_t m = 0;
            for (int n = 0; n <= condemned_gen_number;n++)
            {
                m +=  (size_t)(dd_min_gc_size (dynamic_data_of (n))*(n+1)*0.1);
            }

            for (int i = 0; i < n_heaps; i++)
            {
                dynamic_data* dd = g_heaps[i]->dynamic_data_of (min (condemned_gen_number +1,
                                                                     max_generation));
                size_t older_gen_size = (dd_current_size (dd) +
                                         (dd_desired_allocation (dd) -
                                         dd_new_allocation (dd)));

                if ((m > (older_gen_size)) ||
                    (promoted_bytes (i) > m))
                {
                    settings.promotion = TRUE;
                }
            }
        }

#ifdef SNOOP_STATS
        if (do_mark_steal_p)
        {
            size_t objects_checked_count = 0;
            size_t zero_ref_count = 0;
            size_t objects_marked_count = 0;
            size_t check_level_count = 0;
            size_t busy_count = 0;
            size_t interlocked_count = 0;
            size_t partial_mark_parent_count = 0;
            size_t stolen_or_pm_count = 0;
            size_t stolen_entry_count = 0;
            size_t pm_not_ready_count = 0;
            size_t normal_count = 0;
            size_t stack_bottom_clear_count = 0;

            // Aggregate the per-heap snoop statistics before printing totals.
            for (int i = 0; i < n_heaps; i++)
            {
                gc_heap* hp = g_heaps[i];
                hp->print_snoop_stat();
                objects_checked_count += hp->snoop_stat.objects_checked_count;
                zero_ref_count += hp->snoop_stat.zero_ref_count;
                objects_marked_count += hp->snoop_stat.objects_marked_count;
                check_level_count += hp->snoop_stat.check_level_count;
                busy_count += hp->snoop_stat.busy_count;
                interlocked_count += hp->snoop_stat.interlocked_count;
                partial_mark_parent_count += hp->snoop_stat.partial_mark_parent_count;
                stolen_or_pm_count += hp->snoop_stat.stolen_or_pm_count;
                stolen_entry_count += hp->snoop_stat.stolen_entry_count;
                pm_not_ready_count += hp->snoop_stat.pm_not_ready_count;
                normal_count += hp->snoop_stat.normal_count;
                stack_bottom_clear_count += hp->snoop_stat.stack_bottom_clear_count;
            }

            fflush (stdout);

            printf ("-------total stats-------\n");
            printf ("%8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s | %8s\n",
                "checked", "zero", "marked", "level", "busy", "xchg", "pmparent", "s_pm", "stolen", "nready", "normal", "clear");
            printf ("%8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d | %8d\n",
                objects_checked_count,
                zero_ref_count,
                objects_marked_count,
                check_level_count,
                busy_count,
                interlocked_count,
                partial_mark_parent_count,
                stolen_or_pm_count,
                stolen_entry_count,
                pm_not_ready_count,
                normal_count,
                stack_bottom_clear_count);
        }
#endif //SNOOP_STATS

        //start all threads.
        dprintf(3, ("Starting all threads for end of mark phase"));
        gc_t_join.restart();
#else //MULTIPLE_HEAPS

        //decide on promotion
        // Workstation variant of the promotion decision above; note the smaller
        // per-generation factor (0.06 vs the 0.1 used for server GC).
        if (!settings.promotion)
        {
            size_t m = 0;
            for (int n = 0; n <= condemned_gen_number;n++)
            {
                m +=  (size_t)(dd_min_gc_size (dynamic_data_of (n))*(n+1)*0.06);
            }
            dynamic_data* dd = dynamic_data_of (min (condemned_gen_number +1,
                                                     max_generation));
            size_t older_gen_size = (dd_current_size (dd) +
                                     (dd_desired_allocation (dd) -
                                     dd_new_allocation (dd)));

            dprintf (2, ("promotion threshold: %Id, promoted bytes: %Id size n+1: %Id",
                         m, promoted_bytes (heap_number), older_gen_size));

            if ((m > older_gen_size) ||
                    (promoted_bytes (heap_number) > m))
            {
                settings.promotion = TRUE;
            }
        }

#endif //MULTIPLE_HEAPS
    }

#ifdef MULTIPLE_HEAPS
#ifdef MARK_LIST
#ifdef PARALLEL_MARK_LIST_SORT
//    start = GetCycleCount32();
    merge_mark_lists();
//    printf("merge_mark_lists took %u cycles\n", GetCycleCount32() - start);
#endif //PARALLEL_MARK_LIST_SORT
#endif //MARK_LIST
#endif //MULTIPLE_HEAPS

#ifdef BACKGROUND_GC
    total_promoted_bytes = promoted_bytes (heap_number);
#endif //BACKGROUND_GC

    // Remove the bytes promoted purely for finalization from the promoted count.
    promoted_bytes (heap_number) -= promoted_bytes_live;

#ifdef TIME_GC
    finish = GetCycleCount32();
    mark_time = finish - start;
#endif //TIME_GC

    dprintf(2,("---- End of mark phase ----"));
}

inline
void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject, uint8_t* low,
                          uint8_t* high)
{
    // Sets the pinned bit on o if it lies within [low, high); ppObject (the
    // location holding the reference) is only used for the ETW pin event.
    dprintf (3, ("Pinning %Ix", (size_t)o));
    if ((o >= low) && (o < high))
    {
        dprintf(3,("^%Ix^", (size_t)o));
        set_pinned (o);

#ifdef FEATURE_EVENT_TRACE
        if(EventEnabledPinObjectAtGCTime())
        {
            fire_etw_pin_object_event(o, ppObject);
        }
#endif // FEATURE_EVENT_TRACE

#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE)
        num_pinned_objects++;
#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE
    }
}

#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE)
// Returns the sum of num_pinned_objects across all heaps (server GC) or the
// single heap's counter (workstation GC).
size_t gc_heap::get_total_pinned_objects()
{
#ifdef MULTIPLE_HEAPS
    size_t total_num_pinned_objects = 0;
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        gc_heap* hp = gc_heap::g_heaps[i];
        total_num_pinned_objects += hp->num_pinned_objects;
    }
    return total_num_pinned_objects;
#else //MULTIPLE_HEAPS
    return num_pinned_objects;
#endif //MULTIPLE_HEAPS
}
#endif //ENABLE_PERF_COUNTERS | FEATURE_EVENT_TRACE

// Resets the pinned plug queue and widens the mark-stack overflow window to
// "empty" (max = 0, min = MAX_PTR) ahead of a new mark phase.
void gc_heap::reset_mark_stack ()
{
    reset_pinned_queue();
    max_overflow_address = 0;
    min_overflow_address = MAX_PTR;
}

#ifdef FEATURE_STRUCTALIGN
//
// The word with left child, right child, and align info is laid out as follows:
//
//     |   upper short word   |   lower short word   |
//     |<------------> <----->|<------------> <----->|
//     |  left child   info hi| right child   info lo|
// x86: |    10 bits     6 bits|   10 bits      6 bits|
//
// where left/right child are signed values and concat(info hi, info lo) is unsigned.
//
// The "align info" encodes two numbers: the required alignment (a power of two)
// and the misalignment (the number of machine words the destination address needs
// to be adjusted by to provide alignment - so this number is always smaller than
// the required alignment).  Thus, the two can be represented as the "logical or"
// of the two numbers.  Note that the actual pad is computed from the misalignment
// by adding the alignment iff the misalignment is non-zero and less than min_obj_size.
//

// The number of bits in a brick.
#if defined (_TARGET_AMD64_)
#define brick_bits (12)
#else
#define brick_bits (11)
#endif //_TARGET_AMD64_
C_ASSERT(brick_size == (1 << brick_bits));

// The number of bits needed to represent the offset to a child node.
// "brick_bits + 1" allows us to represent a signed offset within a brick.
#define child_bits (brick_bits + 1 - LOG2_PTRSIZE)

// The number of bits in each of the pad hi, pad lo fields.
#define pad_bits (sizeof(short) * 8 - child_bits)

#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1))
#define pad_mask ((1 << pad_bits) - 1)
#define pad_from_short(w) ((size_t)(w) & pad_mask)
#else // FEATURE_STRUCTALIGN
#define child_from_short(w) (w)
#endif // FEATURE_STRUCTALIGN

// Plug-tree node accessors. The node metadata (a plug_and_pair/plug_and_reloc/
// plug_and_gap record) is stored in memory immediately BEFORE the plug address,
// hence the pervasive ((...*)node)[-1] indexing.

inline
short node_left_child(uint8_t* node)
{
    return child_from_short(((plug_and_pair*)node)[-1].m_pair.left);
}

inline
void set_node_left_child(uint8_t* node, ptrdiff_t val)
{
    // Child offsets are signed brick-relative distances; they must fit the
    // packed short field, hence the range and alignment asserts.
    assert (val > -(ptrdiff_t)brick_size);
    assert (val < (ptrdiff_t)brick_size);
    assert (Aligned (val));
#ifdef FEATURE_STRUCTALIGN
    // Preserve the align-info bits that share this short with the child offset.
    size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left);
    ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad;
#else // FEATURE_STRUCTALIGN
    ((plug_and_pair*)node)[-1].m_pair.left = (short)val;
#endif // FEATURE_STRUCTALIGN
    assert (node_left_child (node) == val);
}

inline
short node_right_child(uint8_t* node)
{
    return child_from_short(((plug_and_pair*)node)[-1].m_pair.right);
}

inline
void set_node_right_child(uint8_t* node, ptrdiff_t val)
{
    assert (val > -(ptrdiff_t)brick_size);
    assert (val < (ptrdiff_t)brick_size);
    assert (Aligned (val));
#ifdef FEATURE_STRUCTALIGN
    size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right);
    ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad;
#else // FEATURE_STRUCTALIGN
    ((plug_and_pair*)node)[-1].m_pair.right = (short)val;
#endif // FEATURE_STRUCTALIGN
    assert (node_right_child (node) == val);
}

#ifdef FEATURE_STRUCTALIGN
// Decodes the packed align info (see the layout comment above) into the
// required alignment (a power of two) and the pad to apply.
void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad)
{
    // Extract the single-number aligninfo from the fields.
    short left = ((plug_and_pair*)node)[-1].m_pair.left;
    short right = ((plug_and_pair*)node)[-1].m_pair.right;
    ptrdiff_t pad_shifted = (pad_from_short(left) << pad_bits) | pad_from_short(right);
    ptrdiff_t aligninfo = pad_shifted * DATA_ALIGNMENT;

    // Replicate the topmost bit into all lower bits.
    ptrdiff_t x = aligninfo;
    x |= x >> 8;
    x |= x >> 4;
    x |= x >> 2;
    x |= x >> 1;

    // Clear all bits but the highest.
    requiredAlignment = (int)(x ^ (x >> 1));
    pad = aligninfo - requiredAlignment;
    pad += AdjustmentForMinPadSize(pad, requiredAlignment);
}

inline
ptrdiff_t node_alignpad (uint8_t* node)
{
    int requiredAlignment;
    ptrdiff_t alignpad;
    node_aligninfo (node, requiredAlignment, alignpad);
    return alignpad;
}

// Clears only the align-info bits, leaving the child offsets intact.
void clear_node_aligninfo (uint8_t* node)
{
    ((plug_and_pair*)node)[-1].m_pair.left &= ~0 << pad_bits;
    ((plug_and_pair*)node)[-1].m_pair.right &= ~0 << pad_bits;
}

void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad)
{
    // Encode the alignment requirement and alignment offset as a single number
    // as described above.
    ptrdiff_t aligninfo = (size_t)requiredAlignment + (pad & (requiredAlignment-1));
    assert (Aligned (aligninfo));
    ptrdiff_t aligninfo_shifted = aligninfo / DATA_ALIGNMENT;
    assert (aligninfo_shifted < (1 << (pad_bits + pad_bits)));

    ptrdiff_t hi = aligninfo_shifted >> pad_bits;
    assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.left) == 0);
    ((plug_and_pair*)node)[-1].m_pair.left |= hi;

    ptrdiff_t lo = aligninfo_shifted & pad_mask;
    assert (pad_from_short(((plug_and_gap*)node)[-1].m_pair.right) == 0);
    ((plug_and_pair*)node)[-1].m_pair.right |= lo;

#ifdef _DEBUG
    // Round-trip check: re-decode and verify both values survived the packing.
    int requiredAlignment2;
    ptrdiff_t pad2;
    node_aligninfo (node, requiredAlignment2, pad2);
    assert (requiredAlignment == requiredAlignment2);
    assert (pad == pad2);
#endif // _DEBUG
}
#endif // FEATURE_STRUCTALIGN

// Large-object plugs store the relocation distance in a full ptrdiff_t with no
// flag bits, unlike the small-object variant below.
inline
void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val)
{
    ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc);
    *place = val;
}

inline
ptrdiff_t loh_node_relocation_distance(uint8_t* node)
{
    return (((loh_obj_and_pad*)node)[-1].reloc);
}

// Small-object plugs: the low 2 bits of reloc are flags ("realigned" bit 0,
// "left" bit 2 via the macros below), so the distance is masked/merged.
inline
ptrdiff_t node_relocation_distance (uint8_t* node)
{
    return (((plug_and_reloc*)(node))[-1].reloc & ~3);
}

inline
void set_node_relocation_distance(uint8_t* node, ptrdiff_t val)
{
    assert (val == (val & ~3));
    ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc);
    //clear the left bit and the relocation field
    *place &= 1;
    // store the value
    *place |= val;
}

#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2)

#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2;

#ifndef FEATURE_STRUCTALIGN
void set_node_realigned(uint8_t* node)
{
    ((plug_and_reloc*)(node))[-1].reloc |= 1;
}

void clear_node_realigned(uint8_t* node)
{
#ifdef RESPECT_LARGE_ALIGNMENT
    ((plug_and_reloc*)(node))[-1].reloc &= ~1;
#else
    UNREFERENCED_PARAMETER(node);
#endif //RESPECT_LARGE_ALIGNMENT
}
#endif // FEATURE_STRUCTALIGN

inline
size_t node_gap_size (uint8_t* node)
{
    return ((plug_and_gap *)node)[-1].gap;
}

// Initializes the gap record in front of a plug: zeroes the reloc and
// left/right fields, then stores the gap size.
void set_gap_size (uint8_t* node, size_t size)
{
    assert (Aligned (size));

    // clear the 2 uint32_t used by the node.
    ((plug_and_gap *)node)[-1].reloc = 0;
    ((plug_and_gap *)node)[-1].lr =0;
    ((plug_and_gap *)node)[-1].gap = size;

    assert ((size == 0 )||(size >= sizeof(plug_and_reloc)));

}

// Inserts new_node into the per-brick plug tree. sequence_number is the 1-based
// insertion index within the brick; its parity / power-of-two-ness determines
// where the node attaches, yielding a balanced tree without rebalancing.
// Returns the (possibly new) tree root.
uint8_t* gc_heap::insert_node (uint8_t* new_node, size_t sequence_number,
                   uint8_t* tree, uint8_t* last_node)
{
    dprintf (3, ("IN: %Ix(%Ix), T: %Ix(%Ix), L: %Ix(%Ix) [%Ix]",
                 (size_t)new_node, brick_of(new_node),
                 (size_t)tree, brick_of(tree),
                 (size_t)last_node, brick_of(last_node),
                 sequence_number));
    if (power_of_two_p (sequence_number))
    {
        // Power-of-two index: the new node becomes the root with the old tree
        // as its left child.
        set_node_left_child (new_node, (tree - new_node));
        dprintf (3, ("NT: %Ix, LC->%Ix", (size_t)new_node, (tree - new_node)));
        tree = new_node;
    }
    else
    {
        if (oddp (sequence_number))
        {
            // Odd index: hang the new node to the right of the previous node.
            set_node_right_child (last_node, (new_node - last_node));
            dprintf (3, ("%Ix RC->%Ix", last_node, (new_node - last_node)));
        }
        else
        {
            // Even, non-power-of-two index: walk down the right spine to the
            // insertion point determined by the index's set-bit count.
            uint8_t* earlier_node = tree;
            size_t imax = logcount(sequence_number) - 2;
            for (size_t i = 0; i != imax; i++)
            {
                earlier_node = earlier_node + node_right_child (earlier_node);
            }
            int tmp_offset = node_right_child (earlier_node);
            assert (tmp_offset); // should never be empty
            set_node_left_child (new_node, ((earlier_node + tmp_offset ) - new_node));
            set_node_right_child (earlier_node, (new_node - earlier_node));

            dprintf (3, ("%Ix LC->%Ix, %Ix RC->%Ix",
                new_node, ((earlier_node + tmp_offset ) - new_node),
                earlier_node, (new_node - earlier_node)));
        }
    }
    return tree;
}

// Writes the finished plug tree for current_brick into the brick table and
// marks the bricks between plug_end and x as continuation (negative offset) or
// empty (-1) entries. Returns the brick containing x.
size_t gc_heap::update_brick_table (uint8_t* tree, size_t current_brick,
                                    uint8_t* x, uint8_t* plug_end)
{
    dprintf (3, ("tree: %Ix, current b: %Ix, x: %Ix, plug_end: %Ix",
        tree, current_brick, x, plug_end));

    if (tree > 0)
    {
        dprintf (3, ("b- %Ix->%Ix pointing to tree %Ix",
            current_brick,
            (size_t)(tree - brick_address (current_brick)), tree));
        set_brick (current_brick, (tree - brick_address (current_brick)));
    }
    else
    {
        dprintf (3, ("b- %Ix->-1", current_brick));
        set_brick (current_brick, -1);
    }
    size_t  b = 1 + current_brick;
    ptrdiff_t  offset = 0;
    size_t last_br = brick_of (plug_end-1);
    current_brick = brick_of (x-1);
    dprintf (3, ("ubt: %Ix->%Ix]->%Ix]", b, last_br, current_brick));
    // Bricks still covered by the last plug get decreasing negative offsets
    // (pointing back toward the brick that holds the tree); bricks wholly past
    // the plug are marked empty with -1.
    while (b <= current_brick)
    {
        if (b <= last_br)
        {
            set_brick (b, --offset);
        }
        else
        {
            set_brick (b,-1);
        }
        b++;
    }
    return brick_of (x);
}

// Plans the allocation start for generation gen out of consing_gen's current
// allocation context. next_plug_to_allocate (may be null) caps how much of the
// remaining space can be absorbed into the start object's size.
void gc_heap::plan_generation_start (generation* gen, generation* consing_gen, uint8_t* next_plug_to_allocate)
{
#ifdef BIT64
    // We should never demote big plugs to gen0.
    if (gen == youngest_generation)
    {
        heap_segment* seg = ephemeral_heap_segment;
        size_t mark_stack_large_bos = mark_stack_bos;
        // NOTE(review): large_plug_pos is declared but never used in this body.
        size_t large_plug_pos = 0;
        // Scan the remaining pinned plugs; whenever one exceeds the demotion
        // threshold, drain the queue up to (and including) it so those plugs
        // stay in the older generation instead of being demoted.
        while (mark_stack_large_bos < mark_stack_tos)
        {
            if (mark_stack_array[mark_stack_large_bos].len > demotion_plug_len_th)
            {
                while (mark_stack_bos <= mark_stack_large_bos)
                {
                    size_t entry = deque_pinned_plug();
                    size_t len = pinned_len (pinned_plug_of (entry));
                    uint8_t* plug = pinned_plug (pinned_plug_of(entry));
                    if (len > demotion_plug_len_th)
                    {
                        dprintf (2, ("ps(%d): S %Ix (%Id)(%Ix)", gen->gen_num, plug, len, (plug+len)));
                    }
                    // Record the gap in front of the pin relative to the consing
                    // generation's allocation pointer, then advance past the pin.
                    pinned_len (pinned_plug_of (entry)) = plug - generation_allocation_pointer (consing_gen);
                    assert(mark_stack_array[entry].len == 0 ||
                            mark_stack_array[entry].len >= Align(min_obj_size));
                    generation_allocation_pointer (consing_gen) = plug + len;
                    generation_allocation_limit (consing_gen) = heap_segment_plan_allocated (seg);
                    set_allocator_next_pin (consing_gen);
                }
            }

            mark_stack_large_bos++;
        }
    }
#endif // BIT64

    generation_plan_allocation_start (gen) =
        allocate_in_condemned_generations (consing_gen, Align (min_obj_size), -1);
    generation_plan_allocation_start_size (gen) = Align (min_obj_size);
    size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    if (next_plug_to_allocate)
    {
        size_t dist_to_next_plug = (size_t)(next_plug_to_allocate - generation_allocation_pointer (consing_gen));
        if (allocation_left > dist_to_next_plug)
        {
            allocation_left = dist_to_next_plug;
        }
    }
    // If what's left is too small to hold another object, fold it into the
    // start object's size so no unusable sliver remains.
    if (allocation_left < Align (min_obj_size))
    {
        generation_plan_allocation_start_size (gen) += allocation_left;
        generation_allocation_pointer (consing_gen) += allocation_left;
    }

    dprintf (1, ("plan alloc gen%d(%Ix) start at %Ix (ptr: %Ix, limit: %Ix, next: %Ix)", gen->gen_num,
        generation_plan_allocation_start (gen),
        generation_plan_allocation_start_size (gen),
        generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen),
        next_plug_to_allocate));
}

// Variant of plan_generation_start used during heap expansion: allocates the
// generation start object in the expanded heap instead of the condemned area.
void gc_heap::realloc_plan_generation_start (generation* gen, generation* consing_gen)
{
    BOOL adjacentp = FALSE;

    generation_plan_allocation_start (gen) =
        allocate_in_expanded_heap (consing_gen, Align(min_obj_size), adjacentp, 0,
#ifdef SHORT_PLUGS
                                   FALSE, NULL,
#endif //SHORT_PLUGS
                                   FALSE, -1 REQD_ALIGN_AND_OFFSET_ARG);

    generation_plan_allocation_start_size (gen) = Align (min_obj_size);
    size_t allocation_left = (size_t)(generation_allocation_limit (consing_gen) - generation_allocation_pointer (consing_gen));
    if ((allocation_left < Align (min_obj_size)) &&
         (generation_allocation_limit (consing_gen)!=heap_segment_plan_allocated (generation_allocation_segment (consing_gen))))
    {
        generation_plan_allocation_start_size (gen) += allocation_left;
        generation_allocation_pointer (consing_gen) += allocation_left;
    }

    // NOTE(review): the message reports gen->gen_num but the first %Ix logs
    // generation_plan_allocation_start of consing_gen, not gen — confirm this
    // debug output is intentional (log-only, no functional impact).
    dprintf (1, ("plan re-alloc gen%d start at %Ix (ptr: %Ix, limit: %Ix)", gen->gen_num,
        generation_plan_allocation_start (consing_gen),
        generation_allocation_pointer (consing_gen),
        generation_allocation_limit (consing_gen)));
}

// Walks from the condemned generation down to gen0 and guarantees every
// generation has a planned allocation start, then records the resulting plan
// allocation size on the ephemeral segment.
void gc_heap::plan_generation_starts (generation*& consing_gen)
{
    //make sure that every generation has a planned allocation start
    int  gen_number = settings.condemned_generation;
    while (gen_number >= 0)
    {
        if (gen_number < max_generation)
        {
            consing_gen = ensure_ephemeral_heap_segment (consing_gen);
        }
        generation* gen = generation_of (gen_number);
        if (0 == generation_plan_allocation_start (gen))
        {
            plan_generation_start (gen, consing_gen, 0);
            assert (generation_plan_allocation_start (gen));
        }
        gen_number--;
    }
    // now we know the planned allocation size
    heap_segment_plan_allocated (ephemeral_heap_segment) =
        generation_allocation_pointer (consing_gen);
}

// If pinned plugs destined for gen1 would cause excessive fragmentation
// (pin_frag_ratio > 0.15 and pin_surv_ratio > 0.30), advance past them now so
// they stay in (are promoted to) the older generation instead of being demoted.
void gc_heap::advance_pins_for_demotion (generation* gen)
{
    uint8_t* original_youngest_start = generation_allocation_start (youngest_generation);
    heap_segment* seg = ephemeral_heap_segment;

    if ((!(pinned_plug_que_empty_p())))
    {
        size_t gen1_pinned_promoted = generation_pinned_allocation_compact_size (generation_of (max_generation));
        size_t gen1_pins_left = dd_pinned_survived_size (dynamic_data_of (max_generation - 1)) - gen1_pinned_promoted;
        size_t total_space_to_skip = last_gen1_pin_end - generation_allocation_pointer (gen);
        float pin_frag_ratio = (float)gen1_pins_left / (float)total_space_to_skip;
        float pin_surv_ratio = (float)gen1_pins_left / (float)(dd_survived_size (dynamic_data_of (max_generation - 1)));
        if ((pin_frag_ratio > 0.15) && (pin_surv_ratio > 0.30))
        {
            while (!pinned_plug_que_empty_p() &&
                    (pinned_plug (oldest_pin()) < original_youngest_start))
            {
                size_t entry = deque_pinned_plug();
                size_t len = pinned_len (pinned_plug_of (entry));
                uint8_t* plug = pinned_plug (pinned_plug_of(entry));
                pinned_len (pinned_plug_of (entry)) = plug - generation_allocation_pointer (gen);
                assert(mark_stack_array[entry].len == 0 ||
                        mark_stack_array[entry].len >= Align(min_obj_size));
                generation_allocation_pointer (gen) = plug + len;
                generation_allocation_limit (gen) = heap_segment_plan_allocated (seg);
                set_allocator_next_pin (gen);

                //Add the size of the pinned plug to the right pinned allocations
                //find out which gen this pinned plug came from
                int frgn = object_gennum (plug);
                if ((frgn != (int)max_generation) && settings.promotion)
                {
                    int togn = object_gennum_plan (plug);
                    generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len;
                    if (frgn < togn)
                    {
                        generation_pinned_allocation_compact_size (generation_of (togn)) += len;
                    }
                }

                dprintf (2, ("skipping gap %d, pin %Ix (%Id)",
                    pinned_len (pinned_plug_of (entry)), plug, len));
            }
        }
        dprintf (2, ("ad_p_d: PL: %Id, SL: %Id, pfr: %d, psr: %d",
            gen1_pins_left, total_space_to_skip, (int)(pin_frag_ratio*100), (int)(pin_surv_ratio*100)));
    }
}

void gc_heap::process_ephemeral_boundaries (uint8_t* x,
                                            int& active_new_gen_number,
                                            int& active_old_gen_number,
                                            generation*& consing_gen,
                                            BOOL& allocate_in_condemned)
{
retry:
    if ((active_old_gen_number > 0) &&
        (x >= generation_allocation_start (generation_of (active_old_gen_number - 1))))
    {
        dprintf (1, ("crossing gen%d, x is %Ix", active_old_gen_number - 1, x));

        if (!pinned_plug_que_empty_p())
        {
            dprintf (1, ("oldest pin: %Ix(%Id)",
                pinned_plug (oldest_pin()),
                (x - pinned_plug (oldest_pin()))));
        }

        if (active_old_gen_number <= (settings.promotion ? (max_generation - 1) : max_generation))
        {
            active_new_gen_number--;
        }

        active_old_gen_number--;
        assert ((!settings.promotion) || (active_new_gen_number>0));

        if (active_new_gen_number == (max_generation - 1))
        {
#ifdef FREE_USAGE_STATS
            if (settings.condemned_generation == max_generation)
            {
                // We need to do this before we skip the rest of the pinned plugs.
                generation* gen_2 = generation_of (max_generation);
                generation* gen_1 = generation_of (max_generation - 1);

                size_t total_num_pinned_free_spaces_left = 0;

                // We are about to allocate gen1, check to see how efficient fitting in gen2 pinned free spaces is.
+ for (int j = 0; j < NUM_GEN_POWER2; j++) + { + dprintf (1, ("[h%d][#%Id]2^%d: current: %Id, S: 2: %Id, 1: %Id(%Id)", + heap_number, + settings.gc_index, + (j + 10), + gen_2->gen_current_pinned_free_spaces[j], + gen_2->gen_plugs[j], gen_1->gen_plugs[j], + (gen_2->gen_plugs[j] + gen_1->gen_plugs[j]))); + + total_num_pinned_free_spaces_left += gen_2->gen_current_pinned_free_spaces[j]; + } + + float pinned_free_list_efficiency = 0; + size_t total_pinned_free_space = generation_allocated_in_pinned_free (gen_2) + generation_pinned_free_obj_space (gen_2); + if (total_pinned_free_space != 0) + { + pinned_free_list_efficiency = (float)(generation_allocated_in_pinned_free (gen_2)) / (float)total_pinned_free_space; + } + + dprintf (1, ("[h%d] gen2 allocated %Id bytes with %Id bytes pinned free spaces (effi: %d%%), %Id (%Id) left", + heap_number, + generation_allocated_in_pinned_free (gen_2), + total_pinned_free_space, + (int)(pinned_free_list_efficiency * 100), + generation_pinned_free_obj_space (gen_2), + total_num_pinned_free_spaces_left)); + } +#endif //FREE_USAGE_STATS + + //Go past all of the pinned plugs for this generation. + while (!pinned_plug_que_empty_p() && + (!in_range_for_segment ((pinned_plug (oldest_pin())), ephemeral_heap_segment))) + { + size_t entry = deque_pinned_plug(); + mark* m = pinned_plug_of (entry); + uint8_t* plug = pinned_plug (m); + size_t len = pinned_len (m); + // detect pinned block in different segment (later) than + // allocation segment, skip those until the oldest pin is in the ephemeral seg. + // adjust the allocation segment along the way (at the end it will + // be the ephemeral segment. 
+ heap_segment* nseg = heap_segment_in_range (generation_allocation_segment (consing_gen)); + + PREFIX_ASSUME(nseg != NULL); + + while (!((plug >= generation_allocation_pointer (consing_gen))&& + (plug < heap_segment_allocated (nseg)))) + { + //adjust the end of the segment to be the end of the plug + assert (generation_allocation_pointer (consing_gen)>= + heap_segment_mem (nseg)); + assert (generation_allocation_pointer (consing_gen)<= + heap_segment_committed (nseg)); + + heap_segment_plan_allocated (nseg) = + generation_allocation_pointer (consing_gen); + //switch allocation segment + nseg = heap_segment_next_rw (nseg); + generation_allocation_segment (consing_gen) = nseg; + //reset the allocation pointer and limits + generation_allocation_pointer (consing_gen) = + heap_segment_mem (nseg); + } + set_new_pin_info (m, generation_allocation_pointer (consing_gen)); + assert(pinned_len(m) == 0 || pinned_len(m) >= Align(min_obj_size)); + generation_allocation_pointer (consing_gen) = plug + len; + generation_allocation_limit (consing_gen) = + generation_allocation_pointer (consing_gen); + } + allocate_in_condemned = TRUE; + consing_gen = ensure_ephemeral_heap_segment (consing_gen); + } + + if (active_new_gen_number != max_generation) + { + if (active_new_gen_number == (max_generation - 1)) + { + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + if (!demote_gen1_p) + advance_pins_for_demotion (consing_gen); + } + + plan_generation_start (generation_of (active_new_gen_number), consing_gen, x); + + dprintf (1, ("process eph: allocated gen%d start at %Ix", + active_new_gen_number, + generation_plan_allocation_start (generation_of (active_new_gen_number)))); + + if ((demotion_low == MAX_PTR) && !pinned_plug_que_empty_p()) + { + uint8_t* pplug = pinned_plug (oldest_pin()); + if (object_gennum (pplug) > 0) + { + demotion_low = pplug; + dprintf (3, ("process eph: dlow->%Ix", demotion_low)); + } + } + + assert 
(generation_plan_allocation_start (generation_of (active_new_gen_number))); + } + + goto retry; + } +} + +inline +void gc_heap::seg_clear_mark_bits (heap_segment* seg) +{ + uint8_t* o = heap_segment_mem (seg); + while (o < heap_segment_allocated (seg)) + { + if (marked (o)) + { + clear_marked (o); + } + o = o + Align (size (o)); + } +} + +#ifdef FEATURE_BASICFREEZE +void gc_heap::sweep_ro_segments (heap_segment* start_seg) +{ + //go through all of the segment in range and reset the mark bit + //TODO works only on small object segments + + heap_segment* seg = start_seg; + + while (seg) + { + if (heap_segment_read_only_p (seg) && + heap_segment_in_range_p (seg)) + { +#ifdef BACKGROUND_GC + if (settings.concurrent) + { + seg_clear_mark_array_bits_soh (seg); + } + else + { + seg_clear_mark_bits (seg); + } +#else //BACKGROUND_GC + +#ifdef MARK_ARRAY + if(gc_can_use_concurrent) + { + clear_mark_array (max (heap_segment_mem (seg), lowest_address), + min (heap_segment_allocated (seg), highest_address), + FALSE); // read_only segments need the mark clear + } +#else //MARK_ARRAY + seg_clear_mark_bits (seg); +#endif //MARK_ARRAY + +#endif //BACKGROUND_GC + } + seg = heap_segment_next (seg); + } +} +#endif // FEATURE_BASICFREEZE + +#ifdef FEATURE_LOH_COMPACTION +inline +BOOL gc_heap::loh_pinned_plug_que_empty_p() +{ + return (loh_pinned_queue_bos == loh_pinned_queue_tos); +} + +void gc_heap::loh_set_allocator_next_pin() +{ + if (!(loh_pinned_plug_que_empty_p())) + { + mark* oldest_entry = loh_oldest_pin(); + uint8_t* plug = pinned_plug (oldest_entry); + generation* gen = large_object_generation; + if ((plug >= generation_allocation_pointer (gen)) && + (plug < generation_allocation_limit (gen))) + { + generation_allocation_limit (gen) = pinned_plug (oldest_entry); + } + else + assert (!((plug < generation_allocation_pointer (gen)) && + (plug >= heap_segment_mem (generation_allocation_segment (gen))))); + } +} + +size_t gc_heap::loh_deque_pinned_plug () +{ + size_t m = 
loh_pinned_queue_bos; + loh_pinned_queue_bos++; + return m; +} + +inline +mark* gc_heap::loh_pinned_plug_of (size_t bos) +{ + return &loh_pinned_queue[bos]; +} + +inline +mark* gc_heap::loh_oldest_pin() +{ + return loh_pinned_plug_of (loh_pinned_queue_bos); +} + +// If we can't grow the queue, then don't compact. +BOOL gc_heap::loh_enque_pinned_plug (uint8_t* plug, size_t len) +{ + assert(len >= Align(min_obj_size, get_alignment_constant (FALSE))); + + if (loh_pinned_queue_length <= loh_pinned_queue_tos) + { + if (!grow_mark_stack (loh_pinned_queue, loh_pinned_queue_length, LOH_PIN_QUEUE_LENGTH)) + { + return FALSE; + } + } + dprintf (3, (" P: %Ix(%Id)", plug, len)); + mark& m = loh_pinned_queue[loh_pinned_queue_tos]; + m.first = plug; + m.len = len; + loh_pinned_queue_tos++; + loh_set_allocator_next_pin(); + return TRUE; +} + +inline +BOOL gc_heap::loh_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit) +{ + dprintf (1235, ("trying to fit %Id(%Id) between %Ix and %Ix (%Id)", + size, + (2* AlignQword (loh_padding_obj_size) + size), + alloc_pointer, + alloc_limit, + (alloc_limit - alloc_pointer))); + + return ((alloc_pointer + 2* AlignQword (loh_padding_obj_size) + size) <= alloc_limit); +} + +uint8_t* gc_heap::loh_allocate_in_condemned (uint8_t* old_loc, size_t size) +{ + UNREFERENCED_PARAMETER(old_loc); + + generation* gen = large_object_generation; + dprintf (1235, ("E: p:%Ix, l:%Ix, s: %Id", + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + size)); + +retry: + { + heap_segment* seg = generation_allocation_segment (gen); + if (!(loh_size_fit_p (size, generation_allocation_pointer (gen), generation_allocation_limit (gen)))) + { + if ((!(loh_pinned_plug_que_empty_p()) && + (generation_allocation_limit (gen) == + pinned_plug (loh_oldest_pin())))) + { + mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); + size_t len = pinned_len (m); + uint8_t* plug = pinned_plug (m); + dprintf (1235, ("AIC: %Ix->%Ix(%Id)", 
generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen))); + pinned_len (m) = plug - generation_allocation_pointer (gen); + generation_allocation_pointer (gen) = plug + len; + + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + loh_set_allocator_next_pin(); + dprintf (1235, ("s: p: %Ix, l: %Ix (%Id)", + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + + goto retry; + } + + if (generation_allocation_limit (gen) != heap_segment_plan_allocated (seg)) + { + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + dprintf (1235, ("l->pa(%Ix)", generation_allocation_limit (gen))); + } + else + { + if (heap_segment_plan_allocated (seg) != heap_segment_committed (seg)) + { + heap_segment_plan_allocated (seg) = heap_segment_committed (seg); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + dprintf (1235, ("l->c(%Ix)", generation_allocation_limit (gen))); + } + else + { + if (loh_size_fit_p (size, generation_allocation_pointer (gen), heap_segment_reserved (seg)) && + (grow_heap_segment (seg, (generation_allocation_pointer (gen) + size + 2* AlignQword (loh_padding_obj_size))))) + { + dprintf (1235, ("growing seg from %Ix to %Ix\n", heap_segment_committed (seg), + (generation_allocation_pointer (gen) + size))); + + heap_segment_plan_allocated (seg) = heap_segment_committed (seg); + generation_allocation_limit (gen) = heap_segment_plan_allocated (seg); + + dprintf (1235, ("g: p: %Ix, l: %Ix (%Id)", + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + } + else + { + heap_segment* next_seg = heap_segment_next (seg); + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (seg)); + // Verify that all pinned plugs for this segment are consumed + if 
(!loh_pinned_plug_que_empty_p() && + ((pinned_plug (loh_oldest_pin()) < + heap_segment_allocated (seg)) && + (pinned_plug (loh_oldest_pin()) >= + generation_allocation_pointer (gen)))) + { + LOG((LF_GC, LL_INFO10, "remaining pinned plug %Ix while leaving segment on allocation", + pinned_plug (loh_oldest_pin()))); + dprintf (1236, ("queue empty: %d", loh_pinned_plug_que_empty_p())); + FATAL_GC_ERROR(); + } + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (seg)); + assert (generation_allocation_pointer (gen)<= + heap_segment_committed (seg)); + heap_segment_plan_allocated (seg) = generation_allocation_pointer (gen); + + if (next_seg) + { + // for LOH do we want to try starting from the first LOH every time though? + generation_allocation_segment (gen) = next_seg; + generation_allocation_pointer (gen) = heap_segment_mem (next_seg); + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + + dprintf (1235, ("n: p: %Ix, l: %Ix (%Id)", + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + } + else + { + dprintf (1, ("We ran out of space compacting, shouldn't happen")); + FATAL_GC_ERROR(); + } + } + } + } + loh_set_allocator_next_pin(); + + dprintf (1235, ("r: p: %Ix, l: %Ix (%Id)", + generation_allocation_pointer (gen), + generation_allocation_limit (gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + + goto retry; + } + } + + { + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (generation_allocation_segment (gen))); + uint8_t* result = generation_allocation_pointer (gen); + size_t loh_pad = AlignQword (loh_padding_obj_size); + + generation_allocation_pointer (gen) += size + loh_pad; + assert (generation_allocation_pointer (gen) <= generation_allocation_limit (gen)); + + dprintf (1235, ("p: %Ix, l: %Ix (%Id)", + generation_allocation_pointer (gen), + generation_allocation_limit 
(gen), + (generation_allocation_limit (gen) - generation_allocation_pointer (gen)))); + + assert (result + loh_pad); + return result + loh_pad; + } +} + +BOOL gc_heap::should_compact_loh() +{ + return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default)); +} + +inline +void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) +{ + if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) + { + if (all_heaps_compacted_p) + { + // If the compaction mode says to compact once and we are going to compact LOH, + // we need to revert it back to no compaction. + loh_compaction_mode = loh_compaction_default; + } + } +} + +BOOL gc_heap::plan_loh() +{ + if (!loh_pinned_queue) + { + loh_pinned_queue = new (nothrow) (mark [LOH_PIN_QUEUE_LENGTH]); + if (!loh_pinned_queue) + { + dprintf (1, ("Cannot allocate the LOH pinned queue (%Id bytes), no compaction", + LOH_PIN_QUEUE_LENGTH * sizeof (mark))); + return FALSE; + } + + loh_pinned_queue_length = LOH_PIN_QUEUE_LENGTH; + } + + if (heap_number == 0) + loh_pinned_queue_decay = LOH_PIN_DECAY; + + loh_pinned_queue_tos = 0; + loh_pinned_queue_bos = 0; + + generation* gen = large_object_generation; + heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); + PREFIX_ASSUME(start_seg != NULL); + heap_segment* seg = start_seg; + uint8_t* o = generation_allocation_start (gen); + + dprintf (1235, ("before GC LOH size: %Id, free list: %Id, free obj: %Id\n", + generation_size (max_generation + 1), + generation_free_list_space (gen), + generation_free_obj_space (gen))); + + while (seg) + { + heap_segment_plan_allocated (seg) = heap_segment_mem (seg); + seg = heap_segment_next (seg); + } + + seg = start_seg; + + //Skip the generation gap object + o = o + AlignQword (size (o)); + // We don't need to ever realloc gen3 start so don't touch it. 
+ heap_segment_plan_allocated (seg) = o; + generation_allocation_pointer (gen) = o; + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + generation_allocation_segment (gen) = start_seg; + + uint8_t* free_space_start = o; + uint8_t* free_space_end = o; + uint8_t* new_address = 0; + + while (1) + { + if (o >= heap_segment_allocated (seg)) + { + seg = heap_segment_next (seg); + if (seg == 0) + { + break; + } + + o = heap_segment_mem (seg); + } + + if (marked (o)) + { + free_space_end = o; + size_t size = AlignQword (size (o)); + dprintf (1235, ("%Ix(%Id) M", o, size)); + + if (pinned (o)) + { + // We don't clear the pinned bit yet so we can check in + // compact phase how big a free object we should allocate + // in front of the pinned object. We use the reloc address + // field to store this. + if (!loh_enque_pinned_plug (o, size)) + { + return FALSE; + } + new_address = o; + } + else + { + new_address = loh_allocate_in_condemned (o, size); + } + + loh_set_node_relocation_distance (o, (new_address - o)); + dprintf (1235, ("lobj %Ix-%Ix -> %Ix-%Ix (%Id)", o, (o + size), new_address, (new_address + size), (new_address - o))); + + o = o + size; + free_space_start = o; + if (o < heap_segment_allocated (seg)) + { + assert (!marked (o)); + } + } + else + { + while (o < heap_segment_allocated (seg) && !marked (o)) + { + dprintf (1235, ("%Ix(%Id) F (%d)", o, AlignQword (size (o)), ((method_table (o) == g_pFreeObjectMethodTable) ? 
1 : 0))); + o = o + AlignQword (size (o)); + } + } + } + + while (!loh_pinned_plug_que_empty_p()) + { + mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); + size_t len = pinned_len (m); + uint8_t* plug = pinned_plug (m); + + // detect pinned block in different segment (later) than + // allocation segment + heap_segment* nseg = heap_segment_rw (generation_allocation_segment (gen)); + + while ((plug < generation_allocation_pointer (gen)) || + (plug >= heap_segment_allocated (nseg))) + { + assert ((plug < heap_segment_mem (nseg)) || + (plug > heap_segment_reserved (nseg))); + //adjust the end of the segment to be the end of the plug + assert (generation_allocation_pointer (gen)>= + heap_segment_mem (nseg)); + assert (generation_allocation_pointer (gen)<= + heap_segment_committed (nseg)); + + heap_segment_plan_allocated (nseg) = + generation_allocation_pointer (gen); + //switch allocation segment + nseg = heap_segment_next_rw (nseg); + generation_allocation_segment (gen) = nseg; + //reset the allocation pointer and limits + generation_allocation_pointer (gen) = + heap_segment_mem (nseg); + } + + dprintf (1235, ("SP: %Ix->%Ix(%Id)", generation_allocation_pointer (gen), plug, plug - generation_allocation_pointer (gen))); + pinned_len (m) = plug - generation_allocation_pointer (gen); + generation_allocation_pointer (gen) = plug + len; + } + + heap_segment_plan_allocated (generation_allocation_segment (gen)) = generation_allocation_pointer (gen); + generation_allocation_pointer (gen) = 0; + generation_allocation_limit (gen) = 0; + + return TRUE; +} + +void gc_heap::compact_loh() +{ + assert (should_compact_loh()); + + generation* gen = large_object_generation; + heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); + PREFIX_ASSUME(start_seg != NULL); + heap_segment* seg = start_seg; + heap_segment* prev_seg = 0; + uint8_t* o = generation_allocation_start (gen); + + //Skip the generation gap object + o = o + AlignQword (size (o)); + // We don't 
need to ever realloc gen3 start so don't touch it. + uint8_t* free_space_start = o; + uint8_t* free_space_end = o; + generation_allocator (gen)->clear(); + generation_free_list_space (gen) = 0; + generation_free_obj_space (gen) = 0; + + loh_pinned_queue_bos = 0; + + while (1) + { + if (o >= heap_segment_allocated (seg)) + { + heap_segment* next_seg = heap_segment_next (seg); + + if ((heap_segment_plan_allocated (seg) == heap_segment_mem (seg)) && + (seg != start_seg) && !heap_segment_read_only_p (seg)) + { + dprintf (3, ("Preparing empty large segment %Ix", (size_t)seg)); + assert (prev_seg); + heap_segment_next (prev_seg) = next_seg; + heap_segment_next (seg) = freeable_large_heap_segment; + freeable_large_heap_segment = seg; + } + else + { + if (!heap_segment_read_only_p (seg)) + { + // We grew the segment to accommondate allocations. + if (heap_segment_plan_allocated (seg) > heap_segment_allocated (seg)) + { + if ((heap_segment_plan_allocated (seg) - plug_skew) > heap_segment_used (seg)) + { + heap_segment_used (seg) = heap_segment_plan_allocated (seg) - plug_skew; + } + } + + heap_segment_allocated (seg) = heap_segment_plan_allocated (seg); + dprintf (3, ("Trimming seg to %Ix[", heap_segment_allocated (seg))); + decommit_heap_segment_pages (seg, 0); + dprintf (1236, ("CLOH: seg: %Ix, alloc: %Ix, used: %Ix, committed: %Ix", + seg, + heap_segment_allocated (seg), + heap_segment_used (seg), + heap_segment_committed (seg))); + //heap_segment_used (seg) = heap_segment_allocated (seg) - plug_skew; + dprintf (1236, ("CLOH: used is set to %Ix", heap_segment_used (seg))); + } + prev_seg = seg; + } + + seg = next_seg; + if (seg == 0) + break; + else + { + o = heap_segment_mem (seg); + } + } + + if (marked (o)) + { + free_space_end = o; + size_t size = AlignQword (size (o)); + + size_t loh_pad; + uint8_t* reloc = o; + clear_marked (o); + + if (pinned (o)) + { + // We are relying on the fact the pinned objects are always looked at in the same order + // in plan phase and 
in compact phase. + mark* m = loh_pinned_plug_of (loh_deque_pinned_plug()); + uint8_t* plug = pinned_plug (m); + assert (plug == o); + + loh_pad = pinned_len (m); + clear_pinned (o); + } + else + { + loh_pad = AlignQword (loh_padding_obj_size); + + reloc += loh_node_relocation_distance (o); + gcmemcopy (reloc, o, size, TRUE); + } + + thread_gap ((reloc - loh_pad), loh_pad, gen); + + o = o + size; + free_space_start = o; + if (o < heap_segment_allocated (seg)) + { + assert (!marked (o)); + } + } + else + { + while (o < heap_segment_allocated (seg) && !marked (o)) + { + o = o + AlignQword (size (o)); + } + } + } + + assert (loh_pinned_plug_que_empty_p()); + + dprintf (1235, ("after GC LOH size: %Id, free list: %Id, free obj: %Id\n\n", + generation_size (max_generation + 1), + generation_free_list_space (gen), + generation_free_obj_space (gen))); +} + +void gc_heap::relocate_in_loh_compact() +{ + generation* gen = large_object_generation; + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + uint8_t* o = generation_allocation_start (gen); + + //Skip the generation gap object + o = o + AlignQword (size (o)); + + relocate_args args; + args.low = gc_low; + args.high = gc_high; + args.last_plug = 0; + + while (1) + { + if (o >= heap_segment_allocated (seg)) + { + seg = heap_segment_next (seg); + if (seg == 0) + { + break; + } + + o = heap_segment_mem (seg); + } + + if (marked (o)) + { + size_t size = AlignQword (size (o)); + + check_class_object_demotion (o); + if (contain_pointers (o)) + { + go_through_object_nostart (method_table (o), o, size(o), pval, + { + reloc_survivor_helper (pval); + }); + } + + o = o + size; + if (o < heap_segment_allocated (seg)) + { + assert (!marked (o)); + } + } + else + { + while (o < heap_segment_allocated (seg) && !marked (o)) + { + o = o + AlignQword (size (o)); + } + } + } + + dprintf (1235, ("after GC LOH size: %Id, free list: %Id, free obj: %Id\n\n", + generation_size (max_generation + 1), + 
generation_free_list_space (gen), + generation_free_obj_space (gen))); +} + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +void gc_heap::walk_relocation_loh (size_t profiling_context) +{ + generation* gen = large_object_generation; + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + uint8_t* o = generation_allocation_start (gen); + + //Skip the generation gap object + o = o + AlignQword (size (o)); + + while (1) + { + if (o >= heap_segment_allocated (seg)) + { + seg = heap_segment_next (seg); + if (seg == 0) + { + break; + } + + o = heap_segment_mem (seg); + } + + if (marked (o)) + { + size_t size = AlignQword (size (o)); + + ptrdiff_t reloc = loh_node_relocation_distance (o); + + STRESS_LOG_PLUG_MOVE(o, (o + size), -reloc); + + { + ETW::GCLog::MovedReference( + o, + (o + size), + reloc, + profiling_context, + settings.compaction); + } + + o = o + size; + if (o < heap_segment_allocated (seg)) + { + assert (!marked (o)); + } + } + else + { + while (o < heap_segment_allocated (seg) && !marked (o)) + { + o = o + AlignQword (size (o)); + } + } + } +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +BOOL gc_heap::loh_object_p (uint8_t* o) +{ +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps [0]; + int brick_entry = hp->brick_table[hp->brick_of (o)]; +#else //MULTIPLE_HEAPS + int brick_entry = brick_table[brick_of (o)]; +#endif //MULTIPLE_HEAPS + + return (brick_entry == 0); +} +#endif //FEATURE_LOH_COMPACTION + +void gc_heap::convert_to_pinned_plug (BOOL& last_npinned_plug_p, + BOOL& last_pinned_plug_p, + BOOL& pinned_plug_p, + size_t ps, + size_t& artificial_pinned_size) +{ + last_npinned_plug_p = FALSE; + last_pinned_plug_p = TRUE; + pinned_plug_p = TRUE; + artificial_pinned_size = ps; +} + +// Because we have the artifical pinning, we can't gaurantee that pinned and npinned +// plugs are always interleaved. 
+void gc_heap::store_plug_gap_info (uint8_t* plug_start, + uint8_t* plug_end, + BOOL& last_npinned_plug_p, + BOOL& last_pinned_plug_p, + uint8_t*& last_pinned_plug, + BOOL& pinned_plug_p, + uint8_t* last_object_in_last_plug, + BOOL& merge_with_last_pin_p, + // this is only for verification purpose + size_t last_plug_len) +{ + UNREFERENCED_PARAMETER(last_plug_len); + + if (!last_npinned_plug_p && !last_pinned_plug_p) + { + //dprintf (3, ("last full plug end: %Ix, full plug start: %Ix", plug_end, plug_start)); + dprintf (3, ("Free: %Ix", (plug_start - plug_end))); + assert ((plug_start == plug_end) || ((size_t)(plug_start - plug_end) >= Align (min_obj_size))); + set_gap_size (plug_start, plug_start - plug_end); + } + + if (pinned (plug_start)) + { + BOOL save_pre_plug_info_p = FALSE; + + if (last_npinned_plug_p || last_pinned_plug_p) + { + //if (last_plug_len == Align (min_obj_size)) + //{ + // dprintf (3, ("debugging only - last npinned plug is min, check to see if it's correct")); + // GCToOSInterface::DebugBreak(); + //} + save_pre_plug_info_p = TRUE; + } + + pinned_plug_p = TRUE; + last_npinned_plug_p = FALSE; + + if (last_pinned_plug_p) + { + dprintf (3, ("last plug %Ix was also pinned, should merge", last_pinned_plug)); + merge_with_last_pin_p = TRUE; + } + else + { + last_pinned_plug_p = TRUE; + last_pinned_plug = plug_start; + + enque_pinned_plug (last_pinned_plug, save_pre_plug_info_p, last_object_in_last_plug); + + if (save_pre_plug_info_p) + { + set_gap_size (plug_start, sizeof (gap_reloc_pair)); + } + } + } + else + { + if (last_pinned_plug_p) + { + //if (Align (last_plug_len) < min_pre_pin_obj_size) + //{ + // dprintf (3, ("debugging only - last pinned plug is min, check to see if it's correct")); + // GCToOSInterface::DebugBreak(); + //} + + save_post_plug_info (last_pinned_plug, last_object_in_last_plug, plug_start); + set_gap_size (plug_start, sizeof (gap_reloc_pair)); + + verify_pins_with_post_plug_info("after saving post plug info"); + } + 
last_npinned_plug_p = TRUE; + last_pinned_plug_p = FALSE; + } +} + +void gc_heap::record_interesting_data_point (interesting_data_point idp) +{ +#ifdef GC_CONFIG_DRIVEN + (interesting_data_per_gc[idp])++; +#else + UNREFERENCED_PARAMETER(idp); +#endif //GC_CONFIG_DRIVEN +} + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif //_PREFAST_ +void gc_heap::plan_phase (int condemned_gen_number) +{ + size_t old_gen2_allocated = 0; + size_t old_gen2_size = 0; + + if (condemned_gen_number == (max_generation - 1)) + { + old_gen2_allocated = generation_free_list_allocated (generation_of (max_generation)); + old_gen2_size = generation_size (max_generation); + } + + assert (settings.concurrent == FALSE); + + // %type% category = quote (plan); +#ifdef TIME_GC + unsigned start; + unsigned finish; + start = GetCycleCount32(); +#endif //TIME_GC + + dprintf (2,("---- Plan Phase ---- Condemned generation %d, promotion: %d", + condemned_gen_number, settings.promotion ? 
1 : 0)); + + generation* condemned_gen1 = generation_of (condemned_gen_number); + +#ifdef MARK_LIST + BOOL use_mark_list = FALSE; + uint8_t** mark_list_next = &mark_list[0]; +#ifdef GC_CONFIG_DRIVEN + dprintf (3, ("total number of marked objects: %Id (%Id)", + (mark_list_index - &mark_list[0]), ((mark_list_end - &mark_list[0])))); +#else + dprintf (3, ("mark_list length: %Id", + (mark_list_index - &mark_list[0]))); +#endif //GC_CONFIG_DRIVEN + + if ((condemned_gen_number < max_generation) && + (mark_list_index <= mark_list_end) +#ifdef BACKGROUND_GC + && (!recursive_gc_sync::background_running_p()) +#endif //BACKGROUND_GC + ) + { +#ifndef MULTIPLE_HEAPS + _sort (&mark_list[0], mark_list_index-1, 0); + //printf ("using mark list at GC #%d", dd_collection_count (dynamic_data_of (0))); + //verify_qsort_array (&mark_list[0], mark_list_index-1); +#endif //!MULTIPLE_HEAPS + use_mark_list = TRUE; + get_gc_data_per_heap()->set_mechanism_bit (gc_mark_list_bit); + } + else + { + dprintf (3, ("mark_list not used")); + } + +#endif //MARK_LIST + +#ifdef FEATURE_BASICFREEZE + if ((generation_start_segment (condemned_gen1) != ephemeral_heap_segment) && + ro_segments_in_range) + { + sweep_ro_segments (generation_start_segment (condemned_gen1)); + } +#endif // FEATURE_BASICFREEZE + +#ifndef MULTIPLE_HEAPS + if (shigh != (uint8_t*)0) + { + heap_segment* seg = heap_segment_rw (generation_start_segment (condemned_gen1)); + + PREFIX_ASSUME(seg != NULL); + + heap_segment* fseg = seg; + do + { + if (slow > heap_segment_mem (seg) && + slow < heap_segment_reserved (seg)) + { + if (seg == fseg) + { + uint8_t* o = generation_allocation_start (condemned_gen1) + + Align (size (generation_allocation_start (condemned_gen1))); + if (slow > o) + { + assert ((slow - o) >= (int)Align (min_obj_size)); +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_marking) + { + bgc_clear_batch_mark_array_bits (o, slow); + } +#endif //BACKGROUND_GC + make_unused_array (o, slow - o); + } + } + else + { + 
assert (condemned_gen_number == max_generation); + make_unused_array (heap_segment_mem (seg), + slow - heap_segment_mem (seg)); + } + } + if (in_range_for_segment (shigh, seg)) + { +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_marking) + { + bgc_clear_batch_mark_array_bits ((shigh + Align (size (shigh))), heap_segment_allocated (seg)); + } +#endif //BACKGROUND_GC + heap_segment_allocated (seg) = shigh + Align (size (shigh)); + } + // test if the segment is in the range of [slow, shigh] + if (!((heap_segment_reserved (seg) >= slow) && + (heap_segment_mem (seg) <= shigh))) + { + // shorten it to minimum + heap_segment_allocated (seg) = heap_segment_mem (seg); + } + seg = heap_segment_next_rw (seg); + } while (seg); + } + else + { + heap_segment* seg = heap_segment_rw (generation_start_segment (condemned_gen1)); + + PREFIX_ASSUME(seg != NULL); + + heap_segment* sseg = seg; + do + { + // shorten it to minimum + if (seg == sseg) + { + // no survivors make all generations look empty + uint8_t* o = generation_allocation_start (condemned_gen1) + + Align (size (generation_allocation_start (condemned_gen1))); +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_marking) + { + bgc_clear_batch_mark_array_bits (o, heap_segment_allocated (seg)); + } +#endif //BACKGROUND_GC + heap_segment_allocated (seg) = o; + } + else + { + assert (condemned_gen_number == max_generation); +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_marking) + { + bgc_clear_batch_mark_array_bits (heap_segment_mem (seg), heap_segment_allocated (seg)); + } +#endif //BACKGROUND_GC + heap_segment_allocated (seg) = heap_segment_mem (seg); + } + seg = heap_segment_next_rw (seg); + } while (seg); + } + +#endif //MULTIPLE_HEAPS + + heap_segment* seg1 = heap_segment_rw (generation_start_segment (condemned_gen1)); + + PREFIX_ASSUME(seg1 != NULL); + + uint8_t* end = heap_segment_allocated (seg1); + uint8_t* first_condemned_address = generation_allocation_start (condemned_gen1); + 
uint8_t* x = first_condemned_address; + + assert (!marked (x)); + uint8_t* plug_end = x; + uint8_t* tree = 0; + size_t sequence_number = 0; + uint8_t* last_node = 0; + size_t current_brick = brick_of (x); + BOOL allocate_in_condemned = ((condemned_gen_number == max_generation)|| + (settings.promotion == FALSE)); + int active_old_gen_number = condemned_gen_number; + int active_new_gen_number = (allocate_in_condemned ? condemned_gen_number: + (1 + condemned_gen_number)); + generation* older_gen = 0; + generation* consing_gen = condemned_gen1; + alloc_list r_free_list [MAX_BUCKET_COUNT]; + + size_t r_free_list_space = 0; + size_t r_free_obj_space = 0; + size_t r_older_gen_free_list_allocated = 0; + size_t r_older_gen_condemned_allocated = 0; + size_t r_older_gen_end_seg_allocated = 0; + uint8_t* r_allocation_pointer = 0; + uint8_t* r_allocation_limit = 0; + uint8_t* r_allocation_start_region = 0; + heap_segment* r_allocation_segment = 0; +#ifdef FREE_USAGE_STATS + size_t r_older_gen_free_space[NUM_GEN_POWER2]; +#endif //FREE_USAGE_STATS + + if ((condemned_gen_number < max_generation)) + { + older_gen = generation_of (min (max_generation, 1 + condemned_gen_number)); + generation_allocator (older_gen)->copy_to_alloc_list (r_free_list); + + r_free_list_space = generation_free_list_space (older_gen); + r_free_obj_space = generation_free_obj_space (older_gen); +#ifdef FREE_USAGE_STATS + memcpy (r_older_gen_free_space, older_gen->gen_free_spaces, sizeof (r_older_gen_free_space)); +#endif //FREE_USAGE_STATS + generation_allocate_end_seg_p (older_gen) = FALSE; + r_older_gen_free_list_allocated = generation_free_list_allocated (older_gen); + r_older_gen_condemned_allocated = generation_condemned_allocated (older_gen); + r_older_gen_end_seg_allocated = generation_end_seg_allocated (older_gen); + r_allocation_limit = generation_allocation_limit (older_gen); + r_allocation_pointer = generation_allocation_pointer (older_gen); + r_allocation_start_region = 
generation_allocation_context_start_region (older_gen); + r_allocation_segment = generation_allocation_segment (older_gen); + heap_segment* start_seg = heap_segment_rw (generation_start_segment (older_gen)); + + PREFIX_ASSUME(start_seg != NULL); + + if (start_seg != ephemeral_heap_segment) + { + assert (condemned_gen_number == (max_generation - 1)); + while (start_seg && (start_seg != ephemeral_heap_segment)) + { + assert (heap_segment_allocated (start_seg) >= + heap_segment_mem (start_seg)); + assert (heap_segment_allocated (start_seg) <= + heap_segment_reserved (start_seg)); + heap_segment_plan_allocated (start_seg) = + heap_segment_allocated (start_seg); + start_seg = heap_segment_next_rw (start_seg); + } + } + } + + //reset all of the segment allocated sizes + { + heap_segment* seg2 = heap_segment_rw (generation_start_segment (condemned_gen1)); + + PREFIX_ASSUME(seg2 != NULL); + + while (seg2) + { + heap_segment_plan_allocated (seg2) = + heap_segment_mem (seg2); + seg2 = heap_segment_next_rw (seg2); + } + } + int condemned_gn = condemned_gen_number; + + int bottom_gen = 0; + init_free_and_plug(); + + while (condemned_gn >= bottom_gen) + { + generation* condemned_gen2 = generation_of (condemned_gn); + generation_allocator (condemned_gen2)->clear(); + generation_free_list_space (condemned_gen2) = 0; + generation_free_obj_space (condemned_gen2) = 0; + generation_allocation_size (condemned_gen2) = 0; + generation_condemned_allocated (condemned_gen2) = 0; + generation_pinned_allocated (condemned_gen2) = 0; + generation_free_list_allocated(condemned_gen2) = 0; + generation_end_seg_allocated (condemned_gen2) = 0; + generation_pinned_allocation_sweep_size (condemned_gen2) = 0; + generation_pinned_allocation_compact_size (condemned_gen2) = 0; +#ifdef FREE_USAGE_STATS + generation_pinned_free_obj_space (condemned_gen2) = 0; + generation_allocated_in_pinned_free (condemned_gen2) = 0; + generation_allocated_since_last_pin (condemned_gen2) = 0; +#endif //FREE_USAGE_STATS + 
generation_plan_allocation_start (condemned_gen2) = 0; + generation_allocation_segment (condemned_gen2) = + heap_segment_rw (generation_start_segment (condemned_gen2)); + + PREFIX_ASSUME(generation_allocation_segment(condemned_gen2) != NULL); + + if (generation_start_segment (condemned_gen2) != ephemeral_heap_segment) + { + generation_allocation_pointer (condemned_gen2) = + heap_segment_mem (generation_allocation_segment (condemned_gen2)); + } + else + { + generation_allocation_pointer (condemned_gen2) = generation_allocation_start (condemned_gen2); + } + + generation_allocation_limit (condemned_gen2) = generation_allocation_pointer (condemned_gen2); + generation_allocation_context_start_region (condemned_gen2) = generation_allocation_pointer (condemned_gen2); + + condemned_gn--; + } + + BOOL allocate_first_generation_start = FALSE; + + if (allocate_in_condemned) + { + allocate_first_generation_start = TRUE; + } + + dprintf(3,( " From %Ix to %Ix", (size_t)x, (size_t)end)); + + demotion_low = MAX_PTR; + demotion_high = heap_segment_allocated (ephemeral_heap_segment); + + // If we are doing a gen1 only because of cards, it means we should not demote any pinned plugs + // from gen1. They should get promoted to gen2. 
+ demote_gen1_p = !(settings.promotion && + (settings.condemned_generation == (max_generation - 1)) && + gen_to_condemn_reasons.is_only_condition (gen_low_card_p)); + + total_ephemeral_size = 0; + + print_free_and_plug ("BP"); + + for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) + { + generation* temp_gen = generation_of (gen_idx); + + dprintf (2, ("gen%d start %Ix, plan start %Ix", + gen_idx, + generation_allocation_start (temp_gen), + generation_plan_allocation_start (temp_gen))); + } + + BOOL fire_pinned_plug_events_p = ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, PinPlugAtGCTime); + size_t last_plug_len = 0; + + while (1) + { + if (x >= end) + { + assert (x == end); + assert (heap_segment_allocated (seg1) == end); + heap_segment_allocated (seg1) = plug_end; + + current_brick = update_brick_table (tree, current_brick, x, plug_end); + dprintf (3, ("end of seg: new tree, sequence# 0")); + sequence_number = 0; + tree = 0; + + if (heap_segment_next_rw (seg1)) + { + seg1 = heap_segment_next_rw (seg1); + end = heap_segment_allocated (seg1); + plug_end = x = heap_segment_mem (seg1); + current_brick = brick_of (x); + dprintf(3,( " From %Ix to %Ix", (size_t)x, (size_t)end)); + continue; + } + else + { + break; + } + } + + BOOL last_npinned_plug_p = FALSE; + BOOL last_pinned_plug_p = FALSE; + + // last_pinned_plug is the beginning of the last pinned plug. If we merge a plug into a pinned + // plug we do not change the value of last_pinned_plug. This happens with artificially pinned plugs - + // it can be merged with a previous pinned plug and a pinned plug after it can be merged with it. 
+ uint8_t* last_pinned_plug = 0; + size_t num_pinned_plugs_in_plug = 0; + + uint8_t* last_object_in_plug = 0; + + while ((x < end) && marked (x)) + { + uint8_t* plug_start = x; + uint8_t* saved_plug_end = plug_end; + BOOL pinned_plug_p = FALSE; + BOOL npin_before_pin_p = FALSE; + BOOL saved_last_npinned_plug_p = last_npinned_plug_p; + uint8_t* saved_last_object_in_plug = last_object_in_plug; + BOOL merge_with_last_pin_p = FALSE; + + size_t added_pinning_size = 0; + size_t artificial_pinned_size = 0; + + store_plug_gap_info (plug_start, plug_end, last_npinned_plug_p, last_pinned_plug_p, + last_pinned_plug, pinned_plug_p, last_object_in_plug, + merge_with_last_pin_p, last_plug_len); + +#ifdef FEATURE_STRUCTALIGN + int requiredAlignment = ((CObjectHeader*)plug_start)->GetRequiredAlignment(); + size_t alignmentOffset = OBJECT_ALIGNMENT_OFFSET; +#endif // FEATURE_STRUCTALIGN + + { + uint8_t* xl = x; + while ((xl < end) && marked (xl) && (pinned (xl) == pinned_plug_p)) + { + assert (xl < end); + if (pinned(xl)) + { + clear_pinned (xl); + } +#ifdef FEATURE_STRUCTALIGN + else + { + int obj_requiredAlignment = ((CObjectHeader*)xl)->GetRequiredAlignment(); + if (obj_requiredAlignment > requiredAlignment) + { + requiredAlignment = obj_requiredAlignment; + alignmentOffset = xl - plug_start + OBJECT_ALIGNMENT_OFFSET; + } + } +#endif // FEATURE_STRUCTALIGN + + clear_marked (xl); + + dprintf(4, ("+%Ix+", (size_t)xl)); + assert ((size (xl) > 0)); + assert ((size (xl) <= LARGE_OBJECT_SIZE)); + + last_object_in_plug = xl; + + xl = xl + Align (size (xl)); + Prefetch (xl); + } + + BOOL next_object_marked_p = ((xl < end) && marked (xl)); + + if (pinned_plug_p) + { + // If it is pinned we need to extend to the next marked object as we can't use part of + // a pinned object to make the artificial gap (unless the last 3 ptr sized words are all + // references but for now I am just using the next non pinned object for that). 
+ if (next_object_marked_p) + { + clear_marked (xl); + last_object_in_plug = xl; + size_t extra_size = Align (size (xl)); + xl = xl + extra_size; + added_pinning_size = extra_size; + } + } + else + { + if (next_object_marked_p) + npin_before_pin_p = TRUE; + } + + assert (xl <= end); + x = xl; + } + dprintf (3, ( "%Ix[", (size_t)x)); + plug_end = x; + size_t ps = plug_end - plug_start; + last_plug_len = ps; + dprintf (3, ( "%Ix[(%Ix)", (size_t)x, ps)); + uint8_t* new_address = 0; + + if (!pinned_plug_p) + { + if (allocate_in_condemned && + (settings.condemned_generation == max_generation) && + (ps > (OS_PAGE_SIZE))) + { + ptrdiff_t reloc = plug_start - generation_allocation_pointer (consing_gen); + //reloc should >=0 except when we relocate + //across segments and the dest seg is higher then the src + + if ((ps > (8*OS_PAGE_SIZE)) && + (reloc > 0) && + ((size_t)reloc < (ps/16))) + { + dprintf (3, ("Pinning %Ix; reloc would have been: %Ix", + (size_t)plug_start, reloc)); + // The last plug couldn't have been a npinned plug or it would have + // included this plug. 
+ assert (!saved_last_npinned_plug_p); + + if (last_pinned_plug) + { + dprintf (3, ("artificially pinned plug merged with last pinned plug")); + merge_with_last_pin_p = TRUE; + } + else + { + enque_pinned_plug (plug_start, FALSE, 0); + last_pinned_plug = plug_start; + } + + convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p, + ps, artificial_pinned_size); + } + } + } + + if (allocate_first_generation_start) + { + allocate_first_generation_start = FALSE; + plan_generation_start (condemned_gen1, consing_gen, plug_start); + assert (generation_plan_allocation_start (condemned_gen1)); + } + + if (seg1 == ephemeral_heap_segment) + { + process_ephemeral_boundaries (plug_start, active_new_gen_number, + active_old_gen_number, + consing_gen, + allocate_in_condemned); + } + + dprintf (3, ("adding %Id to gen%d surv", ps, active_old_gen_number)); + + dynamic_data* dd_active_old = dynamic_data_of (active_old_gen_number); + dd_survived_size (dd_active_old) += ps; + + BOOL convert_to_pinned_p = FALSE; + + if (!pinned_plug_p) + { +#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN) + dd_num_npinned_plugs (dd_active_old)++; +#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN + + add_gen_plug (active_old_gen_number, ps); + + if (allocate_in_condemned) + { + verify_pins_with_post_plug_info("before aic"); + + new_address = + allocate_in_condemned_generations (consing_gen, + ps, + active_old_gen_number, +#ifdef SHORT_PLUGS + &convert_to_pinned_p, + (npin_before_pin_p ? 
plug_end : 0), + seg1, +#endif //SHORT_PLUGS + plug_start REQD_ALIGN_AND_OFFSET_ARG); + verify_pins_with_post_plug_info("after aic"); + } + else + { + new_address = allocate_in_older_generation (older_gen, ps, active_old_gen_number, plug_start REQD_ALIGN_AND_OFFSET_ARG); + + if (new_address != 0) + { + if (settings.condemned_generation == (max_generation - 1)) + { + dprintf (3, (" NA: %Ix-%Ix -> %Ix, %Ix (%Ix)", + plug_start, plug_end, + (size_t)new_address, (size_t)new_address + (plug_end - plug_start), + (size_t)(plug_end - plug_start))); + } + } + else + { + allocate_in_condemned = TRUE; + + new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, +#ifdef SHORT_PLUGS + &convert_to_pinned_p, + (npin_before_pin_p ? plug_end : 0), + seg1, +#endif //SHORT_PLUGS + plug_start REQD_ALIGN_AND_OFFSET_ARG); + } + } + + if (convert_to_pinned_p) + { + assert (last_npinned_plug_p != FALSE); + assert (last_pinned_plug_p == FALSE); + convert_to_pinned_plug (last_npinned_plug_p, last_pinned_plug_p, pinned_plug_p, + ps, artificial_pinned_size); + enque_pinned_plug (plug_start, FALSE, 0); + last_pinned_plug = plug_start; + } + else + { + if (!new_address) + { + //verify that we are at then end of the ephemeral segment + assert (generation_allocation_segment (consing_gen) == + ephemeral_heap_segment); + //verify that we are near the end + assert ((generation_allocation_pointer (consing_gen) + Align (ps)) < + heap_segment_allocated (ephemeral_heap_segment)); + assert ((generation_allocation_pointer (consing_gen) + Align (ps)) > + (heap_segment_allocated (ephemeral_heap_segment) + Align (min_obj_size))); + } + else + { +#ifdef SIMPLE_DPRINTF + dprintf (3, ("(%Ix)[%Ix->%Ix, NA: [%Ix(%Id), %Ix[: %Ix(%d)", + (size_t)(node_gap_size (plug_start)), + plug_start, plug_end, (size_t)new_address, (size_t)(plug_start - new_address), + (size_t)new_address + ps, ps, + (is_plug_padded (plug_start) ? 
1 : 0))); +#endif //SIMPLE_DPRINTF + +#ifdef SHORT_PLUGS + if (is_plug_padded (plug_start)) + { + dprintf (3, ("%Ix was padded", plug_start)); + dd_padding_size (dd_active_old) += Align (min_obj_size); + } +#endif //SHORT_PLUGS + } + } + } + + if (pinned_plug_p) + { + if (fire_pinned_plug_events_p) + FireEtwPinPlugAtGCTime(plug_start, plug_end, + (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start)), + GetClrInstanceId()); + + if (merge_with_last_pin_p) + { + merge_with_last_pinned_plug (last_pinned_plug, ps); + } + else + { + assert (last_pinned_plug == plug_start); + set_pinned_info (plug_start, ps, consing_gen); + } + + new_address = plug_start; + + dprintf (3, ( "(%Ix)PP: [%Ix, %Ix[%Ix](m:%d)", + (size_t)(node_gap_size (plug_start)), (size_t)plug_start, + (size_t)plug_end, ps, + (merge_with_last_pin_p ? 1 : 0))); + + dprintf (3, ("adding %Id to gen%d pinned surv", plug_end - plug_start, active_old_gen_number)); + dd_pinned_survived_size (dd_active_old) += plug_end - plug_start; + dd_added_pinned_size (dd_active_old) += added_pinning_size; + dd_artificial_pinned_survived_size (dd_active_old) += artificial_pinned_size; + + if (!demote_gen1_p && (active_old_gen_number == (max_generation - 1))) + { + last_gen1_pin_end = plug_end; + } + } + +#ifdef _DEBUG + // detect forward allocation in the same segment + assert (!((new_address > plug_start) && + (new_address < heap_segment_reserved (seg1)))); +#endif //_DEBUG + + if (!merge_with_last_pin_p) + { + if (current_brick != brick_of (plug_start)) + { + current_brick = update_brick_table (tree, current_brick, plug_start, saved_plug_end); + sequence_number = 0; + tree = 0; + } + + set_node_relocation_distance (plug_start, (new_address - plug_start)); + if (last_node && (node_relocation_distance (last_node) == + (node_relocation_distance (plug_start) + + (int)node_gap_size (plug_start)))) + { + //dprintf(3,( " Lb")); + dprintf (3, ("%Ix Lb", plug_start)); + set_node_left (plug_start); + } + if (0 == 
sequence_number) + { + dprintf (2, ("sn: 0, tree is set to %Ix", plug_start)); + tree = plug_start; + } + + verify_pins_with_post_plug_info("before insert node"); + + tree = insert_node (plug_start, ++sequence_number, tree, last_node); + dprintf (3, ("tree is %Ix (b: %Ix) after insert_node", tree, brick_of (tree))); + last_node = plug_start; + +#ifdef _DEBUG + // If we detect if the last plug is pinned plug right before us, we should save this gap info + if (!pinned_plug_p) + { + if (mark_stack_tos > 0) + { + mark& m = mark_stack_array[mark_stack_tos - 1]; + if (m.has_post_plug_info()) + { + uint8_t* post_plug_info_start = m.saved_post_plug_info_start; + size_t* current_plug_gap_start = (size_t*)(plug_start - sizeof (plug_and_gap)); + if ((uint8_t*)current_plug_gap_start == post_plug_info_start) + { + dprintf (3, ("Ginfo: %Ix, %Ix, %Ix", + *current_plug_gap_start, *(current_plug_gap_start + 1), + *(current_plug_gap_start + 2))); + memcpy (&(m.saved_post_plug_debug), current_plug_gap_start, sizeof (gap_reloc_pair)); + } + } + } + } +#endif //_DEBUG + + verify_pins_with_post_plug_info("after insert node"); + } + } + + if (num_pinned_plugs_in_plug > 1) + { + dprintf (3, ("more than %Id pinned plugs in this plug", num_pinned_plugs_in_plug)); + } + + { +#ifdef MARK_LIST + if (use_mark_list) + { + while ((mark_list_next < mark_list_index) && + (*mark_list_next <= x)) + { + mark_list_next++; + } + if ((mark_list_next < mark_list_index) +#ifdef MULTIPLE_HEAPS + && (*mark_list_next < end) //for multiple segments +#endif //MULTIPLE_HEAPS + ) + x = *mark_list_next; + else + x = end; + } + else +#endif //MARK_LIST + { + uint8_t* xl = x; +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_marking) + { + assert (recursive_gc_sync::background_running_p()); + while ((xl < end) && !marked (xl)) + { + dprintf (4, ("-%Ix-", (size_t)xl)); + assert ((size (xl) > 0)); + background_object_marked (xl, TRUE); + xl = xl + Align (size (xl)); + Prefetch (xl); + } + } + else +#endif 
//BACKGROUND_GC + { + while ((xl < end) && !marked (xl)) + { + dprintf (4, ("-%Ix-", (size_t)xl)); + assert ((size (xl) > 0)); + xl = xl + Align (size (xl)); + Prefetch (xl); + } + } + assert (xl <= end); + x = xl; + } + } + } + + while (!pinned_plug_que_empty_p()) + { + if (settings.promotion) + { + uint8_t* pplug = pinned_plug (oldest_pin()); + if (in_range_for_segment (pplug, ephemeral_heap_segment)) + { + consing_gen = ensure_ephemeral_heap_segment (consing_gen); + //allocate all of the generation gaps + while (active_new_gen_number > 0) + { + active_new_gen_number--; + + if (active_new_gen_number == (max_generation - 1)) + { + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + if (!demote_gen1_p) + advance_pins_for_demotion (consing_gen); + } + + generation* gen = generation_of (active_new_gen_number); + plan_generation_start (gen, consing_gen, 0); + + if (demotion_low == MAX_PTR) + { + demotion_low = pplug; + dprintf (3, ("end plan: dlow->%Ix", demotion_low)); + } + + dprintf (2, ("(%d)gen%d plan start: %Ix", + heap_number, active_new_gen_number, (size_t)generation_plan_allocation_start (gen))); + assert (generation_plan_allocation_start (gen)); + } + } + } + + if (pinned_plug_que_empty_p()) + break; + + size_t entry = deque_pinned_plug(); + mark* m = pinned_plug_of (entry); + uint8_t* plug = pinned_plug (m); + size_t len = pinned_len (m); + + // detect pinned block in different segment (later) than + // allocation segment + heap_segment* nseg = heap_segment_rw (generation_allocation_segment (consing_gen)); + + while ((plug < generation_allocation_pointer (consing_gen)) || + (plug >= heap_segment_allocated (nseg))) + { + assert ((plug < heap_segment_mem (nseg)) || + (plug > heap_segment_reserved (nseg))); + //adjust the end of the segment to be the end of the plug + assert (generation_allocation_pointer (consing_gen)>= + heap_segment_mem (nseg)); + assert (generation_allocation_pointer 
(consing_gen)<= + heap_segment_committed (nseg)); + + heap_segment_plan_allocated (nseg) = + generation_allocation_pointer (consing_gen); + //switch allocation segment + nseg = heap_segment_next_rw (nseg); + generation_allocation_segment (consing_gen) = nseg; + //reset the allocation pointer and limits + generation_allocation_pointer (consing_gen) = + heap_segment_mem (nseg); + } + + set_new_pin_info (m, generation_allocation_pointer (consing_gen)); + dprintf (2, ("pin %Ix b: %Ix->%Ix", plug, brick_of (plug), + (size_t)(brick_table[brick_of (plug)]))); + + generation_allocation_pointer (consing_gen) = plug + len; + generation_allocation_limit (consing_gen) = + generation_allocation_pointer (consing_gen); + //Add the size of the pinned plug to the right pinned allocations + //find out which gen this pinned plug came from + int frgn = object_gennum (plug); + if ((frgn != (int)max_generation) && settings.promotion) + { + generation_pinned_allocation_sweep_size ((generation_of (frgn +1))) += len; + } + + } + + plan_generation_starts (consing_gen); + print_free_and_plug ("AP"); + + { +#ifdef SIMPLE_DPRINTF + for (int gen_idx = 0; gen_idx <= max_generation; gen_idx++) + { + generation* temp_gen = generation_of (gen_idx); + dynamic_data* temp_dd = dynamic_data_of (gen_idx); + + int added_pinning_ratio = 0; + int artificial_pinned_ratio = 0; + + if (dd_pinned_survived_size (temp_dd) != 0) + { + added_pinning_ratio = (int)((float)dd_added_pinned_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd)); + artificial_pinned_ratio = (int)((float)dd_artificial_pinned_survived_size (temp_dd) * 100 / (float)dd_pinned_survived_size (temp_dd)); + } + + size_t padding_size = +#ifdef SHORT_PLUGS + dd_padding_size (temp_dd); +#else + 0; +#endif //SHORT_PLUGS + dprintf (1, ("gen%d: %Ix, %Ix(%Id), NON PIN alloc: %Id, pin com: %Id, sweep: %Id, surv: %Id, pinsurv: %Id(%d%% added, %d%% art), np surv: %Id, pad: %Id", + gen_idx, + generation_allocation_start (temp_gen), + 
generation_plan_allocation_start (temp_gen), + (size_t)(generation_plan_allocation_start (temp_gen) - generation_allocation_start (temp_gen)), + generation_allocation_size (temp_gen), + generation_pinned_allocation_compact_size (temp_gen), + generation_pinned_allocation_sweep_size (temp_gen), + dd_survived_size (temp_dd), + dd_pinned_survived_size (temp_dd), + added_pinning_ratio, + artificial_pinned_ratio, + (dd_survived_size (temp_dd) - dd_pinned_survived_size (temp_dd)), + padding_size)); + } +#endif //SIMPLE_DPRINTF + } + + if (settings.condemned_generation == (max_generation - 1 )) + { + size_t plan_gen2_size = generation_plan_size (max_generation); + size_t growth = plan_gen2_size - old_gen2_size; + + if (growth > 0) + { + dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, gen1 c alloc: %Id", + growth, generation_end_seg_allocated (generation_of (max_generation)), + generation_condemned_allocated (generation_of (max_generation - 1)))); + } + else + { + dprintf (1, ("gen2 shrank %Id (end seg alloc: %Id, gen1 c alloc: %Id", + (old_gen2_size - plan_gen2_size), generation_end_seg_allocated (generation_of (max_generation)), + generation_condemned_allocated (generation_of (max_generation - 1)))); + } + + generation* older_gen = generation_of (settings.condemned_generation + 1); + size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space; + size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated; + size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated; + size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated; + + dprintf (1, ("older gen's free alloc: %Id->%Id, seg alloc: %Id->%Id, condemned alloc: %Id->%Id", + r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), + r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen), + r_older_gen_condemned_allocated, 
generation_condemned_allocated (older_gen))); + + dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected), %Id seg alloc and %Id condemned alloc, gen1 condemned alloc is %Id", + free_list_allocated, rejected_free_space, end_seg_allocated, + condemned_allocated, generation_condemned_allocated (generation_of (settings.condemned_generation)))); + + maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info); + maxgen_size_info->free_list_allocated = free_list_allocated; + maxgen_size_info->free_list_rejected = rejected_free_space; + maxgen_size_info->end_seg_allocated = end_seg_allocated; + maxgen_size_info->condemned_allocated = condemned_allocated; + maxgen_size_info->pinned_allocated = maxgen_pinned_compact_before_advance; + maxgen_size_info->pinned_allocated_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)) - maxgen_pinned_compact_before_advance; + +#ifdef FREE_USAGE_STATS + int free_list_efficiency = 0; + if ((free_list_allocated + rejected_free_space) != 0) + free_list_efficiency = (int)(((float) (free_list_allocated) / (float)(free_list_allocated + rejected_free_space)) * (float)100); + + int running_free_list_efficiency = (int)(generation_allocator_efficiency(older_gen)*100); + + dprintf (1, ("gen%d free list alloc effi: %d%%, current effi: %d%%", + older_gen->gen_num, + free_list_efficiency, running_free_list_efficiency)); + + dprintf (1, ("gen2 free list change")); + for (int j = 0; j < NUM_GEN_POWER2; j++) + { + dprintf (1, ("[h%d][#%Id]: 2^%d: F: %Id->%Id(%Id), P: %Id", + heap_number, + settings.gc_index, + (j + 10), r_older_gen_free_space[j], older_gen->gen_free_spaces[j], + (ptrdiff_t)(r_older_gen_free_space[j] - older_gen->gen_free_spaces[j]), + (generation_of(max_generation - 1))->gen_plugs[j])); + } +#endif //FREE_USAGE_STATS + } + + size_t fragmentation = + generation_fragmentation (generation_of (condemned_gen_number), + consing_gen, + heap_segment_allocated 
(ephemeral_heap_segment)); + + dprintf (2,("Fragmentation: %Id", fragmentation)); + dprintf (2,("---- End of Plan phase ----")); + +#ifdef TIME_GC + finish = GetCycleCount32(); + plan_time = finish - start; +#endif //TIME_GC + + // We may update write barrier code. We assume here EE has been suspended if we are on a GC thread. + assert(GCHeap::IsGCInProgress()); + + BOOL should_expand = FALSE; + BOOL should_compact= FALSE; + ephemeral_promotion = FALSE; + +#ifdef BIT64 + if ((!settings.concurrent) && + ((condemned_gen_number < max_generation) && + ((settings.gen0_reduction_count > 0) || (settings.entry_memory_load >= 95)))) + { + dprintf (2, ("gen0 reduction count is %d, condemning %d, mem load %d", + settings.gen0_reduction_count, + condemned_gen_number, + settings.entry_memory_load)); + should_compact = TRUE; + + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, + ((settings.gen0_reduction_count > 0) ? compact_fragmented_gen0 : compact_high_mem_load)); + + if ((condemned_gen_number >= (max_generation - 1)) && + dt_low_ephemeral_space_p (tuning_deciding_expansion)) + { + dprintf (2, ("Not enough space for all ephemeral generations with compaction")); + should_expand = TRUE; + } + } + else + { +#endif // BIT64 + should_compact = decide_on_compacting (condemned_gen_number, fragmentation, should_expand); +#ifdef BIT64 + } +#endif // BIT64 + +#ifdef FEATURE_LOH_COMPACTION + loh_compacted_p = FALSE; +#endif //FEATURE_LOH_COMPACTION + + if (condemned_gen_number == max_generation) + { +#ifdef FEATURE_LOH_COMPACTION + if (settings.loh_compaction) + { + if (plan_loh()) + { + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_loh_forced); + loh_compacted_p = TRUE; + } + } + else + { + if ((heap_number == 0) && (loh_pinned_queue)) + { + loh_pinned_queue_decay--; + + if (!loh_pinned_queue_decay) + { + delete loh_pinned_queue; + loh_pinned_queue = 0; + } + } + } + + if (!loh_compacted_p) +#endif //FEATURE_LOH_COMPACTION + { +#if 
defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + if (ShouldTrackMovementForProfilerOrEtw()) + notify_profiler_of_surviving_large_objects(); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + sweep_large_objects(); + } + } + else + { + settings.loh_compaction = FALSE; + } + +#ifdef MULTIPLE_HEAPS + + new_heap_segment = NULL; + + if (should_compact && should_expand) + gc_policy = policy_expand; + else if (should_compact) + gc_policy = policy_compact; + else + gc_policy = policy_sweep; + + //vote for result of should_compact + dprintf (3, ("Joining for compaction decision")); + gc_t_join.join(this, gc_join_decide_on_compaction); + if (gc_t_join.joined()) + { + //safe place to delete large heap segments + if (condemned_gen_number == max_generation) + { + for (int i = 0; i < n_heaps; i++) + { + g_heaps [i]->rearrange_large_heap_segments (); + } + } + + settings.demotion = FALSE; + int pol_max = policy_sweep; +#ifdef GC_CONFIG_DRIVEN + BOOL is_compaction_mandatory = FALSE; +#endif //GC_CONFIG_DRIVEN + + int i; + for (i = 0; i < n_heaps; i++) + { + if (pol_max < g_heaps[i]->gc_policy) + pol_max = policy_compact; + // set the demotion flag is any of the heap has demotion + if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low) + { + (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit); + settings.demotion = TRUE; + } + +#ifdef GC_CONFIG_DRIVEN + if (!is_compaction_mandatory) + { + int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact); + if (compact_reason >= 0) + { + if (gc_heap_compact_reason_mandatory_p[compact_reason]) + is_compaction_mandatory = TRUE; + } + } +#endif //GC_CONFIG_DRIVEN + } + +#ifdef GC_CONFIG_DRIVEN + if (!is_compaction_mandatory) + { + // If compaction is not mandatory we can feel free to change it to a sweeping GC. + // Note that we may want to change this to only checking every so often instead of every single GC. 
+ if (should_do_sweeping_gc (pol_max >= policy_compact)) + { + pol_max = policy_sweep; + } + else + { + if (pol_max == policy_sweep) + pol_max = policy_compact; + } + } +#endif //GC_CONFIG_DRIVEN + + for (i = 0; i < n_heaps; i++) + { + if (pol_max > g_heaps[i]->gc_policy) + g_heaps[i]->gc_policy = pol_max; + //get the segment while we are serialized + if (g_heaps[i]->gc_policy == policy_expand) + { + g_heaps[i]->new_heap_segment = + g_heaps[i]->soh_get_segment_to_expand(); + if (!g_heaps[i]->new_heap_segment) + { + set_expand_in_full_gc (condemned_gen_number); + //we are out of memory, cancel the expansion + g_heaps[i]->gc_policy = policy_compact; + } + } + } + + BOOL is_full_compacting_gc = FALSE; + + if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation)) + { + full_gc_counts[gc_type_compacting]++; + is_full_compacting_gc = TRUE; + } + + for (i = 0; i < n_heaps; i++) + { + //copy the card and brick tables + if (g_card_table!= g_heaps[i]->card_table) + { + g_heaps[i]->copy_brick_card_table(); + } + + if (is_full_compacting_gc) + { + g_heaps[i]->loh_alloc_since_cg = 0; + } + } + + //start all threads on the roots. + dprintf(3, ("Starting all gc threads after compaction decision")); + gc_t_join.restart(); + } + + //reset the local variable accordingly + should_compact = (gc_policy >= policy_compact); + should_expand = (gc_policy >= policy_expand); + +#else //MULTIPLE_HEAPS + + //safe place to delete large heap segments + if (condemned_gen_number == max_generation) + { + rearrange_large_heap_segments (); + } + + settings.demotion = ((demotion_high >= demotion_low) ? 
TRUE : FALSE); + if (settings.demotion) + get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); + +#ifdef GC_CONFIG_DRIVEN + BOOL is_compaction_mandatory = FALSE; + int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact); + if (compact_reason >= 0) + is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason]; + + if (!is_compaction_mandatory) + { + if (should_do_sweeping_gc (should_compact)) + should_compact = FALSE; + else + should_compact = TRUE; + } +#endif //GC_CONFIG_DRIVEN + + if (should_compact && (condemned_gen_number == max_generation)) + { + full_gc_counts[gc_type_compacting]++; + loh_alloc_since_cg = 0; + } +#endif //MULTIPLE_HEAPS + + if (should_compact) + { + dprintf (2,( "**** Doing Compacting GC ****")); + + if (should_expand) + { +#ifndef MULTIPLE_HEAPS + heap_segment* new_heap_segment = soh_get_segment_to_expand(); +#endif //!MULTIPLE_HEAPS + if (new_heap_segment) + { + consing_gen = expand_heap(condemned_gen_number, + consing_gen, + new_heap_segment); + } + + // If we couldn't get a new segment, or we were able to + // reserve one but no space to commit, we couldn't + // expand heap. 
+ if (ephemeral_heap_segment != new_heap_segment) + { + set_expand_in_full_gc (condemned_gen_number); + should_expand = FALSE; + } + } + generation_allocation_limit (condemned_gen1) = + generation_allocation_pointer (condemned_gen1); + if ((condemned_gen_number < max_generation)) + { + generation_allocator (older_gen)->commit_alloc_list_changes(); + + // Fix the allocation area of the older generation + fix_older_allocation_area (older_gen); + } + assert (generation_allocation_segment (consing_gen) == + ephemeral_heap_segment); + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + if (ShouldTrackMovementForProfilerOrEtw()) + { + record_survived_for_profiler(condemned_gen_number, first_condemned_address); + } +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + + relocate_phase (condemned_gen_number, first_condemned_address); + compact_phase (condemned_gen_number, first_condemned_address, + (!settings.demotion && settings.promotion)); + fix_generation_bounds (condemned_gen_number, consing_gen); + assert (generation_allocation_limit (youngest_generation) == + generation_allocation_pointer (youngest_generation)); + if (condemned_gen_number >= (max_generation -1)) + { +#ifdef MULTIPLE_HEAPS + // this needs be serialized just because we have one + // segment_standby_list/seg_table for all heaps. We should make it at least + // so that when hoarding is not on we don't need this join because + // decommitting memory can take a long time. 
+ //must serialize on deleting segments + gc_t_join.join(this, gc_join_rearrange_segs_compaction); + if (gc_t_join.joined()) + { + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->rearrange_heap_segments(TRUE); + } + gc_t_join.restart(); + } +#else + rearrange_heap_segments(TRUE); +#endif //MULTIPLE_HEAPS + + if (should_expand) + { + //fix the start_segment for the ephemeral generations + for (int i = 0; i < max_generation; i++) + { + generation* gen = generation_of (i); + generation_start_segment (gen) = ephemeral_heap_segment; + generation_allocation_segment (gen) = ephemeral_heap_segment; + } + } + } + + { +#ifdef FEATURE_PREMORTEM_FINALIZATION + finalize_queue->UpdatePromotedGenerations (condemned_gen_number, + (!settings.demotion && settings.promotion)); +#endif // FEATURE_PREMORTEM_FINALIZATION + +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Joining after end of compaction")); + gc_t_join.join(this, gc_join_adjust_handle_age_compact); + if (gc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { +#ifdef MULTIPLE_HEAPS + //join all threads to make sure they are synchronized + dprintf(3, ("Restarting after Promotion granted")); + gc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + ScanContext sc; + sc.thread_number = heap_number; + sc.promotion = FALSE; + sc.concurrent = FALSE; + // new generations bounds are set can call this guy + if (settings.promotion && !settings.demotion) + { + dprintf (2, ("Promoting EE roots for gen %d", + condemned_gen_number)); + GCScan::GcPromotionsGranted(condemned_gen_number, + max_generation, &sc); + } + else if (settings.demotion) + { + dprintf (2, ("Demoting EE roots for gen %d", + condemned_gen_number)); + GCScan::GcDemote (condemned_gen_number, max_generation, &sc); + } + } + + { + gen0_big_free_spaces = 0; + + reset_pinned_queue_bos(); + unsigned int gen_number = min (max_generation, 1 + condemned_gen_number); + generation* gen = generation_of (gen_number); + uint8_t* low = generation_allocation_start (generation_of (gen_number-1)); + 
uint8_t* high = heap_segment_allocated (ephemeral_heap_segment); + + while (!pinned_plug_que_empty_p()) + { + mark* m = pinned_plug_of (deque_pinned_plug()); + size_t len = pinned_len (m); + uint8_t* arr = (pinned_plug (m) - len); + dprintf(3,("free [%Ix %Ix[ pin", + (size_t)arr, (size_t)arr + len)); + if (len != 0) + { + assert (len >= Align (min_obj_size)); + make_unused_array (arr, len); + // fix fully contained bricks + first one + // if the array goes beyong the first brick + size_t start_brick = brick_of (arr); + size_t end_brick = brick_of (arr + len); + if (end_brick != start_brick) + { + dprintf (3, + ("Fixing bricks [%Ix, %Ix[ to point to unused array %Ix", + start_brick, end_brick, (size_t)arr)); + set_brick (start_brick, + arr - brick_address (start_brick)); + size_t brick = start_brick+1; + while (brick < end_brick) + { + set_brick (brick, start_brick - brick); + brick++; + } + } + + //when we take an old segment to make the new + //ephemeral segment. we can have a bunch of + //pinned plugs out of order going to the new ephemeral seg + //and then the next plugs go back to max_generation + if ((heap_segment_mem (ephemeral_heap_segment) <= arr) && + (heap_segment_reserved (ephemeral_heap_segment) > arr)) + { + + while ((low <= arr) && (high > arr)) + { + gen_number--; + assert ((gen_number >= 1) || (demotion_low != MAX_PTR) || + settings.demotion || !settings.promotion); + dprintf (3, ("new free list generation %d", gen_number)); + + gen = generation_of (gen_number); + if (gen_number >= 1) + low = generation_allocation_start (generation_of (gen_number-1)); + else + low = high; + } + } + else + { + dprintf (3, ("new free list generation %d", max_generation)); + gen_number = max_generation; + gen = generation_of (gen_number); + } + + dprintf(3,("threading it into generation %d", gen_number)); + thread_gap (arr, len, gen); + add_gen_free (gen_number, len); + } + } + } + +#ifdef _DEBUG + for (int x = 0; x <= max_generation; x++) + { + assert 
(generation_allocation_start (generation_of (x))); + } +#endif //_DEBUG + + if (!settings.demotion && settings.promotion) + { + //clear card for generation 1. generation 0 is empty + clear_card_for_addresses ( + generation_allocation_start (generation_of (1)), + generation_allocation_start (generation_of (0))); + } + if (settings.promotion && !settings.demotion) + { + uint8_t* start = generation_allocation_start (youngest_generation); + MAYBE_UNUSED_VAR(start); + assert (heap_segment_allocated (ephemeral_heap_segment) == + (start + Align (size (start)))); + } + } + else + { + //force promotion for sweep + settings.promotion = TRUE; + settings.compaction = FALSE; + + ScanContext sc; + sc.thread_number = heap_number; + sc.promotion = FALSE; + sc.concurrent = FALSE; + + dprintf (2, ("**** Doing Mark and Sweep GC****")); + + if ((condemned_gen_number < max_generation)) + { + generation_allocator (older_gen)->copy_from_alloc_list (r_free_list); + generation_free_list_space (older_gen) = r_free_list_space; + generation_free_obj_space (older_gen) = r_free_obj_space; + generation_free_list_allocated (older_gen) = r_older_gen_free_list_allocated; + generation_end_seg_allocated (older_gen) = r_older_gen_end_seg_allocated; + generation_condemned_allocated (older_gen) = r_older_gen_condemned_allocated; + generation_allocation_limit (older_gen) = r_allocation_limit; + generation_allocation_pointer (older_gen) = r_allocation_pointer; + generation_allocation_context_start_region (older_gen) = r_allocation_start_region; + generation_allocation_segment (older_gen) = r_allocation_segment; + } + + if ((condemned_gen_number < max_generation)) + { + // Fix the allocation area of the older generation + fix_older_allocation_area (older_gen); + } + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + if (ShouldTrackMovementForProfilerOrEtw()) + { + record_survived_for_profiler(condemned_gen_number, first_condemned_address); + } +#endif // defined(GC_PROFILING) || 
defined(FEATURE_EVENT_TRACE) + + gen0_big_free_spaces = 0; + make_free_lists (condemned_gen_number); + recover_saved_pinned_info(); + +#ifdef FEATURE_PREMORTEM_FINALIZATION + finalize_queue->UpdatePromotedGenerations (condemned_gen_number, TRUE); +#endif // FEATURE_PREMORTEM_FINALIZATION +// MTHTS: leave single thread for HT processing on plan_phase +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Joining after end of sweep")); + gc_t_join.join(this, gc_join_adjust_handle_age_sweep); + if (gc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + GCScan::GcPromotionsGranted(condemned_gen_number, + max_generation, &sc); + if (condemned_gen_number >= (max_generation -1)) + { +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->rearrange_heap_segments(FALSE); + } +#else + rearrange_heap_segments(FALSE); +#endif //MULTIPLE_HEAPS + } + +#ifdef MULTIPLE_HEAPS + //join all threads to make sure they are synchronized + dprintf(3, ("Restarting after Promotion granted")); + gc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + +#ifdef _DEBUG + for (int x = 0; x <= max_generation; x++) + { + assert (generation_allocation_start (generation_of (x))); + } +#endif //_DEBUG + + //clear card for generation 1. 
generation 0 is empty + clear_card_for_addresses ( + generation_allocation_start (generation_of (1)), + generation_allocation_start (generation_of (0))); + assert ((heap_segment_allocated (ephemeral_heap_segment) == + (generation_allocation_start (youngest_generation) + + Align (min_obj_size)))); + } + + //verify_partial(); +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif //_PREFAST_ + + +/***************************** +Called after compact phase to fix all generation gaps +********************************/ +void gc_heap::fix_generation_bounds (int condemned_gen_number, + generation* consing_gen) +{ + UNREFERENCED_PARAMETER(consing_gen); + + assert (generation_allocation_segment (consing_gen) == + ephemeral_heap_segment); + + //assign the planned allocation start to the generation + int gen_number = condemned_gen_number; + int bottom_gen = 0; + + while (gen_number >= bottom_gen) + { + generation* gen = generation_of (gen_number); + dprintf(3,("Fixing generation pointers for %Ix", gen_number)); + if ((gen_number < max_generation) && ephemeral_promotion) + { + make_unused_array (saved_ephemeral_plan_start[gen_number], + saved_ephemeral_plan_start_size[gen_number]); + } + reset_allocation_pointers (gen, generation_plan_allocation_start (gen)); + make_unused_array (generation_allocation_start (gen), generation_plan_allocation_start_size (gen)); + dprintf(3,(" start %Ix", (size_t)generation_allocation_start (gen))); + gen_number--; + } +#ifdef MULTIPLE_HEAPS + if (ephemeral_promotion) + { + //we are creating a generation fault. set the cards. + // and we are only doing this for multiple heaps because in the single heap scenario the + // new ephemeral generations will be empty and there'll be no need to set cards for the + // old ephemeral generations that got promoted into max_generation. 
+ ptrdiff_t delta = 0; +#ifdef SEG_MAPPING_TABLE + heap_segment* old_ephemeral_seg = seg_mapping_table_segment_of (saved_ephemeral_plan_start[max_generation-1]); +#else //SEG_MAPPING_TABLE + heap_segment* old_ephemeral_seg = segment_of (saved_ephemeral_plan_start[max_generation-1], delta); +#endif //SEG_MAPPING_TABLE + + assert (in_range_for_segment (saved_ephemeral_plan_start[max_generation-1], old_ephemeral_seg)); + size_t end_card = card_of (align_on_card (heap_segment_plan_allocated (old_ephemeral_seg))); + size_t card = card_of (saved_ephemeral_plan_start[max_generation-1]); + while (card != end_card) + { + set_card (card); + card++; + } + } +#endif //MULTIPLE_HEAPS + { + alloc_allocated = heap_segment_plan_allocated(ephemeral_heap_segment); + //reset the allocated size + uint8_t* start = generation_allocation_start (youngest_generation); + MAYBE_UNUSED_VAR(start); + if (settings.promotion && !settings.demotion) + { + assert ((start + Align (size (start))) == + heap_segment_plan_allocated(ephemeral_heap_segment)); + } + + heap_segment_allocated(ephemeral_heap_segment)= + heap_segment_plan_allocated(ephemeral_heap_segment); + } +} + +uint8_t* gc_heap::generation_limit (int gen_number) +{ + if (settings.promotion) + { + if (gen_number <= 1) + return heap_segment_reserved (ephemeral_heap_segment); + else + return generation_allocation_start (generation_of ((gen_number - 2))); + } + else + { + if (gen_number <= 0) + return heap_segment_reserved (ephemeral_heap_segment); + else + return generation_allocation_start (generation_of ((gen_number - 1))); + } +} + +BOOL gc_heap::ensure_gap_allocation (int condemned_gen_number) +{ + uint8_t* start = heap_segment_allocated (ephemeral_heap_segment); + size_t size = Align (min_obj_size)*(condemned_gen_number+1); + assert ((start + size) <= + heap_segment_reserved (ephemeral_heap_segment)); + if ((start + size) > + heap_segment_committed (ephemeral_heap_segment)) + { + if (!grow_heap_segment (ephemeral_heap_segment, start + 
size)) + { + return FALSE; + } + } + return TRUE; +} + +uint8_t* gc_heap::allocate_at_end (size_t size) +{ + uint8_t* start = heap_segment_allocated (ephemeral_heap_segment); + size = Align (size); + uint8_t* result = start; + // only called to allocate a min obj so can't overflow here. + assert ((start + size) <= + heap_segment_reserved (ephemeral_heap_segment)); + //ensure_gap_allocation took care of it + assert ((start + size) <= + heap_segment_committed (ephemeral_heap_segment)); + heap_segment_allocated (ephemeral_heap_segment) += size; + return result; +} + + +void gc_heap::make_free_lists (int condemned_gen_number) +{ +#ifdef TIME_GC + unsigned start; + unsigned finish; + start = GetCycleCount32(); +#endif //TIME_GC + + //Promotion has to happen in sweep case. + assert (settings.promotion); + + generation* condemned_gen = generation_of (condemned_gen_number); + uint8_t* start_address = generation_allocation_start (condemned_gen); + + size_t current_brick = brick_of (start_address); + heap_segment* current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen)); + + PREFIX_ASSUME(current_heap_segment != NULL); + + uint8_t* end_address = heap_segment_allocated (current_heap_segment); + size_t end_brick = brick_of (end_address-1); + make_free_args args; + args.free_list_gen_number = min (max_generation, 1 + condemned_gen_number); + args.current_gen_limit = (((condemned_gen_number == max_generation)) ? 
+ MAX_PTR : + (generation_limit (args.free_list_gen_number))); + args.free_list_gen = generation_of (args.free_list_gen_number); + args.highest_plug = 0; + + if ((start_address < end_address) || + (condemned_gen_number == max_generation)) + { + while (1) + { + if ((current_brick > end_brick)) + { + if (args.current_gen_limit == MAX_PTR) + { + //We had an empty segment + //need to allocate the generation start + + generation* gen = generation_of (max_generation); + + heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(start_seg != NULL); + + uint8_t* gap = heap_segment_mem (start_seg); + + generation_allocation_start (gen) = gap; + heap_segment_allocated (start_seg) = gap + Align (min_obj_size); + make_unused_array (gap, Align (min_obj_size)); + reset_allocation_pointers (gen, gap); + dprintf (3, ("Start segment empty, fixing generation start of %d to: %Ix", + max_generation, (size_t)gap)); + args.current_gen_limit = generation_limit (args.free_list_gen_number); + } + if (heap_segment_next_rw (current_heap_segment)) + { + current_heap_segment = heap_segment_next_rw (current_heap_segment); + current_brick = brick_of (heap_segment_mem (current_heap_segment)); + end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1); + + continue; + } + else + { + break; + } + } + { + int brick_entry = brick_table [ current_brick ]; + if ((brick_entry >= 0)) + { + make_free_list_in_brick (brick_address (current_brick) + brick_entry-1, &args); + dprintf(3,("Fixing brick entry %Ix to %Ix", + current_brick, (size_t)args.highest_plug)); + set_brick (current_brick, + (args.highest_plug - brick_address (current_brick))); + } + else + { + if ((brick_entry > -32768)) + { + +#ifdef _DEBUG + ptrdiff_t offset = brick_of (args.highest_plug) - current_brick; + if ((brick_entry != -32767) && (! 
((offset == brick_entry)))) + { + assert ((brick_entry == -1)); + } +#endif //_DEBUG + //init to -1 for faster find_first_object + set_brick (current_brick, -1); + } + } + } + current_brick++; + } + } + { + int bottom_gen = 0; + args.free_list_gen_number--; + while (args.free_list_gen_number >= bottom_gen) + { + uint8_t* gap = 0; + generation* gen2 = generation_of (args.free_list_gen_number); + gap = allocate_at_end (Align(min_obj_size)); + generation_allocation_start (gen2) = gap; + reset_allocation_pointers (gen2, gap); + dprintf(3,("Fixing generation start of %d to: %Ix", + args.free_list_gen_number, (size_t)gap)); + PREFIX_ASSUME(gap != NULL); + make_unused_array (gap, Align (min_obj_size)); + + args.free_list_gen_number--; + } + + //reset the allocated size + uint8_t* start2 = generation_allocation_start (youngest_generation); + alloc_allocated = start2 + Align (size (start2)); + } + +#ifdef TIME_GC + finish = GetCycleCount32(); + sweep_time = finish - start; +#endif //TIME_GC +} + +void gc_heap::make_free_list_in_brick (uint8_t* tree, make_free_args* args) +{ + assert ((tree != NULL)); + { + int right_node = node_right_child (tree); + int left_node = node_left_child (tree); + args->highest_plug = 0; + if (! (0 == tree)) + { + if (! 
(0 == left_node)) + { + make_free_list_in_brick (tree + left_node, args); + + } + { + uint8_t* plug = tree; + size_t gap_size = node_gap_size (tree); + uint8_t* gap = (plug - gap_size); + dprintf (3,("Making free list %Ix len %d in %d", + //dprintf (3,("F: %Ix len %Ix in %d", + (size_t)gap, gap_size, args->free_list_gen_number)); + args->highest_plug = tree; +#ifdef SHORT_PLUGS + if (is_plug_padded (plug)) + { + dprintf (3, ("%Ix padded", plug)); + clear_plug_padded (plug); + } +#endif //SHORT_PLUGS + gen_crossing: + { + if ((args->current_gen_limit == MAX_PTR) || + ((plug >= args->current_gen_limit) && + ephemeral_pointer_p (plug))) + { + dprintf(3,(" Crossing Generation boundary at %Ix", + (size_t)args->current_gen_limit)); + if (!(args->current_gen_limit == MAX_PTR)) + { + args->free_list_gen_number--; + args->free_list_gen = generation_of (args->free_list_gen_number); + } + dprintf(3,( " Fixing generation start of %d to: %Ix", + args->free_list_gen_number, (size_t)gap)); + + reset_allocation_pointers (args->free_list_gen, gap); + args->current_gen_limit = generation_limit (args->free_list_gen_number); + + if ((gap_size >= (2*Align (min_obj_size)))) + { + dprintf(3,(" Splitting the gap in two %Id left", + gap_size)); + make_unused_array (gap, Align(min_obj_size)); + gap_size = (gap_size - Align(min_obj_size)); + gap = (gap + Align(min_obj_size)); + } + else + { + make_unused_array (gap, gap_size); + gap_size = 0; + } + goto gen_crossing; + } + } + + thread_gap (gap, gap_size, args->free_list_gen); + add_gen_free (args->free_list_gen->gen_num, gap_size); + } + if (! 
(0 == right_node)) + { + make_free_list_in_brick (tree + right_node, args); + } + } + } +} + +void gc_heap::thread_gap (uint8_t* gap_start, size_t size, generation* gen) +{ + assert (generation_allocation_start (gen)); + if ((size > 0)) + { + if ((gen->gen_num == 0) && (size > CLR_SIZE)) + { + gen0_big_free_spaces += size; + } + + assert ((heap_segment_rw (generation_start_segment (gen))!= + ephemeral_heap_segment) || + (gap_start > generation_allocation_start (gen))); + // The beginning of a segment gap is not aligned + assert (size >= Align (min_obj_size)); + make_unused_array (gap_start, size, + (!settings.concurrent && (gen != youngest_generation)), + (gen->gen_num == max_generation)); + dprintf (3, ("fr: [%Ix, %Ix[", (size_t)gap_start, (size_t)gap_start+size)); + + if ((size >= min_free_list)) + { + generation_free_list_space (gen) += size; + generation_allocator (gen)->thread_item (gap_start, size); + } + else + { + generation_free_obj_space (gen) += size; + } + } +} + +void gc_heap::loh_thread_gap_front (uint8_t* gap_start, size_t size, generation* gen) +{ + assert (generation_allocation_start (gen)); + if (size >= min_free_list) + { + generation_free_list_space (gen) += size; + generation_allocator (gen)->thread_item_front (gap_start, size); + } +} + +void gc_heap::make_unused_array (uint8_t* x, size_t size, BOOL clearp, BOOL resetp) +{ + dprintf (3, ("Making unused array [%Ix, %Ix[", + (size_t)x, (size_t)(x+size))); + assert (size >= Align (min_obj_size)); + +//#if defined (VERIFY_HEAP) && defined (BACKGROUND_GC) +// check_batch_mark_array_bits (x, x+size); +//#endif //VERIFY_HEAP && BACKGROUND_GC + + if (resetp) + reset_memory (x, size); + + ((CObjectHeader*)x)->SetFree(size); + +#ifdef BIT64 + +#if BIGENDIAN +#error "This won't work on big endian platforms" +#endif + + size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size; + + if (size_as_object < size) + { + // + // If the size is more than 4GB, we need to create 
multiple objects because of + // the Array::m_NumComponents is uint32_t and the high 32 bits of unused array + // size is ignored in regular object size computation. + // + uint8_t * tmp = x + size_as_object; + size_t remaining_size = size - size_as_object; + + while (remaining_size > UINT32_MAX) + { + // Make sure that there will be at least Align(min_obj_size) left + size_t current_size = UINT32_MAX - get_alignment_constant (FALSE) + - Align (min_obj_size, get_alignment_constant (FALSE)); + + ((CObjectHeader*)tmp)->SetFree(current_size); + + remaining_size -= current_size; + tmp += current_size; + } + + ((CObjectHeader*)tmp)->SetFree(remaining_size); + } +#endif + + if (clearp) + clear_card_for_addresses (x, x + Align(size)); +} + +// Clear memory set by make_unused_array. +void gc_heap::clear_unused_array (uint8_t* x, size_t size) +{ + // Also clear the sync block + *(((PTR_PTR)x)-1) = 0; + + ((CObjectHeader*)x)->UnsetFree(); + +#ifdef BIT64 + +#if BIGENDIAN +#error "This won't work on big endian platforms" +#endif + + // The memory could have been cleared in the meantime. We have to mirror the algorithm + // from make_unused_array since we cannot depend on the object sizes in memory. 
+ size_t size_as_object = (uint32_t)(size - free_object_base_size) + free_object_base_size; + + if (size_as_object < size) + { + uint8_t * tmp = x + size_as_object; + size_t remaining_size = size - size_as_object; + + while (remaining_size > UINT32_MAX) + { + size_t current_size = UINT32_MAX - get_alignment_constant (FALSE) + - Align (min_obj_size, get_alignment_constant (FALSE)); + + ((CObjectHeader*)tmp)->UnsetFree(); + + remaining_size -= current_size; + tmp += current_size; + } + + ((CObjectHeader*)tmp)->UnsetFree(); + } +#else + UNREFERENCED_PARAMETER(size); +#endif +} + +inline +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) +{ + uint8_t* candidate = 0; + int cn; + while (1) + { + if (tree < old_address) + { + if ((cn = node_right_child (tree)) != 0) + { + assert (candidate < tree); + candidate = tree; + tree = tree + cn; + Prefetch (tree - 8); + continue; + } + else + break; + } + else if (tree > old_address) + { + if ((cn = node_left_child (tree)) != 0) + { + tree = tree + cn; + Prefetch (tree - 8); + continue; + } + else + break; + } else + break; + } + if (tree <= old_address) + return tree; + else if (candidate) + return candidate; + else + return tree; +} + +#ifdef FEATURE_BASICFREEZE +bool gc_heap::frozen_object_p (Object* obj) +{ + heap_segment* pSegment = gc_heap::find_segment ((uint8_t*)obj, FALSE); + _ASSERTE(pSegment); + + return heap_segment_read_only_p(pSegment); +} +#endif // FEATURE_BASICFREEZE + +#ifdef FEATURE_REDHAWK +// TODO: this was added on RH, we have not done perf runs to see if this is the right +// thing to do for other versions of the CLR. 
+inline +#endif // FEATURE_REDHAWK +void gc_heap::relocate_address (uint8_t** pold_address THREAD_NUMBER_DCL) +{ + uint8_t* old_address = *pold_address; + if (!((old_address >= gc_low) && (old_address < gc_high))) +#ifdef MULTIPLE_HEAPS + { + UNREFERENCED_PARAMETER(thread); + if (old_address == 0) + return; + gc_heap* hp = heap_of (old_address); + if ((hp == this) || + !((old_address >= hp->gc_low) && (old_address < hp->gc_high))) + return; + } +#else //MULTIPLE_HEAPS + return ; +#endif //MULTIPLE_HEAPS + // delta translates old_address into address_gc (old_address); + size_t brick = brick_of (old_address); + int brick_entry = brick_table [ brick ]; + uint8_t* new_address = old_address; + if (! ((brick_entry == 0))) + { + retry: + { + while (brick_entry < 0) + { + brick = (brick + brick_entry); + brick_entry = brick_table [ brick ]; + } + uint8_t* old_loc = old_address; + + uint8_t* node = tree_search ((brick_address (brick) + brick_entry-1), + old_loc); + if ((node <= old_loc)) + new_address = (old_address + node_relocation_distance (node)); + else + { + if (node_left_p (node)) + { + dprintf(3,(" L: %Ix", (size_t)node)); + new_address = (old_address + + (node_relocation_distance (node) + + node_gap_size (node))); + } + else + { + brick = brick - 1; + brick_entry = brick_table [ brick ]; + goto retry; + } + } + } + + *pold_address = new_address; + return; + } + +#ifdef FEATURE_LOH_COMPACTION + if (loh_compacted_p +#ifdef FEATURE_BASICFREEZE + && !frozen_object_p((Object*)old_address) +#endif // FEATURE_BASICFREEZE + ) + { + *pold_address = old_address + loh_node_relocation_distance (old_address); + } + else +#endif //FEATURE_LOH_COMPACTION + { + *pold_address = new_address; + } +} + +inline void +gc_heap::check_class_object_demotion (uint8_t* obj) +{ +#ifdef COLLECTIBLE_CLASS + if (is_collectible(obj)) + { + check_class_object_demotion_internal (obj); + } +#else + UNREFERENCED_PARAMETER(obj); +#endif //COLLECTIBLE_CLASS +} + +#ifdef COLLECTIBLE_CLASS +NOINLINE void 
+gc_heap::check_class_object_demotion_internal (uint8_t* obj) +{ + if (settings.demotion) + { +#ifdef MULTIPLE_HEAPS + // We set the card without checking the demotion range 'cause at this point + // the handle that points to the loader allocator object may or may not have + // been relocated by other GC threads. + set_card (card_of (obj)); +#else + THREAD_FROM_HEAP; + uint8_t* class_obj = get_class_object (obj); + dprintf (3, ("%Ix: got classobj %Ix", obj, class_obj)); + uint8_t* temp_class_obj = class_obj; + uint8_t** temp = &temp_class_obj; + relocate_address (temp THREAD_NUMBER_ARG); + + check_demotion_helper (temp, obj); +#endif //MULTIPLE_HEAPS + } +} + +#endif //COLLECTIBLE_CLASS + +inline void +gc_heap::check_demotion_helper (uint8_t** pval, uint8_t* parent_obj) +{ + // detect if we are demoting an object + if ((*pval < demotion_high) && + (*pval >= demotion_low)) + { + dprintf(3, ("setting card %Ix:%Ix", + card_of((uint8_t*)pval), + (size_t)pval)); + + set_card (card_of (parent_obj)); + } +#ifdef MULTIPLE_HEAPS + else if (settings.demotion) + { + dprintf (4, ("Demotion active, computing heap_of object")); + gc_heap* hp = heap_of (*pval); + if ((*pval < hp->demotion_high) && + (*pval >= hp->demotion_low)) + { + dprintf(3, ("setting card %Ix:%Ix", + card_of((uint8_t*)pval), + (size_t)pval)); + + set_card (card_of (parent_obj)); + } + } +#endif //MULTIPLE_HEAPS +} + +inline void +gc_heap::reloc_survivor_helper (uint8_t** pval) +{ + THREAD_FROM_HEAP; + relocate_address (pval THREAD_NUMBER_ARG); + + check_demotion_helper (pval, (uint8_t*)pval); +} + +inline void +gc_heap::relocate_obj_helper (uint8_t* x, size_t s) +{ + THREAD_FROM_HEAP; + if (contain_pointers (x)) + { + dprintf (3, ("$%Ix$", (size_t)x)); + + go_through_object_nostart (method_table(x), x, s, pval, + { + uint8_t* child = *pval; + reloc_survivor_helper (pval); + if (child) + { + dprintf (3, ("%Ix->%Ix->%Ix", (uint8_t*)pval, child, *pval)); + } + }); + + } + check_class_object_demotion (x); +} + 
+inline +void gc_heap::reloc_ref_in_shortened_obj (uint8_t** address_to_set_card, uint8_t** address_to_reloc) +{ + THREAD_FROM_HEAP; + + uint8_t* old_val = (address_to_reloc ? *address_to_reloc : 0); + relocate_address (address_to_reloc THREAD_NUMBER_ARG); + if (address_to_reloc) + { + dprintf (3, ("SR %Ix: %Ix->%Ix", (uint8_t*)address_to_reloc, old_val, *address_to_reloc)); + } + + //check_demotion_helper (current_saved_info_to_relocate, (uint8_t*)pval); + uint8_t* relocated_addr = *address_to_reloc; + if ((relocated_addr < demotion_high) && + (relocated_addr >= demotion_low)) + { + dprintf (3, ("set card for location %Ix(%Ix)", + (size_t)address_to_set_card, card_of((uint8_t*)address_to_set_card))); + + set_card (card_of ((uint8_t*)address_to_set_card)); + } +#ifdef MULTIPLE_HEAPS + else if (settings.demotion) + { + gc_heap* hp = heap_of (relocated_addr); + if ((relocated_addr < hp->demotion_high) && + (relocated_addr >= hp->demotion_low)) + { + dprintf (3, ("%Ix on h%d, set card for location %Ix(%Ix)", + relocated_addr, hp->heap_number, (size_t)address_to_set_card, card_of((uint8_t*)address_to_set_card))); + + set_card (card_of ((uint8_t*)address_to_set_card)); + } + } +#endif //MULTIPLE_HEAPS +} + +void gc_heap::relocate_pre_plug_info (mark* pinned_plug_entry) +{ + THREAD_FROM_HEAP; + uint8_t* plug = pinned_plug (pinned_plug_entry); + uint8_t* pre_plug_start = plug - sizeof (plug_and_gap); + // Note that we need to add one ptr size here otherwise we may not be able to find the relocated + // address. Consider this scenario: + // gen1 start | 3-ptr sized NP | PP + // 0 | 0x18 | 0x30 + // If we are asking for the reloc address of 0x10 we will AV in relocate_address because + // the first plug we saw in the brick is 0x18 which means 0x10 will cause us to go back a brick + // which is 0, and then we'll AV in tree_search when we try to do node_right_child (tree). 
+ pre_plug_start += sizeof (uint8_t*); + uint8_t** old_address = &pre_plug_start; + + uint8_t* old_val = (old_address ? *old_address : 0); + relocate_address (old_address THREAD_NUMBER_ARG); + if (old_address) + { + dprintf (3, ("PreR %Ix: %Ix->%Ix, set reloc: %Ix", + (uint8_t*)old_address, old_val, *old_address, (pre_plug_start - sizeof (uint8_t*)))); + } + + pinned_plug_entry->set_pre_plug_info_reloc_start (pre_plug_start - sizeof (uint8_t*)); +} + +inline +void gc_heap::relocate_shortened_obj_helper (uint8_t* x, size_t s, uint8_t* end, mark* pinned_plug_entry, BOOL is_pinned) +{ + THREAD_FROM_HEAP; + uint8_t* plug = pinned_plug (pinned_plug_entry); + + if (!is_pinned) + { + //// Temporary - we just wanna make sure we are doing things right when padding is needed. + //if ((x + s) < plug) + //{ + // dprintf (3, ("obj %Ix needed padding: end %Ix is %d bytes from pinned obj %Ix", + // x, (x + s), (plug- (x + s)), plug)); + // GCToOSInterface::DebugBreak(); + //} + + relocate_pre_plug_info (pinned_plug_entry); + } + + verify_pins_with_post_plug_info("after relocate_pre_plug_info"); + + uint8_t* saved_plug_info_start = 0; + uint8_t** saved_info_to_relocate = 0; + + if (is_pinned) + { + saved_plug_info_start = (uint8_t*)(pinned_plug_entry->get_post_plug_info_start()); + saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_post_plug_reloc_info()); + } + else + { + saved_plug_info_start = (plug - sizeof (plug_and_gap)); + saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_pre_plug_reloc_info()); + } + + uint8_t** current_saved_info_to_relocate = 0; + uint8_t* child = 0; + + dprintf (3, ("x: %Ix, pp: %Ix, end: %Ix", x, plug, end)); + + if (contain_pointers (x)) + { + dprintf (3,("$%Ix$", (size_t)x)); + + go_through_object_nostart (method_table(x), x, s, pval, + { + dprintf (3, ("obj %Ix, member: %Ix->%Ix", x, (uint8_t*)pval, *pval)); + + if ((uint8_t*)pval >= end) + { + current_saved_info_to_relocate = saved_info_to_relocate + ((uint8_t*)pval - 
saved_plug_info_start) / sizeof (uint8_t**); + child = *current_saved_info_to_relocate; + reloc_ref_in_shortened_obj (pval, current_saved_info_to_relocate); + dprintf (3, ("last part: R-%Ix(saved: %Ix)->%Ix ->%Ix", + (uint8_t*)pval, current_saved_info_to_relocate, child, *current_saved_info_to_relocate)); + } + else + { + reloc_survivor_helper (pval); + } + }); + } + + check_class_object_demotion (x); +} + +void gc_heap::relocate_survivor_helper (uint8_t* plug, uint8_t* plug_end) +{ + uint8_t* x = plug; + while (x < plug_end) + { + size_t s = size (x); + uint8_t* next_obj = x + Align (s); + Prefetch (next_obj); + relocate_obj_helper (x, s); + assert (s > 0); + x = next_obj; + } +} + +// if we expanded, right now we are not handling it as We are not saving the new reloc info. +void gc_heap::verify_pins_with_post_plug_info (const char* msg) +{ +#if defined (_DEBUG) && defined (VERIFY_HEAP) + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + if (!verify_pinned_queue_p) + return; + + if (settings.heap_expansion) + return; + + for (size_t i = 0; i < mark_stack_tos; i++) + { + mark& m = mark_stack_array[i]; + + mark* pinned_plug_entry = pinned_plug_of(i); + + if (pinned_plug_entry->has_post_plug_info() && + pinned_plug_entry->post_short_p() && + (pinned_plug_entry->saved_post_plug_debug.gap != 1)) + { + uint8_t* next_obj = pinned_plug_entry->get_post_plug_info_start() + sizeof (plug_and_gap); + // object after pin + dprintf (3, ("OFP: %Ix, G: %Ix, R: %Ix, LC: %d, RC: %d", + next_obj, node_gap_size (next_obj), node_relocation_distance (next_obj), + (int)node_left_child (next_obj), (int)node_right_child (next_obj))); + + size_t* post_plug_debug = (size_t*)(&m.saved_post_plug_debug); + + if (node_gap_size (next_obj) != *post_plug_debug) + { + dprintf (3, ("obj: %Ix gap should be %Ix but it is %Ix", + next_obj, *post_plug_debug, (size_t)(node_gap_size (next_obj)))); + FATAL_GC_ERROR(); + } + post_plug_debug++; + // can't do node_relocation_distance here as 
it clears the left bit. + //if (node_relocation_distance (next_obj) != *post_plug_debug) + if (*((size_t*)(next_obj - 3 * sizeof (size_t))) != *post_plug_debug) + { + dprintf (3, ("obj: %Ix reloc should be %Ix but it is %Ix", + next_obj, *post_plug_debug, (size_t)(node_relocation_distance (next_obj)))); + FATAL_GC_ERROR(); + } + if (node_left_child (next_obj) > 0) + { + dprintf (3, ("obj: %Ix, vLC: %d\n", next_obj, (int)(node_left_child (next_obj)))); + FATAL_GC_ERROR(); + } + } + } + + dprintf (3, ("%s verified", msg)); + } +#else // _DEBUG && VERIFY_HEAP + UNREFERENCED_PARAMETER(msg); +#endif // _DEBUG && VERIFY_HEAP +} + +#ifdef COLLECTIBLE_CLASS +// We don't want to burn another ptr size space for pinned plugs to record this so just +// set the card unconditionally for collectible objects if we are demoting. +inline void +gc_heap::unconditional_set_card_collectible (uint8_t* obj) +{ + if (settings.demotion) + { + set_card (card_of (obj)); + } +} +#endif //COLLECTIBLE_CLASS + +void gc_heap::relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* plug_end, mark* pinned_plug_entry) +{ + uint8_t* x = plug; + uint8_t* p_plug = pinned_plug (pinned_plug_entry); + BOOL is_pinned = (plug == p_plug); + BOOL check_short_obj_p = (is_pinned ? pinned_plug_entry->post_short_p() : pinned_plug_entry->pre_short_p()); + + plug_end += sizeof (gap_reloc_pair); + + //dprintf (3, ("%s %Ix is shortened, and last object %s overwritten", (is_pinned ? "PP" : "NP"), plug, (check_short_obj_p ? "is" : "is not"))); + dprintf (3, ("%s %Ix-%Ix short, LO: %s OW", (is_pinned ? "PP" : "NP"), plug, plug_end, (check_short_obj_p ? 
"is" : "is not"))); + + verify_pins_with_post_plug_info("begin reloc short surv"); + + while (x < plug_end) + { + if (check_short_obj_p && ((plug_end - x) < min_pre_pin_obj_size)) + { + dprintf (3, ("last obj %Ix is short", x)); + + if (is_pinned) + { +#ifdef COLLECTIBLE_CLASS + if (pinned_plug_entry->post_short_collectible_p()) + unconditional_set_card_collectible (x); +#endif //COLLECTIBLE_CLASS + + // Relocate the saved references based on bits set. + uint8_t** saved_plug_info_start = (uint8_t**)(pinned_plug_entry->get_post_plug_info_start()); + uint8_t** saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_post_plug_reloc_info()); + for (size_t i = 0; i < pinned_plug_entry->get_max_short_bits(); i++) + { + if (pinned_plug_entry->post_short_bit_p (i)) + { + reloc_ref_in_shortened_obj ((saved_plug_info_start + i), (saved_info_to_relocate + i)); + } + } + } + else + { +#ifdef COLLECTIBLE_CLASS + if (pinned_plug_entry->pre_short_collectible_p()) + unconditional_set_card_collectible (x); +#endif //COLLECTIBLE_CLASS + + relocate_pre_plug_info (pinned_plug_entry); + + // Relocate the saved references based on bits set. 
+ uint8_t** saved_plug_info_start = (uint8_t**)(p_plug - sizeof (plug_and_gap)); + uint8_t** saved_info_to_relocate = (uint8_t**)(pinned_plug_entry->get_pre_plug_reloc_info()); + for (size_t i = 0; i < pinned_plug_entry->get_max_short_bits(); i++) + { + if (pinned_plug_entry->pre_short_bit_p (i)) + { + reloc_ref_in_shortened_obj ((saved_plug_info_start + i), (saved_info_to_relocate + i)); + } + } + } + + break; + } + + size_t s = size (x); + uint8_t* next_obj = x + Align (s); + Prefetch (next_obj); + + if (next_obj >= plug_end) + { + dprintf (3, ("object %Ix is at the end of the plug %Ix->%Ix", + next_obj, plug, plug_end)); + + verify_pins_with_post_plug_info("before reloc short obj"); + + relocate_shortened_obj_helper (x, s, (x + Align (s) - sizeof (plug_and_gap)), pinned_plug_entry, is_pinned); + } + else + { + relocate_obj_helper (x, s); + } + + assert (s > 0); + x = next_obj; + } + + verify_pins_with_post_plug_info("end reloc short surv"); +} + +void gc_heap::relocate_survivors_in_plug (uint8_t* plug, uint8_t* plug_end, + BOOL check_last_object_p, + mark* pinned_plug_entry) +{ + //dprintf(3,("Relocating pointers in Plug [%Ix,%Ix[", (size_t)plug, (size_t)plug_end)); + dprintf (3,("RP: [%Ix,%Ix[", (size_t)plug, (size_t)plug_end)); + + if (check_last_object_p) + { + relocate_shortened_survivor_helper (plug, plug_end, pinned_plug_entry); + } + else + { + relocate_survivor_helper (plug, plug_end); + } +} + +void gc_heap::relocate_survivors_in_brick (uint8_t* tree, relocate_args* args) +{ + assert ((tree != NULL)); + + dprintf (3, ("tree: %Ix, args->last_plug: %Ix, left: %Ix, right: %Ix, gap(t): %Ix", + tree, args->last_plug, + (tree + node_left_child (tree)), + (tree + node_right_child (tree)), + node_gap_size (tree))); + + if (node_left_child (tree)) + { + relocate_survivors_in_brick (tree + node_left_child (tree), args); + } + { + uint8_t* plug = tree; + BOOL has_post_plug_info_p = FALSE; + BOOL has_pre_plug_info_p = FALSE; + + if (tree == oldest_pinned_plug) + { 
            // This plug is the oldest pin: dequeue it and remember its saved
            // pre/post plug info for the relocation of the surrounding plugs.
            args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                               &has_post_plug_info_p);
            assert (tree == pinned_plug (args->pinned_plug_entry));

            dprintf (3, ("tree is the oldest pin: %Ix", tree));
        }
        if (args->last_plug)
        {
            size_t gap_size = node_gap_size (tree);
            // The gap preceding this plug marks the end of the previous plug.
            uint8_t* gap = (plug - gap_size);
            dprintf (3, ("tree: %Ix, gap: %Ix (%Ix)", tree, gap, gap_size));
            assert (gap_size >= Align (min_obj_size));
            uint8_t* last_plug_end = gap;

            // The previous plug's last object is shortened if either the previous
            // plug carried post-plug info or this plug carried pre-plug info.
            BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);

            {
                relocate_survivors_in_plug (args->last_plug, last_plug_end, check_last_object_p, args->pinned_plug_entry);
            }
        }
        else
        {
            assert (!has_pre_plug_info_p);
        }

        args->last_plug = plug;
        args->is_shortened = has_post_plug_info_p;
        if (has_post_plug_info_p)
        {
            dprintf (3, ("setting %Ix as shortened", plug));
        }
        dprintf (3, ("last_plug: %Ix(shortened: %d)", plug, (args->is_shortened ? 1 : 0)));
    }
    if (node_right_child (tree))
    {
        relocate_survivors_in_brick (tree + node_right_child (tree), args);
    }
}

// Caches the address of the plug at the head of the pinned plug queue
// (or 0 when the queue is empty) so tree walks can compare cheaply.
inline
void gc_heap::update_oldest_pinned_plug()
{
    oldest_pinned_plug = (pinned_plug_que_empty_p() ?
                          0 : pinned_plug (oldest_pin()));
}

// Relocation pass over the condemned generations: walks every brick of every
// segment of the condemned generation and relocates references in all
// surviving plugs, carrying state (last plug, pinned entry) across bricks.
void gc_heap::relocate_survivors (int condemned_gen_number,
                                  uint8_t* first_condemned_address)
{
    generation* condemned_gen = generation_of (condemned_gen_number);
    uint8_t* start_address = first_condemned_address;
    size_t current_brick = brick_of (start_address);
    heap_segment* current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen));

    PREFIX_ASSUME(current_heap_segment != NULL);

    uint8_t* end_address = 0;

    reset_pinned_queue_bos();
    update_oldest_pinned_plug();

    end_address = heap_segment_allocated (current_heap_segment);

    size_t end_brick = brick_of (end_address - 1);
    relocate_args args;
    args.low = gc_low;
    args.high = gc_high;
    args.is_shortened = FALSE;
    args.pinned_plug_entry = 0;
    args.last_plug = 0;
    while (1)
    {
        if (current_brick > end_brick)
        {
            // Past the last brick of this segment: flush the pending plug,
            // whose end is the segment's allocated limit.
            if (args.last_plug)
            {
                {
                    assert (!(args.is_shortened));
                    relocate_survivors_in_plug (args.last_plug,
                                                heap_segment_allocated (current_heap_segment),
                                                args.is_shortened,
                                                args.pinned_plug_entry);
                }

                args.last_plug = 0;
            }

            if (heap_segment_next_rw (current_heap_segment))
            {
                // Advance to the next segment of the condemned generation.
                current_heap_segment = heap_segment_next_rw (current_heap_segment);
                current_brick = brick_of (heap_segment_mem (current_heap_segment));
                end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                continue;
            }
            else
            {
                break;
            }
        }
        {
            int brick_entry = brick_table [ current_brick ];

            // Non-negative brick entry == offset+1 of the root of this
            // brick's plug tree; negative means no tree in this brick.
            if (brick_entry >= 0)
            {
                relocate_survivors_in_brick (brick_address (current_brick) +
                                             brick_entry -1,
                                             &args);
            }
        }
        current_brick++;
    }
}

#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
// Reports one moved plug to the profiler/ETW. For a shortened plug the saved
// bytes are temporarily swapped back in around the notification so the
// profiler sees the real object contents.
void gc_heap::walk_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, walk_relocate_args* args, size_t profiling_context)
{
    if (check_last_object_p)
    {
        size += sizeof (gap_reloc_pair);
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            assert
(entry->has_post_plug_info());
            entry->swap_post_plug_and_saved_for_profiler();
        }
        else
        {
            assert (entry->has_pre_plug_info());
            entry->swap_pre_plug_and_saved_for_profiler();
        }
    }

    ptrdiff_t last_plug_relocation = node_relocation_distance (plug);
    // For a non-compacting (sweeping) GC report a zero relocation distance.
    ptrdiff_t reloc = settings.compaction ? last_plug_relocation : 0;

    STRESS_LOG_PLUG_MOVE(plug, (plug + size), -last_plug_relocation);

    ETW::GCLog::MovedReference(plug,
                               (plug + size),
                               reloc,
                               profiling_context,
                               settings.compaction);

    if (check_last_object_p)
    {
        // Restore the overwritten plug bytes that were swapped in above.
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            entry->swap_post_plug_and_saved_for_profiler();
        }
        else
        {
            entry->swap_pre_plug_and_saved_for_profiler();
        }
    }
}

// Profiler twin of relocate_survivors_in_brick: in-order walk of a brick's
// plug tree, reporting each plug (one behind the walk) via walk_plug.
void gc_heap::walk_relocation_in_brick (uint8_t* tree, walk_relocate_args* args, size_t profiling_context)
{
    assert ((tree != NULL));
    if (node_left_child (tree))
    {
        walk_relocation_in_brick (tree + node_left_child (tree), args, profiling_context);
    }

    uint8_t* plug = tree;
    BOOL has_pre_plug_info_p = FALSE;
    BOOL has_post_plug_info_p = FALSE;

    if (tree == oldest_pinned_plug)
    {
        args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                           &has_post_plug_info_p);
        assert (tree == pinned_plug (args->pinned_plug_entry));
    }

    if (args->last_plug != 0)
    {
        size_t gap_size = node_gap_size (tree);
        // The gap before this plug bounds the previous plug.
        uint8_t* gap = (plug - gap_size);
        uint8_t* last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - args->last_plug);
        dprintf (3, ("tree: %Ix, last_plug: %Ix, gap: %Ix(%Ix), last_plug_end: %Ix, size: %Ix",
            tree, args->last_plug, gap, gap_size, last_plug_end, last_plug_size));

        BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);
        if (!check_last_object_p)
        {
            assert (last_plug_size >= Align (min_obj_size));
        }

        walk_plug (args->last_plug, last_plug_size, check_last_object_p, args, profiling_context);
    }
    else
    {
        assert (!has_pre_plug_info_p);
    }

    dprintf (3,
             ("set args last plug to plug: %Ix", plug));
    args->last_plug = plug;
    args->is_shortened = has_post_plug_info_p;

    if (node_right_child (tree))
    {
        walk_relocation_in_brick (tree + node_right_child (tree), args, profiling_context);

    }
}

// Walks every brick of the condemned generation reporting plug moves to the
// profiler/ETW; mirrors the control flow of relocate_survivors.
void gc_heap::walk_relocation (int condemned_gen_number,
                               uint8_t* first_condemned_address,
                               size_t profiling_context)

{
    generation* condemned_gen = generation_of (condemned_gen_number);
    uint8_t* start_address = first_condemned_address;
    size_t current_brick = brick_of (start_address);
    heap_segment* current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen));

    PREFIX_ASSUME(current_heap_segment != NULL);

    reset_pinned_queue_bos();
    update_oldest_pinned_plug();
    size_t end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
    walk_relocate_args args;
    args.is_shortened = FALSE;
    args.pinned_plug_entry = 0;
    args.last_plug = 0;

    while (1)
    {
        if (current_brick > end_brick)
        {
            // End of segment: flush the pending plug up to the allocated limit.
            if (args.last_plug)
            {
                walk_plug (args.last_plug,
                           (heap_segment_allocated (current_heap_segment) - args.last_plug),
                           args.is_shortened,
                           &args, profiling_context);
                args.last_plug = 0;
            }
            if (heap_segment_next_rw (current_heap_segment))
            {
                current_heap_segment = heap_segment_next_rw (current_heap_segment);
                current_brick = brick_of (heap_segment_mem (current_heap_segment));
                end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                continue;
            }
            else
            {
                break;
            }
        }
        {
            int brick_entry = brick_table [ current_brick ];
            if (brick_entry >= 0)
            {
                walk_relocation_in_brick (brick_address (current_brick) +
                                          brick_entry - 1,
                                          &args,
                                          profiling_context);
            }
        }
        current_brick++;
    }
}

#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
// Background-GC variant of the heap walk: BGC never moves objects, so instead
// of walking plug trees this scans segments linearly, synthesizing "plugs"
// from runs of live (non-free) objects and reporting them with reloc == 0.
void gc_heap::walk_relocation_for_bgc(size_t profiling_context)
{
    // This should only be called for BGCs
    assert(settings.concurrent);

    heap_segment* seg = heap_segment_rw
(generation_start_segment (generation_of (max_generation)));

    BOOL small_object_segments = TRUE;
    int align_const = get_alignment_constant (small_object_segments);

    while (1)
    {
        if (seg == 0)
        {
            if (small_object_segments)
            {
                //switch to large segment
                small_object_segments = FALSE;

                align_const = get_alignment_constant (small_object_segments);
                seg = heap_segment_rw (generation_start_segment (large_object_generation));

                PREFIX_ASSUME(seg != NULL);

                continue;
            }
            else
                break;
        }

        uint8_t* o = heap_segment_mem (seg);
        uint8_t* end = heap_segment_allocated (seg);

        while (o < end)
        {
            // Skip free objects; they do not belong to any reported plug.
            if (method_table(o) == g_pFreeObjectMethodTable)
            {
                o += Align (size (o), align_const);
                continue;
            }

            // It's survived. Make a fake plug, starting at o,
            // and send the event

            uint8_t* plug_start = o;

            // Extend the fake plug through the run of consecutive live objects.
            while (method_table(o) != g_pFreeObjectMethodTable)
            {
                o += Align (size (o), align_const);
                if (o >= end)
                {
                    break;
                }
            }

            uint8_t* plug_end = o;

            // Note on last parameter: since this is for bgc, only ETW
            // should be sending these events so that existing profapi profilers
            // don't get confused.
            ETW::GCLog::MovedReference(
                plug_start,
                plug_end,
                0,              // Reloc distance == 0 as this is non-compacting
                profiling_context,
                FALSE,          // Non-compacting
                FALSE);         // fAllowProfApiNotification
        }

        seg = heap_segment_next (seg);
    }
}

// Drives the BGC-time profiler heap walk: begin/end the MovedReferences
// session around walk_relocation_for_bgc, then rendezvous all heaps.
void gc_heap::make_free_lists_for_profiler_for_bgc ()
{
    assert(settings.concurrent);

    size_t profiling_context = 0;
    ETW::GCLog::BeginMovedReferences(&profiling_context);

    // This provides the profiler with information on what blocks of
    // memory are moved during a gc.

    walk_relocation_for_bgc(profiling_context);

    // Notify the EE-side profiling code that all the references have been traced for
    // this heap, and that it needs to flush all cached data it hasn't sent to the
    // profiler and release resources it no longer needs. Since this is for bgc, only
    // ETW should be sending these events so that existing profapi profilers don't get confused.
    ETW::GCLog::EndMovedReferences(profiling_context, FALSE /* fAllowProfApiNotification */);

#ifdef MULTIPLE_HEAPS
    // Wait for all heaps to finish their walk before any of them proceeds.
    bgc_t_join.join(this, gc_join_after_profiler_heap_walk);
    if (bgc_t_join.joined())
    {
        bgc_t_join.restart();
    }
#endif // MULTIPLE_HEAPS
}

#endif // defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)

// The relocate phase of a compacting GC: updates every reference that points
// into the condemned range - roots, cross-generation cards, large objects,
// survivors, finalization data and handles - to the planned new addresses.
void gc_heap::relocate_phase (int condemned_gen_number,
                              uint8_t* first_condemned_address)
{
    ScanContext sc;
    sc.thread_number = heap_number;
    sc.promotion = FALSE;
    sc.concurrent = FALSE;


#ifdef TIME_GC
        unsigned start;
        unsigned finish;
        start = GetCycleCount32();
#endif //TIME_GC

//  %type%  category = quote (relocate);
    dprintf (2,("---- Relocate phase -----"));

#ifdef MULTIPLE_HEAPS
    //join all threads to make sure they are synchronized
    dprintf(3, ("Joining after end of plan"));
    gc_t_join.join(this, gc_join_begin_relocate_phase);
    if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS

    {
#ifdef MULTIPLE_HEAPS

        //join all threads to make sure they are synchronized
        dprintf(3, ("Restarting for relocation"));
        gc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    dprintf(3,("Relocating roots"));
    GCScan::GcScanRoots(GCHeap::Relocate,
                        condemned_gen_number, max_generation, &sc);

    verify_pins_with_post_plug_info("after reloc stack");

#ifdef BACKGROUND_GC
    if (recursive_gc_sync::background_running_p())
    {
        scan_background_roots (GCHeap::Relocate, heap_number, &sc);
    }
#endif //BACKGROUND_GC

    // For ephemeral GCs, older-generation references into the condemned range
    // are found via the card table rather than by scanning everything.
    if (condemned_gen_number != max_generation)
    {
        dprintf(3,("Relocating cross generation pointers"));
        mark_through_cards_for_segments (&gc_heap::relocate_address, TRUE);
        verify_pins_with_post_plug_info("after reloc cards");
    }
    if (condemned_gen_number != max_generation)
    {
        dprintf(3,("Relocating cross generation pointers for large objects"));
        mark_through_cards_for_large_objects (&gc_heap::relocate_address, TRUE);
    }
    else
    {
#ifdef FEATURE_LOH_COMPACTION
        if (loh_compacted_p)
        {
            assert (settings.condemned_generation == max_generation);
            relocate_in_loh_compact();
        }
        else
#endif //FEATURE_LOH_COMPACTION
        {
            relocate_in_large_objects ();
        }
    }
    {
        dprintf(3,("Relocating survivors"));
        relocate_survivors (condemned_gen_number,
                            first_condemned_address);
    }

#ifdef FEATURE_PREMORTEM_FINALIZATION
        dprintf(3,("Relocating finalization data"));
        finalize_queue->RelocateFinalizationData (condemned_gen_number,
                                                  __this);
#endif // FEATURE_PREMORTEM_FINALIZATION


// MTHTS
    {
        dprintf(3,("Relocating handle table"));
        GCScan::GcScanHandles(GCHeap::Relocate,
                              condemned_gen_number, max_generation, &sc);
    }

#ifdef MULTIPLE_HEAPS
    //join all threads to make sure they are synchronized
    dprintf(3, ("Joining after end of relocation"));
    gc_t_join.join(this, gc_join_relocate_phase_done);

#endif //MULTIPLE_HEAPS

#ifdef TIME_GC
        finish = GetCycleCount32();
        reloc_time = finish - start;
#endif //TIME_GC

    dprintf(2,( "---- End of Relocate phase ----"));
}

// This compares to see if tree is the current pinned plug and returns info
// for this pinned plug. Also advances the pinned queue if that's the case.
//
// We don't change the values of the plug info if tree is not the same as
// the current pinned plug - the caller is responsible for setting the right
// values to begin with.
//
// POPO TODO: We are keeping this temporarily as this is also used by realloc
// where it passes FALSE to deque_p, change it to use the same optimization
// as relocate. Not as essential since realloc is already a slow path.
mark* gc_heap::get_next_pinned_entry (uint8_t* tree,
                                      BOOL* has_pre_plug_info_p,
                                      BOOL* has_post_plug_info_p,
                                      BOOL deque_p)
{
    if (!pinned_plug_que_empty_p())
    {
        mark* oldest_entry = oldest_pin();
        uint8_t* oldest_plug = pinned_plug (oldest_entry);
        if (tree == oldest_plug)
        {
            *has_pre_plug_info_p = oldest_entry->has_pre_plug_info();
            *has_post_plug_info_p = oldest_entry->has_post_plug_info();

            if (deque_p)
            {
                deque_pinned_plug();
            }

            dprintf (3, ("found a pinned plug %Ix, pre: %d, post: %d",
                tree,
                (*has_pre_plug_info_p ? 1 : 0),
                (*has_post_plug_info_p ? 1 : 0)));

            return oldest_entry;
        }
    }

    // tree is not the current pinned plug (or the queue is empty);
    // out-params are deliberately left untouched.
    return NULL;
}

// This also deques the oldest entry and update the oldest plug
mark* gc_heap::get_oldest_pinned_entry (BOOL* has_pre_plug_info_p,
                                        BOOL* has_post_plug_info_p)
{
    mark* oldest_entry = oldest_pin();
    *has_pre_plug_info_p = oldest_entry->has_pre_plug_info();
    *has_post_plug_info_p = oldest_entry->has_post_plug_info();

    deque_pinned_plug();
    update_oldest_pinned_plug();
    return oldest_entry;
}

// Either copies card-table state from [src, src+len[ to the destination range
// (when older-generation references may exist) or just clears the
// destination's cards.
inline
void gc_heap::copy_cards_range (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p)
{
    if (copy_cards_p)
        copy_cards_for_addresses (dest, src, len);
    else
        clear_card_for_addresses (dest, dest + len);
}

// POPO TODO: We should actually just recover the artifically made gaps here..because when we copy
// we always copy the earlier plugs first which means we won't need the gap sizes anymore. This way
// we won't need to individually recover each overwritten part of plugs.
//
// Moves len bytes of plug memory from src to dest, keeping the auxiliary GC
// state (background mark bits, software write watch, card table) in sync.
inline
void gc_heap::gcmemcopy (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p)
{
    if (dest != src)
    {
#ifdef BACKGROUND_GC
        if (current_c_gc_state == c_gc_state_marking)
        {
            //TODO: should look to see whether we should consider changing this
            // to copy a consecutive region of the mark array instead.
            copy_mark_bits_for_addresses (dest, src, len);
        }
#endif //BACKGROUND_GC
        //dprintf(3,(" Memcopy [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
        dprintf(3,(" mc: [%Ix->%Ix, %Ix->%Ix[", (size_t)src, (size_t)dest, (size_t)src+len, (size_t)dest+len));
        // plug_skew shifts the copy to include each object's header bytes.
        memcopy (dest - plug_skew, src - plug_skew, (int)len);
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        if (SoftwareWriteWatch::IsEnabledForGCHeap())
        {
            // The ranges [src - plug_kew .. src[ and [src + len - plug_skew .. src + len[ are ObjHeaders, which don't have GC
            // references, and are not relevant for write watch. The latter range actually corresponds to the ObjHeader for the
            // object at (src + len), so it can be ignored anyway.
            SoftwareWriteWatch::SetDirtyRegion(dest, len - plug_skew);
        }
#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
        copy_cards_range (dest, src, len, copy_cards_p);
    }
}

// Physically moves one plug to its planned location and fixes up all brick
// table entries the moved plug touches. check_last_object_p means the plug's
// boundary object was shortened and its saved bytes must be swapped in for
// the copy and swapped back afterwards.
void gc_heap::compact_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, compact_args* args)
{
    args->print();
    uint8_t* reloc_plug = plug + args->last_plug_relocation;

    if (check_last_object_p)
    {
        // Include the trailing gap/reloc pair and restore the real object
        // bytes that the pinned-plug bookkeeping had overwritten.
        size += sizeof (gap_reloc_pair);
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            assert (entry->has_post_plug_info());
            entry->swap_post_plug_and_saved();
        }
        else
        {
            assert (entry->has_pre_plug_info());
            entry->swap_pre_plug_and_saved();
        }
    }

    int old_brick_entry = brick_table [brick_of (plug)];

    assert (node_relocation_distance (plug) == args->last_plug_relocation);

#ifdef FEATURE_STRUCTALIGN
    ptrdiff_t alignpad = node_alignpad(plug);
    if (alignpad)
    {
        make_unused_array (reloc_plug - alignpad, alignpad);
        if (brick_of (reloc_plug - alignpad) != brick_of (reloc_plug))
        {
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - alignpad, reloc_plug);
        }
    }
#else // FEATURE_STRUCTALIGN
    size_t unused_arr_size = 0;
    BOOL already_padded_p = FALSE;
#ifdef SHORT_PLUGS
    if (is_plug_padded (plug))
    {
        already_padded_p = TRUE;
        clear_plug_padded (plug);
        unused_arr_size = Align (min_obj_size);
    }
#endif //SHORT_PLUGS
    if (node_realigned (plug))
    {
        unused_arr_size += switch_alignment_size (already_padded_p);
    }

    if (unused_arr_size != 0)
    {
        // Fill the padding/realignment space in front of the relocated plug
        // with a free object so the heap stays walkable.
        make_unused_array (reloc_plug - unused_arr_size, unused_arr_size);

        if (brick_of (reloc_plug - unused_arr_size) != brick_of (reloc_plug))
        {
            dprintf (3, ("fix B for padding: %Id: %Ix->%Ix",
                unused_arr_size, (reloc_plug - unused_arr_size), reloc_plug));
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - unused_arr_size, reloc_plug);
        }
    }
#endif // FEATURE_STRUCTALIGN

#ifdef SHORT_PLUGS
    // NOTE(review): in the non-STRUCTALIGN path above, clear_plug_padded has
    // already cleared the padded bit, so this appears unreachable there -
    // presumably kept for the FEATURE_STRUCTALIGN build; TODO confirm.
    if (is_plug_padded (plug))
    {
        make_unused_array (reloc_plug - Align (min_obj_size), Align (min_obj_size));

        if (brick_of (reloc_plug - Align (min_obj_size)) != brick_of (reloc_plug))
        {
            // The alignment padding is straddling one or more bricks;
            // it has to be the last "object" of its first brick.
            fix_brick_to_highest (reloc_plug - Align (min_obj_size), reloc_plug);
        }
    }
#endif //SHORT_PLUGS

    gcmemcopy (reloc_plug, plug, size, args->copy_cards_p);

    if (args->check_gennum_p)
    {
        int src_gennum = args->src_gennum;
        if (src_gennum == -1)
        {
            src_gennum = object_gennum (plug);
        }

        int dest_gennum = object_gennum_plan (reloc_plug);

        // Account for bytes promoted into an older generation.
        if (src_gennum < dest_gennum)
        {
            generation_allocation_size (generation_of (dest_gennum)) += size;
        }
    }

    size_t current_reloc_brick = args->current_compacted_brick;

    if (brick_of (reloc_plug) != current_reloc_brick)
    {
        // Entered a new brick: finalize the previous brick's entry so it
        // points at the last plug placed in it.
        dprintf (3, ("last reloc B: %Ix, current reloc B: %Ix",
            current_reloc_brick, brick_of (reloc_plug)));

        if (args->before_last_plug)
        {
            dprintf (3,(" fixing last brick %Ix to point to last plug %Ix(%Ix)",
                     current_reloc_brick,
                     args->before_last_plug,
                     (args->before_last_plug - brick_address (current_reloc_brick))));

            {
                set_brick (current_reloc_brick,
                        args->before_last_plug - brick_address (current_reloc_brick));
            }
        }
        current_reloc_brick = brick_of (reloc_plug);
    }
    size_t end_brick = brick_of (reloc_plug + size-1);
    if (end_brick != current_reloc_brick)
    {
        // The plug is straddling one or more bricks
        // It has to be the last plug of its first brick
        dprintf (3,("plug spanning multiple bricks, fixing first brick %Ix to %Ix(%Ix)",
                 current_reloc_brick, (size_t)reloc_plug,
                 (reloc_plug - brick_address (current_reloc_brick))));

        {
            set_brick (current_reloc_brick,
                    reloc_plug - brick_address (current_reloc_brick));
        }
        // update all intervening brick
        size_t brick = current_reloc_brick + 1;
        dprintf (3,("setting intervening bricks %Ix->%Ix to -1",
            brick, (end_brick - 1)));
        while (brick < end_brick)
        {
            set_brick (brick, -1);
            brick++;
        }
        // code last brick offset as a plug address
        args->before_last_plug = brick_address (end_brick) -1;
        current_reloc_brick = end_brick;
        dprintf (3, ("setting before last to %Ix, last brick to %Ix",
            args->before_last_plug, current_reloc_brick));
    }
    else
    {
        dprintf (3, ("still in the same brick: %Ix", end_brick));
        args->before_last_plug = reloc_plug;
    }
    args->current_compacted_brick = current_reloc_brick;

    if (check_last_object_p)
    {
        // Re-hide the saved bytes now that the copy is done.
        mark* entry = args->pinned_plug_entry;

        if (args->is_shortened)
        {
            entry->swap_post_plug_and_saved();
        }
        else
        {
            entry->swap_pre_plug_and_saved();
        }
    }
}

// In-order walk of a brick's plug tree, compacting each plug (one behind the
// walk, as in relocate_survivors_in_brick) via compact_plug.
void gc_heap::compact_in_brick (uint8_t* tree, compact_args* args)
{
    assert (tree != NULL);
    int left_node = node_left_child (tree);
    int right_node = node_right_child (tree);
    ptrdiff_t relocation = node_relocation_distance (tree);

    args->print();

    if (left_node)
    {
        dprintf (3, ("B: L: %d->%Ix", left_node, (tree + left_node)));
        compact_in_brick ((tree + left_node), args);
    }

    uint8_t* plug = tree;
    BOOL has_pre_plug_info_p = FALSE;
    BOOL has_post_plug_info_p = FALSE;

    if (tree == oldest_pinned_plug)
    {
        args->pinned_plug_entry = get_oldest_pinned_entry (&has_pre_plug_info_p,
                                                           &has_post_plug_info_p);
        assert (tree == pinned_plug (args->pinned_plug_entry));
    }

    if (args->last_plug != 0)
    {
        size_t gap_size = node_gap_size (tree);
        uint8_t* gap = (plug - gap_size);
        uint8_t* last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - args->last_plug);
        dprintf (3, ("tree: %Ix, last_plug: %Ix, gap: %Ix(%Ix), last_plug_end: %Ix, size: %Ix",
            tree, args->last_plug, gap, gap_size, last_plug_end, last_plug_size));

        BOOL check_last_object_p = (args->is_shortened || has_pre_plug_info_p);
        if (!check_last_object_p)
        {
            assert (last_plug_size >= Align (min_obj_size));
        }

        compact_plug (args->last_plug, last_plug_size, check_last_object_p, args);
    }
    else
    {
        assert (!has_pre_plug_info_p);
    }

    dprintf (3, ("set args last plug to plug: %Ix, reloc: %Ix", plug, relocation));
    args->last_plug = plug;
    args->last_plug_relocation = relocation;
    args->is_shortened = has_post_plug_info_p;

    if
(right_node)
    {
        dprintf (3, ("B: R: %d->%Ix", right_node, (tree + right_node)));
        compact_in_brick ((tree + right_node), args);
    }
}

// After compaction, restore every plug byte range that was overwritten by
// pinned-plug bookkeeping, draining the pinned plug queue.
void gc_heap::recover_saved_pinned_info()
{
    reset_pinned_queue_bos();

    while (!(pinned_plug_que_empty_p()))
    {
        mark* oldest_entry = oldest_pin();
        oldest_entry->recover_plug_info();
#ifdef GC_CONFIG_DRIVEN
        if (oldest_entry->has_pre_plug_info() && oldest_entry->has_post_plug_info())
            record_interesting_data_point (idp_pre_and_post_pin);
        else if (oldest_entry->has_pre_plug_info())
            record_interesting_data_point (idp_pre_pin);
        else if (oldest_entry->has_post_plug_info())
            record_interesting_data_point (idp_post_pin);
#endif //GC_CONFIG_DRIVEN

        deque_pinned_plug();
    }
}

// The compact phase: physically moves all surviving plugs to their planned
// locations (walking bricks/segments like relocate_survivors), maintains the
// brick table, then recovers the pinned plugs' saved bytes.
void gc_heap::compact_phase (int condemned_gen_number,
                             uint8_t* first_condemned_address,
                             BOOL clear_cards)
{
//  %type%  category = quote (compact);
#ifdef TIME_GC
        unsigned start;
        unsigned finish;
        start = GetCycleCount32();
#endif //TIME_GC
    generation* condemned_gen = generation_of (condemned_gen_number);
    uint8_t* start_address = first_condemned_address;
    size_t current_brick = brick_of (start_address);
    heap_segment* current_heap_segment = heap_segment_rw (generation_start_segment (condemned_gen));

    PREFIX_ASSUME(current_heap_segment != NULL);

    reset_pinned_queue_bos();
    update_oldest_pinned_plug();

    // When compacting into a reused segment, per-generation allocation sizes
    // are recomputed from scratch as plugs are placed.
    BOOL reused_seg = expand_reused_seg_p();
    if (reused_seg)
    {
        for (int i = 1; i <= max_generation; i++)
        {
            generation_allocation_size (generation_of (i)) = 0;
        }
    }

    uint8_t* end_address = heap_segment_allocated (current_heap_segment);

    size_t end_brick = brick_of (end_address-1);
    compact_args args;
    args.last_plug = 0;
    args.before_last_plug = 0;
    // Sentinel meaning "no compacted brick yet".
    args.current_compacted_brick = ~((size_t)1);
    args.is_shortened = FALSE;
    args.pinned_plug_entry = 0;
    args.copy_cards_p = (condemned_gen_number >= 1) || !clear_cards;
    args.check_gennum_p = reused_seg;
    if (args.check_gennum_p)
    {
        // -1 == "compute per plug"; non-ephemeral segments are all gen2.
        args.src_gennum = ((current_heap_segment == ephemeral_heap_segment) ? -1 : 2);
    }

    dprintf (2,("---- Compact Phase: %Ix(%Ix)----",
        first_condemned_address, brick_of (first_condemned_address)));

#ifdef MULTIPLE_HEAPS
    //restart
    if (gc_t_join.joined())
    {
#endif //MULTIPLE_HEAPS

#ifdef MULTIPLE_HEAPS
        dprintf(3, ("Restarting for compaction"));
        gc_t_join.restart();
    }
#endif //MULTIPLE_HEAPS

    reset_pinned_queue_bos();

#ifdef FEATURE_LOH_COMPACTION
    if (loh_compacted_p)
    {
        compact_loh();
    }
#endif //FEATURE_LOH_COMPACTION

    if ((start_address < end_address) ||
        (condemned_gen_number == max_generation))
    {
        while (1)
        {
            if (current_brick > end_brick)
            {
                // End of segment: flush the pending plug.
                if (args.last_plug != 0)
                {
                    dprintf (3, ("compacting last plug: %Ix", args.last_plug))
                    compact_plug (args.last_plug,
                                  (heap_segment_allocated (current_heap_segment) - args.last_plug),
                                  args.is_shortened,
                                  &args);
                }

                if (heap_segment_next_rw (current_heap_segment))
                {
                    current_heap_segment = heap_segment_next_rw (current_heap_segment);
                    current_brick = brick_of (heap_segment_mem (current_heap_segment));
                    end_brick = brick_of (heap_segment_allocated (current_heap_segment)-1);
                    args.last_plug = 0;
                    if (args.check_gennum_p)
                    {
                        args.src_gennum = ((current_heap_segment == ephemeral_heap_segment) ?
                                           -1 : 2);
                    }
                    continue;
                }
                else
                {
                    if (args.before_last_plug !=0)
                    {
                        dprintf (3, ("Fixing last brick %Ix to point to plug %Ix",
                                    args.current_compacted_brick, (size_t)args.before_last_plug));
                        // NOTE(review): ~1u promotes to 0xFFFFFFFE, which on
                        // 64-bit differs from the ~((size_t)1) sentinel this
                        // field is initialized with, so this assert appears
                        // unable to fire there - TODO confirm intent.
                        assert (args.current_compacted_brick != ~1u);
                        set_brick (args.current_compacted_brick,
                                   args.before_last_plug - brick_address (args.current_compacted_brick));
                    }
                    break;
                }
            }
            {
                int brick_entry = brick_table [ current_brick ];
                dprintf (3, ("B: %Ix(%Ix)->%Ix",
                    current_brick, (size_t)brick_entry, (brick_address (current_brick) + brick_entry - 1)));

                if (brick_entry >= 0)
                {
                    compact_in_brick ((brick_address (current_brick) + brick_entry -1),
                                      &args);

                }
            }
            current_brick++;
        }
    }

    recover_saved_pinned_info();

#ifdef TIME_GC
    finish = GetCycleCount32();
    compact_time = finish - start;
#endif //TIME_GC

    concurrent_print_time_delta ("compact end");

    dprintf(2,("---- End of Compact phase ----"));
}

#ifdef MULTIPLE_HEAPS

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4702) // C4702: unreachable code: gc_thread_function may not return
#endif //_MSC_VER
// Entry point for each server-GC worker thread.
void __stdcall gc_heap::gc_thread_stub (void* arg)
{
    ClrFlsSetThreadType (ThreadType_GC);
    STRESS_LOG_RESERVE_MEM (GC_STRESSLOG_MULTIPLY);

#ifndef FEATURE_REDHAWK
    // We commit the thread's entire stack to ensure we're robust in low memory conditions.
    BOOL fSuccess = Thread::CommitThreadStack(NULL);

    if (!fSuccess)
    {
#ifdef BACKGROUND_GC
        // For background GC we revert to doing a blocking GC.
+ return; +#else + STRESS_LOG0(LF_GC, LL_ALWAYS, "Thread::CommitThreadStack failed."); + _ASSERTE(!"Thread::CommitThreadStack failed."); + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_STACKOVERFLOW); +#endif //BACKGROUND_GC + } +#endif // FEATURE_REDHAWK + + gc_heap* heap = (gc_heap*)arg; + _alloca (256*heap->heap_number); + heap->gc_thread_function(); +} +#ifdef _MSC_VER +#pragma warning(pop) +#endif //_MSC_VER + +#endif //MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4702) // C4702: unreachable code: gc_thread_function may not return +#endif //_MSC_VER +uint32_t __stdcall gc_heap::bgc_thread_stub (void* arg) +{ + gc_heap* heap = (gc_heap*)arg; + return heap->bgc_thread_function(); +} +#ifdef _MSC_VER +#pragma warning(pop) +#endif //_MSC_VER + +#endif //BACKGROUND_GC + +/*------------------ Background GC ----------------------------*/ + +#ifdef BACKGROUND_GC + +void gc_heap::background_drain_mark_list (int thread) +{ + UNREFERENCED_PARAMETER(thread); + + size_t saved_c_mark_list_index = c_mark_list_index; + + if (saved_c_mark_list_index) + { + concurrent_print_time_delta ("SML"); + } + while (c_mark_list_index != 0) + { + size_t current_index = c_mark_list_index - 1; + uint8_t* o = c_mark_list [current_index]; + background_mark_object (o THREAD_NUMBER_ARG); + c_mark_list_index--; + } + if (saved_c_mark_list_index) + { + + concurrent_print_time_delta ("EML"); + } + + fire_drain_mark_list_event (saved_c_mark_list_index); +} + + +// The background GC version of scan_dependent_handles (see that method for a more in-depth comment). +#ifdef MULTIPLE_HEAPS +// Since we only scan dependent handles while we are stopped we'll never interfere with FGCs scanning +// them. So we can use the same static variables. +void gc_heap::background_scan_dependent_handles (ScanContext *sc) +{ + // Whenever we call this method there may have been preceding object promotions. 
So set + // s_fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set + // based on the how the scanning proceeded). + s_fUnscannedPromotions = TRUE; + + // We don't know how many times we need to loop yet. In particular we can't base the loop condition on + // the state of this thread's portion of the dependent handle table. That's because promotions on other + // threads could cause handle promotions to become necessary here. Even if there are definitely no more + // promotions possible in this thread's handles, we still have to stay in lock-step with those worker + // threads that haven't finished yet (each GC worker thread has to join exactly the same number of times + // as all the others or they'll get out of step). + while (true) + { + // The various worker threads are all currently racing in this code. We need to work out if at least + // one of them think they have work to do this cycle. Each thread needs to rescan its portion of the + // dependent handle table when both of the following conditions apply: + // 1) At least one (arbitrary) object might have been promoted since the last scan (because if this + // object happens to correspond to a primary in one of our handles we might potentially have to + // promote the associated secondary). + // 2) The table for this thread has at least one handle with a secondary that isn't promoted yet. + // + // The first condition is represented by s_fUnscannedPromotions. This is always non-zero for the first + // iteration of this loop (see comment above) and in subsequent cycles each thread updates this + // whenever a mark stack overflow occurs or scanning their dependent handles results in a secondary + // being promoted. This value is cleared back to zero in a synchronized fashion in the join that + // follows below. 
Note that we can't read this outside of the join since on any iteration apart from + // the first threads will be racing between reading this value and completing their previous + // iteration's table scan. + // + // The second condition is tracked by the dependent handle code itself on a per worker thread basis + // (and updated by the GcDhReScan() method). We call GcDhUnpromotedHandlesExist() on each thread to + // determine the local value and collect the results into the s_fUnpromotedHandles variable in what is + // effectively an OR operation. As per s_fUnscannedPromotions we can't read the final result until + // we're safely joined. + if (GCScan::GcDhUnpromotedHandlesExist(sc)) + s_fUnpromotedHandles = TRUE; + + // Synchronize all the threads so we can read our state variables safely. The following shared + // variable (indicating whether we should scan the tables or terminate the loop) will be set by a + // single thread inside the join. + bgc_t_join.join(this, gc_join_scan_dependent_handles); + if (bgc_t_join.joined()) + { + // We're synchronized so it's safe to read our shared state variables. We update another shared + // variable to indicate to all threads whether we'll be scanning for another cycle or terminating + // the loop. We scan if there has been at least one object promotion since last time and at least + // one thread has a dependent handle table with a potential handle promotion possible. + s_fScanRequired = s_fUnscannedPromotions && s_fUnpromotedHandles; + + // Reset our shared state variables (ready to be set again on this scan or with a good initial + // value for the next call if we're terminating the loop). 
+ s_fUnscannedPromotions = FALSE; + s_fUnpromotedHandles = FALSE; + + if (!s_fScanRequired) + { + uint8_t* all_heaps_max = 0; + uint8_t* all_heaps_min = MAX_PTR; + int i; + for (i = 0; i < n_heaps; i++) + { + if (all_heaps_max < g_heaps[i]->background_max_overflow_address) + all_heaps_max = g_heaps[i]->background_max_overflow_address; + if (all_heaps_min > g_heaps[i]->background_min_overflow_address) + all_heaps_min = g_heaps[i]->background_min_overflow_address; + } + for (i = 0; i < n_heaps; i++) + { + g_heaps[i]->background_max_overflow_address = all_heaps_max; + g_heaps[i]->background_min_overflow_address = all_heaps_min; + } + } + + // Restart all the workers. + dprintf(2, ("Starting all gc thread mark stack overflow processing")); + bgc_t_join.restart(); + } + + // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions + // being visible. If there really was an overflow (process_mark_overflow returns true) then set the + // global flag indicating that at least one object promotion may have occurred (the usual comment + // about races applies). (Note it's OK to set this flag even if we're about to terminate the loop and + // exit the method since we unconditionally set this variable on method entry anyway). + if (background_process_mark_overflow (sc->concurrent)) + s_fUnscannedPromotions = TRUE; + + // If we decided that no scan was required we can terminate the loop now. + if (!s_fScanRequired) + break; + + // Otherwise we must join with the other workers to ensure that all mark stack overflows have been + // processed before we start scanning dependent handle tables (if overflows remain while we scan we + // could miss noting the promotion of some primary objects). + bgc_t_join.join(this, gc_join_rescan_dependent_handles); + if (bgc_t_join.joined()) + { + // Restart all the workers. 
+            dprintf(3, ("Starting all gc thread for dependent handle promotion"));
+            bgc_t_join.restart();
+        }
+
+        // If the portion of the dependent handle table managed by this worker has handles that could still be
+        // promoted perform a rescan. If the rescan resulted in at least one promotion note this fact since it
+        // could require a rescan of handles on this or other workers.
+        if (GCScan::GcDhUnpromotedHandlesExist(sc))
+            if (GCScan::GcDhReScan(sc))
+                s_fUnscannedPromotions = TRUE;
+    }
+}
+#else
+// Single-heap variant of the dependent handle scanning logic above: with no other
+// worker threads to stay in lock-step with, we can simply rescan until a pass makes
+// no new promotions or no handles with unpromoted secondaries remain.
+void gc_heap::background_scan_dependent_handles (ScanContext *sc)
+{
+    // Whenever we call this method there may have been preceding object promotions. So set
+    // fUnscannedPromotions unconditionally (during further iterations of the scanning loop this will be set
+    // based on how the scanning proceeded).
+    bool fUnscannedPromotions = true;
+
+    // Scan dependent handles repeatedly until there are no further promotions that can be made or we made a
+    // scan without performing any new promotions.
+    while (GCScan::GcDhUnpromotedHandlesExist(sc) && fUnscannedPromotions)
+    {
+        // On each iteration of the loop start with the assumption that no further objects have been promoted.
+        fUnscannedPromotions = false;
+
+        // Handle any mark stack overflow: scanning dependent handles relies on all previous object promotions
+        // being visible. If there was an overflow (background_process_mark_overflow returned true) then
+        // additional objects now appear to be promoted and we should set the flag.
+        if (background_process_mark_overflow (sc->concurrent))
+            fUnscannedPromotions = true;
+
+        // Perform the scan and set the flag if any promotions resulted.
+        if (GCScan::GcDhReScan (sc))
+            fUnscannedPromotions = true;
+    }
+
+    // Perform a last processing of any overflowed mark stack.
+    background_process_mark_overflow (sc->concurrent);
+}
+#endif //MULTIPLE_HEAPS
+
+// Restore the gc settings that were saved in saved_bgc_settings before a foreground
+// (ephemeral) GC ran in the middle of this background GC, so the remainder of the
+// background collection continues with its original parameters.
+void gc_heap::recover_bgc_settings()
+{
+    if ((settings.condemned_generation < max_generation) && recursive_gc_sync::background_running_p())
+    {
+        dprintf (2, ("restoring bgc settings"));
+        settings = saved_bgc_settings;
+        GCHeap::GcCondemnedGeneration = gc_heap::settings.condemned_generation;
+    }
+}
+
+// Called on the BGC thread (asserted): if the EE has requested a safe point
+// (CatchAtSafePoint), briefly toggle into preemptive mode and back, creating a
+// window in which the pending suspension (e.g. for a foreground GC) can be serviced.
+void gc_heap::allow_fgc()
+{
+    assert (bgc_thread == GetThread());
+
+    if (GCToEEInterface::IsPreemptiveGCDisabled(bgc_thread) && GCToEEInterface::CatchAtSafePoint(bgc_thread))
+    {
+        GCToEEInterface::EnablePreemptiveGC(bgc_thread);
+        GCToEEInterface::DisablePreemptiveGC(bgc_thread);
+    }
+}
+
+// TRUE when mark array space should be committed for segments: either a background
+// GC is running right now or one has been initialized.
+BOOL gc_heap::should_commit_mark_array()
+{
+    return (recursive_gc_sync::background_running_p() || (current_bgc_state == bgc_initialized));
+}
+
+// Clear both mark-array commit flags (fully and partially committed) on every
+// segment of gen2 and of the large object generation.
+void gc_heap::clear_commit_flag()
+{
+    generation* gen = generation_of (max_generation);
+    heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
+    while (1)
+    {
+        if (seg == 0)
+        {
+            // Finished gen2's segments; move on to the large object generation's.
+            if (gen != large_object_generation)
+            {
+                gen = large_object_generation;
+                seg = heap_segment_in_range (generation_start_segment (gen));
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        if (seg->flags & heap_segment_flags_ma_committed)
+        {
+            seg->flags &= ~heap_segment_flags_ma_committed;
+        }
+
+        if (seg->flags & heap_segment_flags_ma_pcommitted)
+        {
+            seg->flags &= ~heap_segment_flags_ma_pcommitted;
+        }
+
+        seg = heap_segment_next (seg);
+    }
+}
+
+// clear_commit_flag across all heaps (or the single heap).
+void gc_heap::clear_commit_flag_global()
+{
+#ifdef MULTIPLE_HEAPS
+    for (int i = 0; i < n_heaps; i++)
+    {
+        g_heaps[i]->clear_commit_flag();
+    }
+#else
+    clear_commit_flag();
+#endif //MULTIPLE_HEAPS
+}
+
+// Debug-only: assert that every mark array word covering [begin, end) is zero;
+// any nonzero word is a fatal GC error. A no-op in non-debug builds.
+void gc_heap::verify_mark_array_cleared (uint8_t* begin, uint8_t* end, uint32_t* mark_array_addr)
+{
+#ifdef _DEBUG
+    size_t markw = mark_word_of (begin);
+    size_t markw_end = mark_word_of (end);
+
+    while (markw < markw_end)
+    {
+        if (mark_array_addr[markw])
+        {
+            dprintf (1, ("The mark bits at 0x%Ix:0x%Ix(addr: 
0x%Ix) were not cleared",
+                markw, mark_array_addr[markw], mark_word_address (markw)));
+            FATAL_GC_ERROR();
+        }
+        markw++;
+    }
+#else // _DEBUG
+    UNREFERENCED_PARAMETER(begin);
+    UNREFERENCED_PARAMETER(end);
+    UNREFERENCED_PARAMETER(mark_array_addr);
+#endif //_DEBUG
+}
+
+// Debug helper: verify the mark array is clear over a segment's whole reserved range.
+void gc_heap::verify_mark_array_cleared (heap_segment* seg, uint32_t* mark_array_addr)
+{
+    verify_mark_array_cleared (heap_segment_mem (seg), heap_segment_reserved (seg), mark_array_addr);
+}
+
+// Commit mark array space for a newly added segment, restricted to the part that
+// overlaps the address range saved at BGC start
+// ([background_saved_lowest_address, background_saved_highest_address]). The segment
+// is flagged fully (ma_committed) or only partially (ma_pcommitted) committed
+// accordingly. If the card table is in the middle of being reallocated
+// (hp->card_table != new_card_table) the same range is also committed in the new
+// table's mark array. Returns FALSE if any commit fails.
+BOOL gc_heap::commit_mark_array_new_seg (gc_heap* hp,
+                                         heap_segment* seg,
+                                         uint32_t* new_card_table,
+                                         uint8_t* new_lowest_address)
+{
+    UNREFERENCED_PARAMETER(hp); // compiler bug? -- this *is*, indeed, referenced
+
+    uint8_t* start = (heap_segment_read_only_p(seg) ? heap_segment_mem(seg) : (uint8_t*)seg);
+    uint8_t* end = heap_segment_reserved (seg);
+
+    uint8_t* lowest = hp->background_saved_lowest_address;
+    uint8_t* highest = hp->background_saved_highest_address;
+
+    uint8_t* commit_start = NULL;
+    uint8_t* commit_end = NULL;
+    size_t commit_flag = 0;
+
+    if ((highest >= start) &&
+        (lowest <= end))
+    {
+        if ((start >= lowest) && (end <= highest))
+        {
+            dprintf (GC_TABLE_LOG, ("completely in bgc range: seg %Ix-%Ix, bgc: %Ix-%Ix",
+                                    start, end, lowest, highest));
+            commit_flag = heap_segment_flags_ma_committed;
+        }
+        else
+        {
+            dprintf (GC_TABLE_LOG, ("partially in bgc range: seg %Ix-%Ix, bgc: %Ix-%Ix",
+                                    start, end, lowest, highest));
+            commit_flag = heap_segment_flags_ma_pcommitted;
+        }
+
+        commit_start = max (lowest, start);
+        commit_end = min (highest, end);
+
+        if (!commit_mark_array_by_range (commit_start, commit_end, hp->mark_array))
+        {
+            return FALSE;
+        }
+
+        if (new_card_table == 0)
+        {
+            new_card_table = g_card_table;
+        }
+
+        if (hp->card_table != new_card_table)
+        {
+            if (new_lowest_address == 0)
+            {
+                new_lowest_address = g_lowest_address;
+            }
+
+            uint32_t* ct = &new_card_table[card_word (gcard_of (new_lowest_address))];
+            uint32_t* ma = (uint32_t*)((uint8_t*)card_table_mark_array (ct) 
- size_mark_array_of (0, new_lowest_address));
+
+            dprintf (GC_TABLE_LOG, ("table realloc-ed: %Ix->%Ix, MA: %Ix->%Ix",
+                                    hp->card_table, new_card_table,
+                                    hp->mark_array, ma));
+
+            // Commit the same range in the new mark array too, so switching to the
+            // new card table cannot leave this segment's range uncommitted.
+            if (!commit_mark_array_by_range (commit_start, commit_end, ma))
+            {
+                return FALSE;
+            }
+        }
+
+        seg->flags |= commit_flag;
+    }
+
+    return TRUE;
+}
+
+// Commit the pages of mark_array_addr spanning the mark words for [begin, end).
+// The page-aligned commit range can overlap pages used by neighboring segments or
+// the card bundle, so only the [begin, end) portion itself is verified to be clear.
+// Returns FALSE if the OS commit fails.
+BOOL gc_heap::commit_mark_array_by_range (uint8_t* begin, uint8_t* end, uint32_t* mark_array_addr)
+{
+    size_t beg_word = mark_word_of (begin);
+    size_t end_word = mark_word_of (align_on_mark_word (end));
+    uint8_t* commit_start = align_lower_page ((uint8_t*)&mark_array_addr[beg_word]);
+    uint8_t* commit_end = align_on_page ((uint8_t*)&mark_array_addr[end_word]);
+    size_t size = (size_t)(commit_end - commit_start);
+
+#ifdef SIMPLE_DPRINTF
+    dprintf (GC_TABLE_LOG, ("range: %Ix->%Ix mark word: %Ix->%Ix(%Id), mark array: %Ix->%Ix(%Id), commit %Ix->%Ix(%Id)",
+                            begin, end,
+                            beg_word, end_word,
+                            (end_word - beg_word) * sizeof (uint32_t),
+                            &mark_array_addr[beg_word],
+                            &mark_array_addr[end_word],
+                            (size_t)(&mark_array_addr[end_word] - &mark_array_addr[beg_word]),
+                            commit_start, commit_end,
+                            size));
+#endif //SIMPLE_DPRINTF
+
+    if (GCToOSInterface::VirtualCommit (commit_start, size))
+    {
+        // We can only verify the mark array is cleared from begin to end, the first and the last
+        // page aren't necessarily all cleared 'cause they could be used by other segments or
+        // card bundle.
+        verify_mark_array_cleared (begin, end, mark_array_addr);
+        return TRUE;
+    }
+    else
+    {
+        dprintf (GC_TABLE_LOG, ("failed to commit %Id bytes", (end_word - beg_word) * sizeof (uint32_t)));
+        return FALSE;
+    }
+}
+
+// Commit mark array space for the segment, clipped to the address range the BGC
+// saved; a segment completely outside that range needs no commit at all.
+BOOL gc_heap::commit_mark_array_with_check (heap_segment* seg, uint32_t* new_mark_array_addr)
+{
+    uint8_t* start = (heap_segment_read_only_p(seg) ? 
heap_segment_mem(seg) : (uint8_t*)seg);
+    uint8_t* end = heap_segment_reserved (seg);
+
+#ifdef MULTIPLE_HEAPS
+    uint8_t* lowest = heap_segment_heap (seg)->background_saved_lowest_address;
+    uint8_t* highest = heap_segment_heap (seg)->background_saved_highest_address;
+#else
+    uint8_t* lowest = background_saved_lowest_address;
+    uint8_t* highest = background_saved_highest_address;
+#endif //MULTIPLE_HEAPS
+
+    // Only the part of the segment overlapping the saved BGC range needs mark array
+    // space.
+    if ((highest >= start) &&
+        (lowest <= end))
+    {
+        start = max (lowest, start);
+        end = min (highest, end);
+        if (!commit_mark_array_by_range (start, end, new_mark_array_addr))
+        {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+// Commit mark array space covering the entire segment: from the segment header
+// itself (or from heap_segment_mem for read-only segments) up to
+// heap_segment_reserved.
+BOOL gc_heap::commit_mark_array_by_seg (heap_segment* seg, uint32_t* mark_array_addr)
+{
+    dprintf (GC_TABLE_LOG, ("seg: %Ix->%Ix; MA: %Ix",
+        seg,
+        heap_segment_reserved (seg),
+        mark_array_addr));
+    uint8_t* start = (heap_segment_read_only_p (seg) ? heap_segment_mem (seg) : (uint8_t*)seg);
+
+    return commit_mark_array_by_range (start, heap_segment_reserved (seg), mark_array_addr);
+}
+
+// At BGC initialization, walk every gen2 and LOH segment and commit any mark array
+// space not already flagged committed on the segment. Returns FALSE if any commit
+// fails.
+BOOL gc_heap::commit_mark_array_bgc_init (uint32_t* mark_array_addr)
+{
+    UNREFERENCED_PARAMETER(mark_array_addr);
+
+    dprintf (GC_TABLE_LOG, ("BGC init commit: lowest: %Ix, highest: %Ix, mark_array: %Ix",
+                            lowest_address, highest_address, mark_array));
+
+    generation* gen = generation_of (max_generation);
+    heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
+    while (1)
+    {
+        if (seg == 0)
+        {
+            // Finished gen2's segments; move on to the large object generation's.
+            if (gen != large_object_generation)
+            {
+                gen = large_object_generation;
+                seg = heap_segment_in_range (generation_start_segment (gen));
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        dprintf (GC_TABLE_LOG, ("seg: %Ix, flags: %Id", seg, seg->flags));
+
+        if (!(seg->flags & heap_segment_flags_ma_committed))
+        {
+            // For ro segments they could always be only partially in range so we'd
+            // be calling this at the beginning of every BGC. We are not making this
+            // more efficient right now - ro segments are currently only used by redhawk.
+            if (heap_segment_read_only_p (seg))
+            {
+                if ((heap_segment_mem (seg) >= lowest_address) &&
+                    (heap_segment_reserved (seg) <= highest_address))
+                {
+                    // ro seg entirely in range: commit it all and flag fully committed.
+                    if (commit_mark_array_by_seg (seg, mark_array))
+                    {
+                        seg->flags |= heap_segment_flags_ma_committed;
+                    }
+                    else
+                    {
+                        return FALSE;
+                    }
+                }
+                else
+                {
+                    // ro seg only partially overlaps [lowest_address, highest_address]:
+                    // commit just the overlap and flag it partially committed.
+                    uint8_t* start = max (lowest_address, heap_segment_mem (seg));
+                    uint8_t* end = min (highest_address, heap_segment_reserved (seg));
+                    if (commit_mark_array_by_range (start, end, mark_array))
+                    {
+                        seg->flags |= heap_segment_flags_ma_pcommitted;
+                    }
+                    else
+                    {
+                        return FALSE;
+                    }
+                }
+            }
+            else
+            {
+                // For normal segments they are by design completely in range so just
+                // commit the whole mark array for each seg.
+                if (commit_mark_array_by_seg (seg, mark_array))
+                {
+                    // Upgrade a previously partial commit to a full one.
+                    if (seg->flags & heap_segment_flags_ma_pcommitted)
+                    {
+                        seg->flags &= ~heap_segment_flags_ma_pcommitted;
+                    }
+                    seg->flags |= heap_segment_flags_ma_committed;
+                }
+                else
+                {
+                    return FALSE;
+                }
+            }
+        }
+
+        seg = heap_segment_next (seg);
+    }
+
+    return TRUE;
+}
+
+// This function doesn't check the commit flag since it's for a new array -
+// the mark_array flag for these segments will remain the same.
+// Commit, in a newly allocated mark array, the space covering every existing segment
+// (each clipped to the saved BGC range by commit_mark_array_with_check). Used when
+// the card table - and with it the mark array - is reallocated; per the comment
+// above, the segments' ma commit flags are deliberately left unchanged.
+BOOL gc_heap::commit_new_mark_array (uint32_t* new_mark_array_addr)
+{
+    dprintf (GC_TABLE_LOG, ("commiting existing segs on MA %Ix", new_mark_array_addr));
+    generation* gen = generation_of (max_generation);
+    heap_segment* seg = heap_segment_in_range (generation_start_segment (gen));
+    while (1)
+    {
+        if (seg == 0)
+        {
+            // Finished gen2's segments; move on to the large object generation's.
+            if (gen != large_object_generation)
+            {
+                gen = large_object_generation;
+                seg = heap_segment_in_range (generation_start_segment (gen));
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        if (!commit_mark_array_with_check (seg, new_mark_array_addr))
+        {
+            return FALSE;
+        }
+
+        seg = heap_segment_next (seg);
+    }
+
+#ifdef MULTIPLE_HEAPS
+    // NOTE(review): new_heap_segment appears to be a segment in the process of being
+    // added to this heap and must be covered too - confirm against its other uses.
+    if (new_heap_segment)
+    {
+        if (!commit_mark_array_with_check (new_heap_segment, new_mark_array_addr))
+        {
+            return FALSE;
+        }
+    }
+#endif //MULTIPLE_HEAPS
+
+    return TRUE;
+}
+
+// Run commit_new_mark_array on every heap (or the single heap); FALSE as soon as any
+// commit fails.
+BOOL gc_heap::commit_new_mark_array_global (uint32_t* new_mark_array)
+{
+#ifdef MULTIPLE_HEAPS
+    for (int i = 0; i < n_heaps; i++)
+    {
+        if (!g_heaps[i]->commit_new_mark_array (new_mark_array))
+        {
+            return FALSE;
+        }
+    }
+#else
+    if (!commit_new_mark_array (new_mark_array))
+    {
+        return FALSE;
+    }
+#endif //MULTIPLE_HEAPS
+
+    return TRUE;
+}
+
+// Decommit the mark array pages covering this segment, honoring the per-segment
+// commit flags: for a partially committed segment only the part inside
+// [lowest_address, highest_address] was committed. Note the inward page alignment
+// (align_on_page for the start, align_lower_page for the end) - the opposite of
+// commit_mark_array_by_range - so pages possibly shared with neighboring segments or
+// the card bundle are never decommitted.
+void gc_heap::decommit_mark_array_by_seg (heap_segment* seg)
+{
+    // if BGC is disabled (the finalize watchdog does this at shutdown), the mark array could have
+    // been set to NULL.
+    if (mark_array == NULL)
+    {
+        return;
+    }
+
+    dprintf (GC_TABLE_LOG, ("decommitting seg %Ix(%Ix), MA: %Ix", seg, seg->flags, mark_array));
+
+    size_t flags = seg->flags;
+
+    if ((flags & heap_segment_flags_ma_committed) ||
+        (flags & heap_segment_flags_ma_pcommitted))
+    {
+        uint8_t* start = (heap_segment_read_only_p(seg) ? 
heap_segment_mem(seg) : (uint8_t*)seg); + uint8_t* end = heap_segment_reserved (seg); + + if (flags & heap_segment_flags_ma_pcommitted) + { + start = max (lowest_address, start); + end = min (highest_address, end); + } + + size_t beg_word = mark_word_of (start); + size_t end_word = mark_word_of (align_on_mark_word (end)); + uint8_t* decommit_start = align_on_page ((uint8_t*)&mark_array[beg_word]); + uint8_t* decommit_end = align_lower_page ((uint8_t*)&mark_array[end_word]); + size_t size = (size_t)(decommit_end - decommit_start); + +#ifdef SIMPLE_DPRINTF + dprintf (GC_TABLE_LOG, ("seg: %Ix mark word: %Ix->%Ix(%Id), mark array: %Ix->%Ix(%Id), decommit %Ix->%Ix(%Id)", + seg, + beg_word, end_word, + (end_word - beg_word) * sizeof (uint32_t), + &mark_array[beg_word], + &mark_array[end_word], + (size_t)(&mark_array[end_word] - &mark_array[beg_word]), + decommit_start, decommit_end, + size)); +#endif //SIMPLE_DPRINTF + + if (decommit_start < decommit_end) + { + if (!GCToOSInterface::VirtualDecommit (decommit_start, size)) + { + dprintf (GC_TABLE_LOG, ("GCToOSInterface::VirtualDecommit on %Ix for %Id bytes failed", + decommit_start, size)); + assert (!"decommit failed"); + } + } + + dprintf (GC_TABLE_LOG, ("decommited [%Ix for address [%Ix", beg_word, seg)); + } +} + +void gc_heap::background_mark_phase () +{ + verify_mark_array_cleared(); + + ScanContext sc; + sc.thread_number = heap_number; + sc.promotion = TRUE; + sc.concurrent = FALSE; + + THREAD_FROM_HEAP; + Thread* current_thread = GetThread(); + BOOL cooperative_mode = TRUE; +#ifndef MULTIPLE_HEAPS + const int thread = heap_number; +#endif //!MULTIPLE_HEAPS + + dprintf(2,("-(GC%d)BMark-", VolatileLoad(&settings.gc_index))); + + assert (settings.concurrent); + +#ifdef TIME_GC + unsigned start; + unsigned finish; + start = GetCycleCount32(); +#endif //TIME_GC + +#ifdef FFIND_OBJECT + if (gen0_must_clear_bricks > 0) + gen0_must_clear_bricks--; +#endif //FFIND_OBJECT + + background_soh_alloc_count = 0; + 
background_loh_alloc_count = 0; + bgc_overflow_count = 0; + + bpromoted_bytes (heap_number) = 0; + static uint32_t num_sizedrefs = 0; + + background_min_overflow_address = MAX_PTR; + background_max_overflow_address = 0; + background_min_soh_overflow_address = MAX_PTR; + background_max_soh_overflow_address = 0; + processed_soh_overflow_p = FALSE; + + { + //set up the mark lists from g_mark_list + assert (g_mark_list); + mark_list = g_mark_list; + //dont use the mark list for full gc + //because multiple segments are more complex to handle and the list + //is likely to overflow + mark_list_end = &mark_list [0]; + mark_list_index = &mark_list [0]; + + c_mark_list_index = 0; + + shigh = (uint8_t*) 0; + slow = MAX_PTR; + + generation* gen = generation_of (max_generation); + + dprintf(3,("BGC: stack marking")); + sc.concurrent = TRUE; + + GCScan::GcScanRoots(background_promote_callback, + max_generation, max_generation, + &sc); + } + + { + dprintf(3,("BGC: finalization marking")); + finalize_queue->GcScanRoots(background_promote_callback, heap_number, 0); + } + + size_t total_loh_size = generation_size (max_generation + 1); + bgc_begin_loh_size = total_loh_size; + bgc_alloc_spin_loh = 0; + bgc_loh_size_increased = 0; + bgc_loh_allocated_in_free = 0; + size_t total_soh_size = generation_sizes (generation_of (max_generation)); + + dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size)); + + { + //concurrent_print_time_delta ("copying stack roots"); + concurrent_print_time_delta ("CS"); + + fire_bgc_event (BGC1stNonConEnd); + + expanded_in_fgc = FALSE; + saved_overflow_ephemeral_seg = 0; + current_bgc_state = bgc_reset_ww; + + // we don't need a join here - just whichever thread that gets here + // first can change the states and call restart_vm. + // this is not true - we can't let the EE run when we are scanning stack. 
+ // since we now allow reset ww to run concurrently and have a join for it, + // we can do restart ee on the 1st thread that got here. Make sure we handle the + // sizedref handles correctly. +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_restart_ee); + if (bgc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Resetting write watch for software write watch is pretty fast, much faster than for hardware write watch. Reset + // can be done while the runtime is suspended or after the runtime is restarted, the preference was to reset while + // the runtime is suspended. The reset for hardware write watch is done after the runtime is restarted below. +#ifdef WRITE_WATCH + concurrent_print_time_delta ("CRWW begin"); + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->reset_write_watch (FALSE); + } +#else + reset_write_watch (FALSE); +#endif //MULTIPLE_HEAPS + + concurrent_print_time_delta ("CRWW"); +#endif //WRITE_WATCH +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + num_sizedrefs = SystemDomain::System()->GetTotalNumSizedRefHandles(); + + // this c_write is not really necessary because restart_vm + // has an instruction that will flush the cpu cache (interlocked + // or whatever) but we don't want to rely on that. 
+ dprintf (BGC_LOG, ("setting cm_in_progress")); + c_write (cm_in_progress, TRUE); + + //restart all thread, doing the marking from the array + assert (dont_restart_ee_p); + dont_restart_ee_p = FALSE; + + restart_vm(); + GCToOSInterface::YieldThread (0); +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Starting all gc threads for gc")); + bgc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_after_reset); + if (bgc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + disable_preemptive (current_thread, TRUE); + +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // When software write watch is enabled, resetting write watch is done while the runtime is suspended above. The + // post-reset call to revisit_written_pages is only necessary for concurrent reset_write_watch, to discard dirtied + // pages during the concurrent reset. + +#ifdef WRITE_WATCH + concurrent_print_time_delta ("CRWW begin"); + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->reset_write_watch (TRUE); + } +#else + reset_write_watch (TRUE); +#endif //MULTIPLE_HEAPS + + concurrent_print_time_delta ("CRWW"); +#endif //WRITE_WATCH + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->revisit_written_pages (TRUE, TRUE); + } +#else + revisit_written_pages (TRUE, TRUE); +#endif //MULTIPLE_HEAPS + + concurrent_print_time_delta ("CRW"); +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + g_heaps[i]->current_bgc_state = bgc_mark_handles; + } +#else + current_bgc_state = bgc_mark_handles; +#endif //MULTIPLE_HEAPS + + current_c_gc_state = c_gc_state_marking; + + enable_preemptive (current_thread); + +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Joining BGC threads after resetting writewatch")); + bgc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + disable_preemptive (current_thread, TRUE); + + if (num_sizedrefs > 0) + { + 
GCScan::GcScanSizedRefs(background_promote, max_generation, max_generation, &sc); + + enable_preemptive (current_thread); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_scan_sizedref_done); + if (bgc_t_join.joined()) + { + dprintf(3, ("Done with marking all sized refs. Starting all bgc thread for marking other strong roots")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + disable_preemptive (current_thread, TRUE); + } + + dprintf (3,("BGC: handle table marking")); + GCScan::GcScanHandles(background_promote, + max_generation, max_generation, + &sc); + //concurrent_print_time_delta ("concurrent marking handle table"); + concurrent_print_time_delta ("CRH"); + + current_bgc_state = bgc_mark_stack; + dprintf (2,("concurrent draining mark list")); + background_drain_mark_list (thread); + //concurrent_print_time_delta ("concurrent marking stack roots"); + concurrent_print_time_delta ("CRS"); + + dprintf (2,("concurrent revisiting dirtied pages")); + revisit_written_pages (TRUE); + revisit_written_pages (TRUE); + //concurrent_print_time_delta ("concurrent marking dirtied pages on LOH"); + concurrent_print_time_delta ("CRre"); + + enable_preemptive (current_thread); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_concurrent_overflow); + if (bgc_t_join.joined()) + { + uint8_t* all_heaps_max = 0; + uint8_t* all_heaps_min = MAX_PTR; + int i; + for (i = 0; i < n_heaps; i++) + { + dprintf (3, ("heap %d overflow max is %Ix, min is %Ix", + i, + g_heaps[i]->background_max_overflow_address, + g_heaps[i]->background_min_overflow_address)); + if (all_heaps_max < g_heaps[i]->background_max_overflow_address) + all_heaps_max = g_heaps[i]->background_max_overflow_address; + if (all_heaps_min > g_heaps[i]->background_min_overflow_address) + all_heaps_min = g_heaps[i]->background_min_overflow_address; + } + for (i = 0; i < n_heaps; i++) + { + g_heaps[i]->background_max_overflow_address = all_heaps_max; + g_heaps[i]->background_min_overflow_address = 
all_heaps_min; + } + dprintf(3, ("Starting all bgc threads after updating the overflow info")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + disable_preemptive (current_thread, TRUE); + + dprintf (2, ("before CRov count: %d", bgc_overflow_count)); + bgc_overflow_count = 0; + background_process_mark_overflow (TRUE); + dprintf (2, ("after CRov count: %d", bgc_overflow_count)); + bgc_overflow_count = 0; + //concurrent_print_time_delta ("concurrent processing mark overflow"); + concurrent_print_time_delta ("CRov"); + + // Stop all threads, crawl all stacks and revisit changed pages. + fire_bgc_event (BGC1stConEnd); + + dprintf (2, ("Stopping the EE")); + + enable_preemptive (current_thread); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_suspend_ee); + if (bgc_t_join.joined()) + { + bgc_threads_sync_event.Reset(); + + dprintf(3, ("Joining BGC threads for non concurrent final marking")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + if (heap_number == 0) + { + enter_spin_lock (&gc_lock); + + bgc_suspend_EE (); + //suspend_EE (); + bgc_threads_sync_event.Set(); + } + else + { + bgc_threads_sync_event.Wait(INFINITE, FALSE); + dprintf (2, ("bgc_threads_sync_event is signalled")); + } + + assert (settings.concurrent); + assert (settings.condemned_generation == max_generation); + + dprintf (2, ("clearing cm_in_progress")); + c_write (cm_in_progress, FALSE); + + bgc_alloc_lock->check(); + + current_bgc_state = bgc_final_marking; + + //concurrent_print_time_delta ("concurrent marking ended"); + concurrent_print_time_delta ("CR"); + + fire_bgc_event (BGC2ndNonConBegin); + + mark_absorb_new_alloc(); + + // We need a join here 'cause find_object would complain if the gen0 + // bricks of another heap haven't been fixed up. So we need to make sure + // that every heap's gen0 bricks are fixed up before we proceed. 
+#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_after_absorb); + if (bgc_t_join.joined()) + { + dprintf(3, ("Joining BGC threads after absorb")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + // give VM a chance to do work + GCToEEInterface::GcBeforeBGCSweepWork(); + + //reset the flag, indicating that the EE no longer expect concurrent + //marking + sc.concurrent = FALSE; + + total_loh_size = generation_size (max_generation + 1); + total_soh_size = generation_sizes (generation_of (max_generation)); + + dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size)); + + dprintf (2, ("nonconcurrent marking stack roots")); + GCScan::GcScanRoots(background_promote, + max_generation, max_generation, + &sc); + //concurrent_print_time_delta ("nonconcurrent marking stack roots"); + concurrent_print_time_delta ("NRS"); + +// finalize_queue->EnterFinalizeLock(); + finalize_queue->GcScanRoots(background_promote, heap_number, 0); +// finalize_queue->LeaveFinalizeLock(); + + dprintf (2, ("nonconcurrent marking handle table")); + GCScan::GcScanHandles(background_promote, + max_generation, max_generation, + &sc); + //concurrent_print_time_delta ("nonconcurrent marking handle table"); + concurrent_print_time_delta ("NRH"); + + dprintf (2,("---- (GC%d)final going through written pages ----", VolatileLoad(&settings.gc_index))); + revisit_written_pages (FALSE); + //concurrent_print_time_delta ("nonconcurrent revisit dirtied pages on LOH"); + concurrent_print_time_delta ("NRre LOH"); + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_disable_software_write_watch); + if (bgc_t_join.joined()) +#endif // MULTIPLE_HEAPS + { + // The runtime is suspended, and we will be doing a final query of dirty pages, so pause tracking written pages to + // avoid further perf penalty after the runtime is restarted + SoftwareWriteWatch::DisableForGCHeap(); + +#ifdef MULTIPLE_HEAPS + dprintf(3, 
("Restarting BGC threads after disabling software write watch")); + bgc_t_join.restart(); +#endif // MULTIPLE_HEAPS + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + dprintf (2, ("before NR 1st Hov count: %d", bgc_overflow_count)); + bgc_overflow_count = 0; + + // Dependent handles need to be scanned with a special algorithm (see the header comment on + // scan_dependent_handles for more detail). We perform an initial scan without processing any mark + // stack overflow. This is not guaranteed to complete the operation but in a common case (where there + // are no dependent handles that are due to be collected) it allows us to optimize away further scans. + // The call to background_scan_dependent_handles is what will cycle through more iterations if + // required and will also perform processing of any mark stack overflow once the dependent handle + // table has been fully promoted. + dprintf (2, ("1st dependent handle scan and process mark overflow")); + GCScan::GcDhInitialScan(background_promote, max_generation, max_generation, &sc); + background_scan_dependent_handles (&sc); + //concurrent_print_time_delta ("1st nonconcurrent dependent handle scan and process mark overflow"); + concurrent_print_time_delta ("NR 1st Hov"); + + dprintf (2, ("after NR 1st Hov count: %d", bgc_overflow_count)); + bgc_overflow_count = 0; + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_null_dead_short_weak); + if (bgc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + GCToEEInterface::AfterGcScanRoots (max_generation, max_generation, &sc); + +#ifdef MULTIPLE_HEAPS + dprintf(3, ("Joining BGC threads for short weak handle scan")); + bgc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + // null out the target of short weakref that were not promoted. 
+ GCScan::GcShortWeakPtrScan(background_promote, max_generation, max_generation,&sc); + + //concurrent_print_time_delta ("bgc GcShortWeakPtrScan"); + concurrent_print_time_delta ("NR GcShortWeakPtrScan"); + } + + { +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_scan_finalization); + if (bgc_t_join.joined()) + { + dprintf(3, ("Joining BGC threads for finalization")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + //Handle finalization. + dprintf(3,("Marking finalization data")); + //concurrent_print_time_delta ("bgc joined to mark finalization"); + concurrent_print_time_delta ("NRj"); + +// finalize_queue->EnterFinalizeLock(); + finalize_queue->ScanForFinalization (background_promote, max_generation, FALSE, __this); +// finalize_queue->LeaveFinalizeLock(); + + concurrent_print_time_delta ("NRF"); + } + + dprintf (2, ("before NR 2nd Hov count: %d", bgc_overflow_count)); + bgc_overflow_count = 0; + + // Scan dependent handles again to promote any secondaries associated with primaries that were promoted + // for finalization. As before background_scan_dependent_handles will also process any mark stack + // overflow. + dprintf (2, ("2nd dependent handle scan and process mark overflow")); + background_scan_dependent_handles (&sc); + //concurrent_print_time_delta ("2nd nonconcurrent dependent handle scan and process mark overflow"); + concurrent_print_time_delta ("NR 2nd Hov"); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_null_dead_long_weak); + if (bgc_t_join.joined()) + { + dprintf(2, ("Joining BGC threads for weak pointer deletion")); + bgc_t_join.restart(); + } +#endif //MULTIPLE_HEAPS + + // null out the target of long weakref that were not promoted. 
+ GCScan::GcWeakPtrScan (background_promote, max_generation, max_generation, &sc); + concurrent_print_time_delta ("NR GcWeakPtrScan"); + +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_null_dead_syncblk); + if (bgc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + dprintf (2, ("calling GcWeakPtrScanBySingleThread")); + // scan for deleted entries in the syncblk cache + GCScan::GcWeakPtrScanBySingleThread (max_generation, max_generation, &sc); + concurrent_print_time_delta ("NR GcWeakPtrScanBySingleThread"); +#ifdef MULTIPLE_HEAPS + dprintf(2, ("Starting BGC threads for end of background mark phase")); + bgc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + + gen0_bricks_cleared = FALSE; + + dprintf (2, ("end of bgc mark: loh: %d, soh: %d", + generation_size (max_generation + 1), + generation_sizes (generation_of (max_generation)))); + + for (int gen_idx = max_generation; gen_idx <= (max_generation + 1); gen_idx++) + { + generation* gen = generation_of (gen_idx); + dynamic_data* dd = dynamic_data_of (gen_idx); + dd_begin_data_size (dd) = generation_size (gen_idx) - + (generation_free_list_space (gen) + generation_free_obj_space (gen)) - + Align (size (generation_allocation_start (gen))); + dd_survived_size (dd) = 0; + dd_pinned_survived_size (dd) = 0; + dd_artificial_pinned_survived_size (dd) = 0; + dd_added_pinned_size (dd) = 0; + } + + heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); + PREFIX_ASSUME(seg != NULL); + + while (seg) + { + seg->flags &= ~heap_segment_flags_swept; + + if (heap_segment_allocated (seg) == heap_segment_mem (seg)) + { + // This can't happen... 
+ FATAL_GC_ERROR(); + } + + if (seg == ephemeral_heap_segment) + { + heap_segment_background_allocated (seg) = generation_allocation_start (generation_of (max_generation - 1)); + } + else + { + heap_segment_background_allocated (seg) = heap_segment_allocated (seg); + } + + dprintf (2, ("seg %Ix background allocated is %Ix", + heap_segment_mem (seg), + heap_segment_background_allocated (seg))); + seg = heap_segment_next_rw (seg); + } + + // We need to void alloc contexts here 'cause while background_ephemeral_sweep is running + // we can't let the user code consume the left over parts in these alloc contexts. + repair_allocation_contexts (FALSE); + +#ifdef TIME_GC + finish = GetCycleCount32(); + mark_time = finish - start; +#endif //TIME_GC + + dprintf (2, ("end of bgc mark: gen2 free list space: %d, free obj space: %d", + generation_free_list_space (generation_of (max_generation)), + generation_free_obj_space (generation_of (max_generation)))); + + dprintf(2,("---- (GC%d)End of background mark phase ----", VolatileLoad(&settings.gc_index))); +} + +void +gc_heap::suspend_EE () +{ + dprintf (2, ("suspend_EE")); +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps[0]; + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC_PREP); +#else + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC_PREP); +#endif //MULTIPLE_HEAPS +} + +#ifdef MULTIPLE_HEAPS +void +gc_heap::bgc_suspend_EE () +{ + for (int i = 0; i < n_heaps; i++) + { + gc_heap::g_heaps[i]->reset_gc_done(); + } + gc_started = TRUE; + dprintf (2, ("bgc_suspend_EE")); + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC_PREP); + + gc_started = FALSE; + for (int i = 0; i < n_heaps; i++) + { + gc_heap::g_heaps[i]->set_gc_done(); + } +} +#else +void +gc_heap::bgc_suspend_EE () +{ + reset_gc_done(); + gc_started = TRUE; + dprintf (2, ("bgc_suspend_EE")); + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC_PREP); + gc_started = FALSE; + set_gc_done(); +} +#endif //MULTIPLE_HEAPS + +void 
+gc_heap::restart_EE () +{ + dprintf (2, ("restart_EE")); +#ifdef MULTIPLE_HEAPS + GCToEEInterface::RestartEE(FALSE); +#else + GCToEEInterface::RestartEE(FALSE); +#endif //MULTIPLE_HEAPS +} + +inline uint8_t* gc_heap::high_page ( heap_segment* seg, BOOL concurrent_p) +{ + if (concurrent_p) + { + uint8_t* end = ((seg == ephemeral_heap_segment) ? + generation_allocation_start (generation_of (max_generation-1)) : + heap_segment_allocated (seg)); + return align_lower_page (end); + } + else + { + return heap_segment_allocated (seg); + } +} + +void gc_heap::revisit_written_page (uint8_t* page, + uint8_t* end, + BOOL concurrent_p, + heap_segment* seg, + uint8_t*& last_page, + uint8_t*& last_object, + BOOL large_objects_p, + size_t& num_marked_objects) +{ + UNREFERENCED_PARAMETER(seg); + + uint8_t* start_address = page; + uint8_t* o = 0; + int align_const = get_alignment_constant (!large_objects_p); + uint8_t* high_address = end; + uint8_t* current_lowest_address = background_saved_lowest_address; + uint8_t* current_highest_address = background_saved_highest_address; + BOOL no_more_loop_p = FALSE; + + THREAD_FROM_HEAP; +#ifndef MULTIPLE_HEAPS + const int thread = heap_number; +#endif //!MULTIPLE_HEAPS + + if (large_objects_p) + { + o = last_object; + } + else + { + if (((last_page + OS_PAGE_SIZE) == page) + || (start_address <= last_object)) + { + o = last_object; + } + else + { + o = find_first_object (start_address, last_object); + // We can visit the same object again, but on a different page. 
+ assert (o >= last_object); + } + } + + dprintf (3,("page %Ix start: %Ix, %Ix[ ", + (size_t)page, (size_t)o, + (size_t)(min (high_address, page + OS_PAGE_SIZE)))); + + while (o < (min (high_address, page + OS_PAGE_SIZE))) + { + size_t s; + + if (concurrent_p && large_objects_p) + { + bgc_alloc_lock->bgc_mark_set (o); + + if (((CObjectHeader*)o)->IsFree()) + { + s = unused_array_size (o); + } + else + { + s = size (o); + } + } + else + { + s = size (o); + } + + dprintf (3,("Considering object %Ix(%s)", (size_t)o, (background_object_marked (o, FALSE) ? "bm" : "nbm"))); + + assert (Align (s) >= Align (min_obj_size)); + + uint8_t* next_o = o + Align (s, align_const); + + if (next_o >= start_address) + { +#ifdef MULTIPLE_HEAPS + if (concurrent_p) + { + // We set last_object here for SVR BGC here because SVR BGC has more than + // one GC thread. When we have more than one GC thread we would run into this + // situation if we skipped unmarked objects: + // bgc thread 1 calls GWW, and detect object X not marked so it would skip it + // for revisit. + // bgc thread 2 marks X and all its current children. + // user thread comes along and dirties more (and later) pages in X. + // bgc thread 1 calls GWW again and gets those later pages but it will not mark anything + // on them because it had already skipped X. We need to detect that this object is now + // marked and mark the children on the dirtied pages. + // In the future if we have less BGC threads than we have heaps we should add + // the check to the number of BGC threads. 
+ last_object = o; + } +#endif //MULTIPLE_HEAPS + + if (contain_pointers (o) && + (!((o >= current_lowest_address) && (o < current_highest_address)) || + background_marked (o))) + { + dprintf (3, ("going through %Ix", (size_t)o)); + go_through_object (method_table(o), o, s, poo, start_address, use_start, (o + s), + if ((uint8_t*)poo >= min (high_address, page + OS_PAGE_SIZE)) + { + no_more_loop_p = TRUE; + goto end_limit; + } + uint8_t* oo = *poo; + + num_marked_objects++; + background_mark_object (oo THREAD_NUMBER_ARG); + ); + } + else if ( + concurrent_p && +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // see comment below + large_objects_p && +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ((CObjectHeader*)o)->IsFree() && + (next_o > min (high_address, page + OS_PAGE_SIZE))) + { + // We need to not skip the object here because of this corner scenario: + // A large object was being allocated during BGC mark so we first made it + // into a free object, then cleared its memory. In this loop we would detect + // that it's a free object which normally we would skip. But by the next time + // we call GetWriteWatch we could still be on this object and the object had + // been made into a valid object and some of its memory was changed. We need + // to be sure to process those written pages so we can't skip the object just + // yet. + // + // Similarly, when using software write watch, don't advance last_object when + // the current object is a free object that spans beyond the current page or + // high_address. Software write watch acquires gc_lock before the concurrent + // GetWriteWatch() call during revisit_written_pages(). A foreground GC may + // happen at that point and allocate from this free region, so when + // revisit_written_pages() continues, it cannot skip now-valid objects in this + // region. 
+ no_more_loop_p = TRUE; + goto end_limit; + } + } +end_limit: + if (concurrent_p && large_objects_p) + { + bgc_alloc_lock->bgc_mark_done (); + } + if (no_more_loop_p) + { + break; + } + o = next_o; + } + +#ifdef MULTIPLE_HEAPS + if (concurrent_p) + { + assert (last_object < (min (high_address, page + OS_PAGE_SIZE))); + } + else +#endif //MULTIPLE_HEAPS + { + last_object = o; + } + + dprintf (3,("Last object: %Ix", (size_t)last_object)); + last_page = align_lower_page (o); +} + +// When reset_only_p is TRUE, we should only reset pages that are in range +// because we need to consider the segments or part of segments that were +// allocated out of range all live. +void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p) +{ +#ifdef WRITE_WATCH + if (concurrent_p && !reset_only_p) + { + current_bgc_state = bgc_revisit_soh; + } + + size_t total_dirtied_pages = 0; + size_t total_marked_objects = 0; + + heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); + + PREFIX_ASSUME(seg != NULL); + + bool reset_watch_state = !!concurrent_p; + bool is_runtime_suspended = !concurrent_p; + BOOL small_object_segments = TRUE; + int align_const = get_alignment_constant (small_object_segments); + + while (1) + { + if (seg == 0) + { + if (small_object_segments) + { + //switch to large segment + if (concurrent_p && !reset_only_p) + { + current_bgc_state = bgc_revisit_loh; + } + + if (!reset_only_p) + { + dprintf (GTC_LOG, ("h%d: SOH: dp:%Id; mo: %Id", heap_number, total_dirtied_pages, total_marked_objects)); + fire_revisit_event (total_dirtied_pages, total_marked_objects, !small_object_segments); + concurrent_print_time_delta (concurrent_p ? "CR SOH" : "NR SOH"); + total_dirtied_pages = 0; + total_marked_objects = 0; + } + + small_object_segments = FALSE; + //concurrent_print_time_delta (concurrent_p ? 
"concurrent marking dirtied pages on SOH" : "nonconcurrent marking dirtied pages on SOH"); + + dprintf (3, ("now revisiting large object segments")); + align_const = get_alignment_constant (small_object_segments); + seg = heap_segment_rw (generation_start_segment (large_object_generation)); + + PREFIX_ASSUME(seg != NULL); + + continue; + } + else + { + if (reset_only_p) + { + dprintf (GTC_LOG, ("h%d: tdp: %Id", heap_number, total_dirtied_pages)); + } + else + { + dprintf (GTC_LOG, ("h%d: LOH: dp:%Id; mo: %Id", heap_number, total_dirtied_pages, total_marked_objects)); + fire_revisit_event (total_dirtied_pages, total_marked_objects, !small_object_segments); + } + break; + } + } + uint8_t* base_address = (uint8_t*)heap_segment_mem (seg); + //we need to truncate to the base of the page because + //some newly allocated could exist beyond heap_segment_allocated + //and if we reset the last page write watch status, + // they wouldn't be guaranteed to be visited -> gc hole. + uintptr_t bcount = array_size; + uint8_t* last_page = 0; + uint8_t* last_object = heap_segment_mem (seg); + uint8_t* high_address = 0; + + BOOL skip_seg_p = FALSE; + + if (reset_only_p) + { + if ((heap_segment_mem (seg) >= background_saved_lowest_address) || + (heap_segment_reserved (seg) <= background_saved_highest_address)) + { + dprintf (3, ("h%d: sseg: %Ix(-%Ix)", heap_number, + heap_segment_mem (seg), heap_segment_reserved (seg))); + skip_seg_p = TRUE; + } + } + + if (!skip_seg_p) + { + dprintf (3, ("looking at seg %Ix", (size_t)last_object)); + + if (reset_only_p) + { + base_address = max (base_address, background_saved_lowest_address); + dprintf (3, ("h%d: reset only starting %Ix", heap_number, base_address)); + } + + dprintf (3, ("h%d: starting: %Ix, seg %Ix-%Ix", heap_number, base_address, + heap_segment_mem (seg), heap_segment_reserved (seg))); + + + while (1) + { + if (reset_only_p) + { + high_address = ((seg == ephemeral_heap_segment) ? 
alloc_allocated : heap_segment_allocated (seg)); + high_address = min (high_address, background_saved_highest_address); + } + else + { + high_address = high_page (seg, concurrent_p); + } + + if ((base_address < high_address) && + (bcount >= array_size)) + { + ptrdiff_t region_size = high_address - base_address; + dprintf (3, ("h%d: gw: [%Ix(%Id)", heap_number, (size_t)base_address, (size_t)region_size)); + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // When the runtime is not suspended, it's possible for the table to be resized concurrently with the scan + // for dirty pages below. Prevent that by synchronizing with grow_brick_card_tables(). When the runtime is + // suspended, it's ok to scan for dirty pages concurrently from multiple background GC threads for disjoint + // memory regions. + if (!is_runtime_suspended) + { + enter_spin_lock(&gc_lock); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + get_write_watch_for_gc_heap (reset_watch_state, base_address, region_size, + (void**)background_written_addresses, + &bcount, is_runtime_suspended); + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (!is_runtime_suspended) + { + leave_spin_lock(&gc_lock); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + if (bcount != 0) + { + total_dirtied_pages += bcount; + + dprintf (3, ("Found %d pages [%Ix, %Ix[", + bcount, (size_t)base_address, (size_t)high_address)); + } + + if (!reset_only_p) + { + for (unsigned i = 0; i < bcount; i++) + { + #ifdef NO_WRITE_BARRIER + card_table [card_word (card_of (background_written_addresses [i]))] = ~0u; + dprintf (3,("Set Cards [%p:%p, %p:%p[", + card_of (background_written_addresses [i]), g_addresses [i], + card_of (background_written_addresses [i]+OS_PAGE_SIZE), background_written_addresses [i]+OS_PAGE_SIZE)); + #endif //NO_WRITE_BARRIER + uint8_t* page = (uint8_t*)background_written_addresses[i]; + dprintf (3, ("looking at page %d at %Ix(h: %Ix)", i, + (size_t)page, (size_t)high_address)); 
+ if (page < high_address) + { + //search for marked objects in the page + revisit_written_page (page, high_address, concurrent_p, + seg, last_page, last_object, + !small_object_segments, + total_marked_objects); + } + else + { + dprintf (3, ("page %d at %Ix is >= %Ix!", i, (size_t)page, (size_t)high_address)); + assert (!"page shouldn't have exceeded limit"); + } + } + } + + if (bcount >= array_size){ + base_address = background_written_addresses [array_size-1] + OS_PAGE_SIZE; + bcount = array_size; + } + } + else + { + break; + } + } + } + + seg = heap_segment_next_rw (seg); + } + +#endif //WRITE_WATCH +} + +void gc_heap::background_grow_c_mark_list() +{ + assert (c_mark_list_index >= c_mark_list_length); + BOOL should_drain_p = FALSE; + THREAD_FROM_HEAP; +#ifndef MULTIPLE_HEAPS + const int thread = heap_number; +#endif //!MULTIPLE_HEAPS + + dprintf (2, ("stack copy buffer overflow")); + uint8_t** new_c_mark_list = 0; + { + FAULT_NOT_FATAL(); + if (c_mark_list_length >= (SIZE_T_MAX / (2 * sizeof (uint8_t*)))) + { + should_drain_p = TRUE; + } + else + { + new_c_mark_list = new (nothrow) uint8_t*[c_mark_list_length*2]; + if (new_c_mark_list == 0) + { + should_drain_p = TRUE; + } + } + } + if (should_drain_p) + + { + dprintf (2, ("No more memory for the stacks copy, draining..")); + //drain the list by marking its elements + background_drain_mark_list (thread); + } + else + { + assert (new_c_mark_list); + memcpy (new_c_mark_list, c_mark_list, c_mark_list_length*sizeof(uint8_t*)); + c_mark_list_length = c_mark_list_length*2; + delete c_mark_list; + c_mark_list = new_c_mark_list; + } +} + +void gc_heap::background_promote_callback (Object** ppObject, ScanContext* sc, + uint32_t flags) +{ + UNREFERENCED_PARAMETER(sc); + //in order to save space on the array, mark the object, + //knowing that it will be visited later + assert (settings.concurrent); + + THREAD_NUMBER_FROM_CONTEXT; +#ifndef MULTIPLE_HEAPS + const int thread = 0; +#endif //!MULTIPLE_HEAPS + + uint8_t* o = 
(uint8_t*)*ppObject; + + if (o == 0) + return; + + HEAP_FROM_THREAD; + + gc_heap* hp = gc_heap::heap_of (o); + + if ((o < hp->background_saved_lowest_address) || (o >= hp->background_saved_highest_address)) + { + return; + } + +#ifdef INTERIOR_POINTERS + if (flags & GC_CALL_INTERIOR) + { + o = hp->find_object (o, hp->background_saved_lowest_address); + if (o == 0) + return; + } +#endif //INTERIOR_POINTERS + +#ifdef FEATURE_CONSERVATIVE_GC + // For conservative GC, a value on stack may point to middle of a free object. + // In this case, we don't need to promote the pointer. + if (g_pConfig->GetGCConservative() && ((CObjectHeader*)o)->IsFree()) + { + return; + } +#endif //FEATURE_CONSERVATIVE_GC + +#ifdef _DEBUG + ((CObjectHeader*)o)->Validate(); +#endif //_DEBUG + + dprintf (3, ("Concurrent Background Promote %Ix", (size_t)o)); + if (o && (size (o) > LARGE_OBJECT_SIZE)) + { + dprintf (3, ("Brc %Ix", (size_t)o)); + } + + if (hpt->c_mark_list_index >= hpt->c_mark_list_length) + { + hpt->background_grow_c_mark_list(); + } + dprintf (3, ("pushing %08x into mark_list", (size_t)o)); + hpt->c_mark_list [hpt->c_mark_list_index++] = o; + + STRESS_LOG3(LF_GC|LF_GCROOTS, LL_INFO1000000, " GCHeap::Background Promote: Promote GC Root *%p = %p MT = %pT", ppObject, o, o ? 
((Object*) o)->GetGCSafeMethodTable() : NULL); +} + +void gc_heap::mark_absorb_new_alloc() +{ + fix_allocation_contexts (FALSE); + + gen0_bricks_cleared = FALSE; + + clear_gen0_bricks(); +} + +BOOL gc_heap::prepare_bgc_thread(gc_heap* gh) +{ + BOOL success = FALSE; + BOOL thread_created = FALSE; + dprintf (2, ("Preparing gc thread")); + + gh->bgc_threads_timeout_cs.Enter(); + if (!(gh->bgc_thread_running)) + { + dprintf (2, ("GC thread not runnning")); + if ((gh->bgc_thread == 0) && create_bgc_thread(gh)) + { + success = TRUE; + thread_created = TRUE; + } + } + else + { + dprintf (3, ("GC thread already running")); + success = TRUE; + } + gh->bgc_threads_timeout_cs.Leave(); + + if(thread_created) + FireEtwGCCreateConcurrentThread_V1(GetClrInstanceId()); + + return success; +} + +BOOL gc_heap::create_bgc_thread(gc_heap* gh) +{ + assert (background_gc_done_event.IsValid()); + + //dprintf (2, ("Creating BGC thread")); + + gh->bgc_thread = GCToEEInterface::CreateBackgroundThread(gh->bgc_thread_stub, gh); + gh->bgc_thread_running = (gh->bgc_thread != NULL); + + return gh->bgc_thread_running; +} + +BOOL gc_heap::create_bgc_threads_support (int number_of_heaps) +{ + BOOL ret = FALSE; + dprintf (3, ("Creating concurrent GC thread for the first time")); + if (!background_gc_done_event.CreateManualEventNoThrow(TRUE)) + { + goto cleanup; + } + if (!bgc_threads_sync_event.CreateManualEventNoThrow(FALSE)) + { + goto cleanup; + } + if (!ee_proceed_event.CreateAutoEventNoThrow(FALSE)) + { + goto cleanup; + } + if (!bgc_start_event.CreateManualEventNoThrow(FALSE)) + { + goto cleanup; + } + +#ifdef MULTIPLE_HEAPS + bgc_t_join.init (number_of_heaps, join_flavor_bgc); +#else + UNREFERENCED_PARAMETER(number_of_heaps); +#endif //MULTIPLE_HEAPS + + ret = TRUE; + +cleanup: + + if (!ret) + { + if (background_gc_done_event.IsValid()) + { + background_gc_done_event.CloseEvent(); + } + if (bgc_threads_sync_event.IsValid()) + { + bgc_threads_sync_event.CloseEvent(); + } + if 
(ee_proceed_event.IsValid()) + { + ee_proceed_event.CloseEvent(); + } + if (bgc_start_event.IsValid()) + { + bgc_start_event.CloseEvent(); + } + } + + return ret; +} + +BOOL gc_heap::create_bgc_thread_support() +{ + BOOL ret = FALSE; + uint8_t** parr; + + if (!gc_lh_block_event.CreateManualEventNoThrow(FALSE)) + { + goto cleanup; + } + + //needs to have room for enough smallest objects fitting on a page + parr = new (nothrow) (uint8_t* [1 + page_size / MIN_OBJECT_SIZE]); + if (!parr) + { + goto cleanup; + } + + make_c_mark_list (parr); + + ret = TRUE; + +cleanup: + + if (!ret) + { + if (gc_lh_block_event.IsValid()) + { + gc_lh_block_event.CloseEvent(); + } + } + + return ret; +} + +int gc_heap::check_for_ephemeral_alloc() +{ + int gen = ((settings.reason == reason_oos_soh) ? (max_generation - 1) : -1); + + if (gen == -1) + { +#ifdef MULTIPLE_HEAPS + for (int heap_index = 0; heap_index < n_heaps; heap_index++) +#endif //MULTIPLE_HEAPS + { + for (int i = 0; i <= (max_generation - 1); i++) + { +#ifdef MULTIPLE_HEAPS + if (g_heaps[heap_index]->get_new_allocation (i) <= 0) +#else + if (get_new_allocation (i) <= 0) +#endif //MULTIPLE_HEAPS + { + gen = max (gen, i); + } + else + break; + } + } + } + + return gen; +} + +// Wait for gc to finish sequential part +void gc_heap::wait_to_proceed() +{ + assert (background_gc_done_event.IsValid()); + assert (bgc_start_event.IsValid()); + + user_thread_wait(&ee_proceed_event, FALSE); +} + +// Start a new concurrent gc +void gc_heap::start_c_gc() +{ + assert (background_gc_done_event.IsValid()); + assert (bgc_start_event.IsValid()); + +//Need to make sure that the gc thread is in the right place. 
+    background_gc_done_event.Wait(INFINITE, FALSE);
+    background_gc_done_event.Reset();
+    bgc_start_event.Set();
+}
+
+void gc_heap::do_background_gc()
+{
+    dprintf (2, ("starting a BGC"));
+#ifdef MULTIPLE_HEAPS
+    for (int i = 0; i < n_heaps; i++)
+    {
+        g_heaps[i]->init_background_gc();
+    }
+#else
+    init_background_gc();
+#endif //MULTIPLE_HEAPS
+    //start the background gc
+    start_c_gc ();
+
+    //wait until we get restarted by the BGC.
+    wait_to_proceed();
+}
+
+void gc_heap::kill_gc_thread()
+{
+    //assert (settings.concurrent == FALSE);
+
+    // We are doing a two-stage shutdown now.
+    // In the first stage, we do minimum work, and call ExitProcess at the end.
+    // In the second stage, we have the Loader lock and only one thread is
+    // alive. Hence we do not need to kill gc thread.
+    background_gc_done_event.CloseEvent();
+    gc_lh_block_event.CloseEvent();
+    bgc_start_event.CloseEvent();
+    bgc_threads_timeout_cs.Destroy();
+    bgc_thread = 0;
+    recursive_gc_sync::shutdown();
+}
+
+uint32_t gc_heap::bgc_thread_function()
+{
+    assert (background_gc_done_event.IsValid());
+    assert (bgc_start_event.IsValid());
+
+    dprintf (3, ("gc_thread thread starting..."));
+
+    BOOL do_exit = FALSE;
+
+    Thread* current_thread = GetThread();
+    BOOL cooperative_mode = TRUE;
+    bgc_thread_id.SetToCurrentThread();
+    dprintf (1, ("bgc_thread_id is set to %x", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging()));
+    while (1)
+    {
+        // Wait for work to do...
+ dprintf (3, ("bgc thread: waiting...")); + + cooperative_mode = enable_preemptive (current_thread); + //current_thread->m_fPreemptiveGCDisabled = 0; + + uint32_t result = bgc_start_event.Wait( +#ifdef _DEBUG +#ifdef MULTIPLE_HEAPS + INFINITE, +#else + 2000, +#endif //MULTIPLE_HEAPS +#else //_DEBUG +#ifdef MULTIPLE_HEAPS + INFINITE, +#else + 20000, +#endif //MULTIPLE_HEAPS +#endif //_DEBUG + FALSE); + dprintf (2, ("gc thread: finished waiting")); + + // not calling disable_preemptive here 'cause we + // can't wait for GC complete here - RestartEE will be called + // when we've done the init work. + + if (result == WAIT_TIMEOUT) + { + // Should join the bgc threads and terminate all of them + // at once. + dprintf (1, ("GC thread timeout")); + bgc_threads_timeout_cs.Enter(); + if (!keep_bgc_threads_p) + { + dprintf (2, ("GC thread exiting")); + bgc_thread_running = FALSE; + bgc_thread = 0; + bgc_thread_id.Clear(); + do_exit = TRUE; + } + bgc_threads_timeout_cs.Leave(); + if (do_exit) + break; + else + { + dprintf (3, ("GC thread needed, not exiting")); + continue; + } + } + // if we signal the thread with no concurrent work to do -> exit + if (!settings.concurrent) + { + dprintf (3, ("no concurrent GC needed, exiting")); + break; + } +#ifdef TRACE_GC + //trace_gc = TRUE; +#endif //TRACE_GC + recursive_gc_sync::begin_background(); + dprintf (2, ("beginning of bgc: gen2 FL: %d, FO: %d, frag: %d", + generation_free_list_space (generation_of (max_generation)), + generation_free_obj_space (generation_of (max_generation)), + dd_fragmentation (dynamic_data_of (max_generation)))); + + gc1(); + + current_bgc_state = bgc_not_in_process; + +#ifdef TRACE_GC + //trace_gc = FALSE; +#endif //TRACE_GC + + enable_preemptive (current_thread); +#ifdef MULTIPLE_HEAPS + bgc_t_join.join(this, gc_join_done); + if (bgc_t_join.joined()) +#endif //MULTIPLE_HEAPS + { + enter_spin_lock (&gc_lock); + dprintf (SPINLOCK_LOG, ("bgc Egc")); + + bgc_start_event.Reset(); + do_post_gc(); +#ifdef 
MULTIPLE_HEAPS + for (int gen = max_generation; gen <= (max_generation + 1); gen++) + { + size_t desired_per_heap = 0; + size_t total_desired = 0; + gc_heap* hp = 0; + dynamic_data* dd; + for (int i = 0; i < n_heaps; i++) + { + hp = g_heaps[i]; + dd = hp->dynamic_data_of (gen); + size_t temp_total_desired = total_desired + dd_desired_allocation (dd); + if (temp_total_desired < total_desired) + { + // we overflowed. + total_desired = (size_t)MAX_PTR; + break; + } + total_desired = temp_total_desired; + } + + desired_per_heap = Align ((total_desired/n_heaps), get_alignment_constant (FALSE)); + + for (int i = 0; i < n_heaps; i++) + { + hp = gc_heap::g_heaps[i]; + dd = hp->dynamic_data_of (gen); + dd_desired_allocation (dd) = desired_per_heap; + dd_gc_new_allocation (dd) = desired_per_heap; + dd_new_allocation (dd) = desired_per_heap; + } + } +#endif //MULTIPLE_HEAPS +#ifdef MULTIPLE_HEAPS + fire_pevents(); +#endif //MULTIPLE_HEAPS + + c_write (settings.concurrent, FALSE); + recursive_gc_sync::end_background(); + keep_bgc_threads_p = FALSE; + background_gc_done_event.Set(); + + dprintf (SPINLOCK_LOG, ("bgc Lgc")); + leave_spin_lock (&gc_lock); +#ifdef MULTIPLE_HEAPS + dprintf(1, ("End of BGC - starting all BGC threads")); + bgc_t_join.restart(); +#endif //MULTIPLE_HEAPS + } + // We can't disable preempt here because there might've been a GC already + // started and decided to do a BGC and waiting for a BGC thread to restart + // vm. That GC will be waiting in wait_to_proceed and we are waiting for it + // to restart the VM so we deadlock. 
+ //gc_heap::disable_preemptive (current_thread, TRUE); + } + + FireEtwGCTerminateConcurrentThread_V1(GetClrInstanceId()); + + dprintf (3, ("bgc_thread thread exiting")); + return 0; +} + +#endif //BACKGROUND_GC + +//Clear the cards [start_card, end_card[ +void gc_heap::clear_cards (size_t start_card, size_t end_card) +{ + if (start_card < end_card) + { + size_t start_word = card_word (start_card); + size_t end_word = card_word (end_card); + if (start_word < end_word) + { + unsigned bits = card_bit (start_card); + card_table [start_word] &= lowbits (~0, bits); + for (size_t i = start_word+1; i < end_word; i++) + card_table [i] = 0; + bits = card_bit (end_card); + // Don't write beyond end_card (and possibly uncommitted card table space). + if (bits != 0) + { + card_table [end_word] &= highbits (~0, bits); + } + } + else + { + card_table [start_word] &= (lowbits (~0, card_bit (start_card)) | + highbits (~0, card_bit (end_card))); + } +#ifdef VERYSLOWDEBUG + size_t card = start_card; + while (card < end_card) + { + assert (! (card_set_p (card))); + card++; + } +#endif //VERYSLOWDEBUG + dprintf (3,("Cleared cards [%Ix:%Ix, %Ix:%Ix[", + start_card, (size_t)card_address (start_card), + end_card, (size_t)card_address (end_card))); + } +} + +void gc_heap::clear_card_for_addresses (uint8_t* start_address, uint8_t* end_address) +{ + size_t start_card = card_of (align_on_card (start_address)); + size_t end_card = card_of (align_lower_card (end_address)); + clear_cards (start_card, end_card); +} + +// copy [srccard, ...[ to [dst_card, end_card[ +// This will set the same bit twice. Can be optimized. +inline +void gc_heap::copy_cards (size_t dst_card, size_t src_card, + size_t end_card, BOOL nextp) +{ + // If the range is empty, this function is a no-op - with the subtlety that + // either of the accesses card_table[srcwrd] or card_table[dstwrd] could be + // outside the committed region. To avoid the access, leave early. 
+ if (!(dst_card < end_card)) + return; + + unsigned int srcbit = card_bit (src_card); + unsigned int dstbit = card_bit (dst_card); + size_t srcwrd = card_word (src_card); + size_t dstwrd = card_word (dst_card); + unsigned int srctmp = card_table[srcwrd]; + unsigned int dsttmp = card_table[dstwrd]; + for (size_t card = dst_card; card < end_card; card++) + { + if (srctmp & (1 << srcbit)) + dsttmp |= 1 << dstbit; + else + dsttmp &= ~(1 << dstbit); + if (!(++srcbit % 32)) + { + srctmp = card_table[++srcwrd]; + srcbit = 0; + } + if (nextp) + { + if (srctmp & (1 << srcbit)) + dsttmp |= 1 << dstbit; + } + if (!(++dstbit % 32)) + { + card_table[dstwrd] = dsttmp; + dstwrd++; + dsttmp = card_table[dstwrd]; + dstbit = 0; + } + } + card_table[dstwrd] = dsttmp; +} + +void gc_heap::copy_cards_for_addresses (uint8_t* dest, uint8_t* src, size_t len) +{ + ptrdiff_t relocation_distance = src - dest; + size_t start_dest_card = card_of (align_on_card (dest)); + size_t end_dest_card = card_of (dest + len - 1); + size_t dest_card = start_dest_card; + size_t src_card = card_of (card_address (dest_card)+relocation_distance); + dprintf (3,("Copying cards [%Ix:%Ix->%Ix:%Ix, ", + src_card, (size_t)src, dest_card, (size_t)dest)); + dprintf (3,(" %Ix->%Ix:%Ix[", + (size_t)src+len, end_dest_card, (size_t)dest+len)); + + dprintf (3, ("dest: %Ix, src: %Ix, len: %Ix, reloc: %Ix, align_on_card(dest) is %Ix", + dest, src, len, relocation_distance, (align_on_card (dest)))); + + dprintf (3, ("start_dest_card: %Ix (address: %Ix), end_dest_card: %Ix(addr: %Ix), card_of (dest): %Ix", + start_dest_card, card_address (start_dest_card), end_dest_card, card_address (end_dest_card), card_of (dest))); + + //First card has two boundaries + if (start_dest_card != card_of (dest)) + { + if ((card_of (card_address (start_dest_card) + relocation_distance) <= card_of (src + len - 1))&& + card_set_p (card_of (card_address (start_dest_card) + relocation_distance))) + { + dprintf (3, ("card_address (start_dest_card) + 
reloc is %Ix, card: %Ix(set), src+len-1: %Ix, card: %Ix", + (card_address (start_dest_card) + relocation_distance), + card_of (card_address (start_dest_card) + relocation_distance), + (src + len - 1), + card_of (src + len - 1))); + + dprintf (3, ("setting card: %Ix", card_of (dest))); + set_card (card_of (dest)); + } + } + + if (card_set_p (card_of (src))) + set_card (card_of (dest)); + + + copy_cards (dest_card, src_card, end_dest_card, + ((dest - align_lower_card (dest)) != (src - align_lower_card (src)))); + + //Last card has two boundaries. + if ((card_of (card_address (end_dest_card) + relocation_distance) >= card_of (src)) && + card_set_p (card_of (card_address (end_dest_card) + relocation_distance))) + { + dprintf (3, ("card_address (end_dest_card) + reloc is %Ix, card: %Ix(set), src: %Ix, card: %Ix", + (card_address (end_dest_card) + relocation_distance), + card_of (card_address (end_dest_card) + relocation_distance), + src, + card_of (src))); + + dprintf (3, ("setting card: %Ix", end_dest_card)); + set_card (end_dest_card); + } + + if (card_set_p (card_of (src + len - 1))) + set_card (end_dest_card); +} + +#ifdef BACKGROUND_GC +// this does not need the Interlocked version of mark_array_set_marked. 
+void gc_heap::copy_mark_bits_for_addresses (uint8_t* dest, uint8_t* src, size_t len) +{ + dprintf (3, ("Copying mark_bits for addresses [%Ix->%Ix, %Ix->%Ix[", + (size_t)src, (size_t)dest, + (size_t)src+len, (size_t)dest+len)); + + uint8_t* src_o = src; + uint8_t* dest_o; + uint8_t* src_end = src + len; + int align_const = get_alignment_constant (TRUE); + ptrdiff_t reloc = dest - src; + + while (src_o < src_end) + { + uint8_t* next_o = src_o + Align (size (src_o), align_const); + + if (background_object_marked (src_o, TRUE)) + { + dest_o = src_o + reloc; + + //if (background_object_marked (dest_o, FALSE)) + //{ + // dprintf (3, ("*%Ix shouldn't have already been marked!", (size_t)(dest_o))); + // FATAL_GC_ERROR(); + //} + + background_mark (dest_o, + background_saved_lowest_address, + background_saved_highest_address); + dprintf (3, ("bc*%Ix*bc, b*%Ix*b", (size_t)src_o, (size_t)(dest_o))); + } + + src_o = next_o; + } +} +#endif //BACKGROUND_GC + +void gc_heap::fix_brick_to_highest (uint8_t* o, uint8_t* next_o) +{ + size_t new_current_brick = brick_of (o); + set_brick (new_current_brick, + (o - brick_address (new_current_brick))); + size_t b = 1 + new_current_brick; + size_t limit = brick_of (next_o); + //dprintf(3,(" fixing brick %Ix to point to object %Ix, till %Ix(%Ix)", + dprintf(3,("b:%Ix->%Ix-%Ix", + new_current_brick, (size_t)o, (size_t)next_o)); + while (b < limit) + { + set_brick (b,(new_current_brick - b)); + b++; + } +} + +// start can not be >= heap_segment_allocated for the segment. 
+uint8_t* gc_heap::find_first_object (uint8_t* start, uint8_t* first_object) +{ + size_t brick = brick_of (start); + uint8_t* o = 0; + //last_object == null -> no search shortcut needed + if ((brick == brick_of (first_object) || (start <= first_object))) + { + o = first_object; + } + else + { + ptrdiff_t min_brick = (ptrdiff_t)brick_of (first_object); + ptrdiff_t prev_brick = (ptrdiff_t)brick - 1; + int brick_entry = 0; + while (1) + { + if (prev_brick < min_brick) + { + break; + } + if ((brick_entry = brick_table [ prev_brick ]) >= 0) + { + break; + } + assert (! ((brick_entry == 0))); + prev_brick = (brick_entry + prev_brick); + + } + o = ((prev_brick < min_brick) ? first_object : + brick_address (prev_brick) + brick_entry - 1); + assert (o <= start); + } + + assert (Align (size (o)) >= Align (min_obj_size)); + uint8_t* next_o = o + Align (size (o)); + size_t curr_cl = (size_t)next_o / brick_size; + size_t min_cl = (size_t)first_object / brick_size; + + //dprintf (3,( "Looking for intersection with %Ix from %Ix", (size_t)start, (size_t)o)); +#ifdef TRACE_GC + unsigned int n_o = 1; +#endif //TRACE_GC + + uint8_t* next_b = min (align_lower_brick (next_o) + brick_size, start+1); + + while (next_o <= start) + { + do + { +#ifdef TRACE_GC + n_o++; +#endif //TRACE_GC + o = next_o; + assert (Align (size (o)) >= Align (min_obj_size)); + next_o = o + Align (size (o)); + Prefetch (next_o); + }while (next_o < next_b); + + if (((size_t)next_o / brick_size) != curr_cl) + { + if (curr_cl >= min_cl) + { + fix_brick_to_highest (o, next_o); + } + curr_cl = (size_t) next_o / brick_size; + } + next_b = min (align_lower_brick (next_o) + brick_size, start+1); + } + + size_t bo = brick_of (o); + //dprintf (3, ("Looked at %Id objects, fixing brick [%Ix-[%Ix", + dprintf (3, ("%Id o, [%Ix-[%Ix", + n_o, bo, brick)); + if (bo < brick) + { + set_brick (bo, (o - brick_address(bo))); + size_t b = 1 + bo; + int x = -1; + while (b < brick) + { + set_brick (b,x--); + b++; + } + } + + return o; +} 

#ifdef CARD_BUNDLE
// Searches card words [cardw, cardw_end) for the first non-zero one. With
// card bundles enabled, whole bundles whose bit is clear are skipped, and a
// bundle found to cover only empty card words gets its bit cleared as a side
// effect. On success stores the word index in 'cardw' and returns TRUE.
BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end)
{
    dprintf (3, ("gc: %d, find_card_dword cardw: %Ix, cardw_end: %Ix",
                 dd_collection_count (dynamic_data_of (0)), cardw, cardw_end));

    if (card_bundles_enabled())
    {
        size_t cardb = cardw_card_bundle (cardw);
        size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (cardw_end));
        while (1)
        {
            //find a non null bundle
            while ((cardb < end_cardb) &&
                   (card_bundle_set_p (cardb)==0))
            {
                cardb++;
            }
            if (cardb == end_cardb)
                return FALSE;
            //find a non empty card word

            uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb),cardw)];
            uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1),cardw_end)];
            while ((card_word < card_word_end) &&
                   !(*card_word))
            {
                card_word++;
            }
            if (card_word != card_word_end)
            {
                cardw = (card_word - &card_table [0]);
                return TRUE;
            }
            else if ((cardw <= card_bundle_cardw (cardb)) &&
                     (card_word == &card_table [card_bundle_cardw (cardb+1)]))
            {
                // a whole bundle was explored and is empty
                dprintf (3, ("gc: %d, find_card_dword clear bundle: %Ix cardw:[%Ix,%Ix[",
                        dd_collection_count (dynamic_data_of (0)),
                        cardb, card_bundle_cardw (cardb),
                        card_bundle_cardw (cardb+1)));
                card_bundle_clear (cardb);
            }
            cardb++;
        }
    }
    else
    {
        // no bundles: plain linear scan over the card words
        uint32_t* card_word = &card_table[cardw];
        uint32_t* card_word_end = &card_table [cardw_end];

        while (card_word < card_word_end)
        {
            if ((*card_word) != 0)
            {
                cardw = (card_word - &card_table [0]);
                return TRUE;
            }
            card_word++;
        }
        return FALSE;
    }
}

#endif //CARD_BUNDLE

// Finds the next run of set card bits at or after 'card', searching up to
// card word index 'card_word_end'. On success returns TRUE with 'card' set
// to the first set card and 'end_card' to the first clear card after the run
// (i.e. the run is [card, end_card[).
// note: the 'card_table' parameter shadows the gc_heap member of the same name.
BOOL gc_heap::find_card (uint32_t* card_table, size_t& card,
                         size_t card_word_end, size_t& end_card)
{
    uint32_t* last_card_word;
    uint32_t y;
    uint32_t z;
    // Find the first card which is set
    last_card_word = &card_table [card_word (card)];
    z = card_bit (card);
    y = (*last_card_word) >> z;
    if (!y)
    {
        // nothing set in the rest of this word; search from the next word on
        z = 0;
#ifdef CARD_BUNDLE
        size_t lcw = card_word(card)+1;
        if (gc_heap::find_card_dword (lcw, card_word_end) == FALSE)
            return FALSE;
        else
        {
            last_card_word = &card_table [lcw];
            y = *last_card_word;
        }
#else //CARD_BUNDLE
        do
        {
            ++last_card_word;
        }
        while ((last_card_word < &card_table [card_word_end]) &&
               !(*last_card_word));
        if (last_card_word < &card_table [card_word_end])
            y = *last_card_word;
        else
            return FALSE;
#endif //CARD_BUNDLE
    }

    // Look for the lowest bit set
    if (y)
    {
        while (!(y & 1))
        {
            z++;
            y = y / 2;
        }
    }
    card = (last_card_word - &card_table [0])* card_word_width + z;
    // now extend the run while consecutive bits stay set, skipping whole
    // all-ones words
    do
    {
        z++;
        y = y / 2;
        if ((z == card_word_width) &&
            (last_card_word < &card_table [card_word_end]))
        {
            do
            {
                y = *(++last_card_word);
            }while ((last_card_word < &card_table [card_word_end]) &&
#ifdef _MSC_VER
                    // NOTE(review): for card_word_width == 32 this shift is
                    // implementation-defined on MSVC; gcc rejects it, hence
                    // the ~0u form below — presumably both compare against
                    // an all-ones word. Confirm card_word_width here.
                    (y == (1 << card_word_width)-1)
#else
                    // if left shift count >= width of type,
                    // gcc reports error.
                    (y == ~0u)
#endif // _MSC_VER
                );
            z = 0;
        }
    } while (y & 1);

    end_card = (last_card_word - &card_table [0])* card_word_width + z;
    //dprintf (3, ("find_card: [%Ix, %Ix[ set", card, end_card));
    dprintf (3, ("fc: [%Ix, %Ix[", card, end_card));
    return TRUE;
}


 //because of heap expansion, computing end is complicated.
// if 'low' falls inside this segment's committed range, the scan should stop
// at 'low'; otherwise use the segment's allocated end.
uint8_t* compute_next_end (heap_segment* seg, uint8_t* low)
{
    if ((low >= heap_segment_mem (seg)) &&
        (low < heap_segment_allocated (seg)))
        return low;
    else
        return heap_segment_allocated (seg);
}

// Returns the address below which a pointer counts as pointing into a
// condemned (younger) generation for the card scan.
uint8_t*
gc_heap::compute_next_boundary (uint8_t* low, int gen_number,
                                BOOL relocating)
{
    UNREFERENCED_PARAMETER(low);

    //when relocating, the fault line is the plan start of the younger
    //generation because the generation is promoted.
    if (relocating && (gen_number == (settings.condemned_generation + 1)))
    {
        generation* gen = generation_of (gen_number - 1);
        uint8_t* gen_alloc = generation_plan_allocation_start (gen);
        assert (gen_alloc);
        return gen_alloc;
    }
    else
    {
        assert (gen_number > settings.condemned_generation);
        return generation_allocation_start (generation_of (gen_number - 1 ));
    }
}

// Counts 'o' toward n_gen if it lands in a condemned range (this heap's
// [gc_low, gc_high[, or another heap's under MULTIPLE_HEAPS), and always
// counts it as a cross-generation pointer found on the current card.
inline void
gc_heap::keep_card_live (uint8_t* o, size_t& n_gen,
                         size_t& cg_pointers_found)
{
    THREAD_FROM_HEAP;
    if ((gc_low <= o) && (gc_high > o))
    {
        n_gen++;
    }
#ifdef MULTIPLE_HEAPS
    else if (o)
    {
        gc_heap* hp = heap_of (o);
        if (hp != this)
        {
            if ((hp->gc_low <= o) &&
                (hp->gc_high > o))
            {
                n_gen++;
            }
        }
    }
#endif //MULTIPLE_HEAPS
    cg_pointers_found ++;
    dprintf (4, ("keep card live for %Ix", o));
}

// If *poo points into a condemned range (this heap's, or another heap's under
// MULTIPLE_HEAPS), bumps n_gen and applies 'fn' to the slot. Separately bumps
// cg_pointers_found for pointers into [next_boundary, nhigh[ (and, cross-heap,
// into the other heap's ephemeral range or always when relocating), which
// tells the caller this card must stay set.
inline void
gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen,
                                    size_t& cg_pointers_found,
                                    card_fn fn, uint8_t* nhigh,
                                    uint8_t* next_boundary)
{
    THREAD_FROM_HEAP;
    if ((gc_low <= *poo) && (gc_high > *poo))
    {
        n_gen++;
        call_fn(fn) (poo THREAD_NUMBER_ARG);
    }
#ifdef MULTIPLE_HEAPS
    else if (*poo)
    {
        gc_heap* hp = heap_of_gc (*poo);
        if (hp != this)
        {
            if ((hp->gc_low <= *poo) &&
                (hp->gc_high > *poo))
            {
                n_gen++;
                call_fn(fn) (poo THREAD_NUMBER_ARG);
            }
            if ((fn == &gc_heap::relocate_address) ||
                ((hp->ephemeral_low <= *poo) &&
                 (hp->ephemeral_high > *poo)))
            {
                cg_pointers_found++;
            }
        }
    }
#endif //MULTIPLE_HEAPS
    if ((next_boundary <= *poo) && (nhigh > *poo))
    {
        cg_pointers_found ++;
        dprintf (4, ("cg pointer %Ix found, %Id so far",
                     (size_t)*poo, cg_pointers_found ));
    }
}

// Bookkeeping when the scan position 'po' moves past the current card:
// clears the cards left behind if no cross-generation pointers were found on
// them, folds cg_pointers_found into n_eph, and when the end of the current
// set-card run was passed, locates the next run via find_card (updating
// card/end_card/start_address/limit through the reference parameters).
// Returns TRUE iff end_card was passed.
BOOL gc_heap::card_transition (uint8_t* po, uint8_t* end, size_t card_word_end,
                               size_t& cg_pointers_found,
                               size_t& n_eph, size_t& n_card_set,
                               size_t& card, size_t& end_card,
                               BOOL& foundp, uint8_t*& start_address,
                               uint8_t*& limit, size_t& n_cards_cleared)
{
    dprintf (3, ("pointer %Ix past card %Ix",
                 (size_t)po, (size_t)card));
    dprintf (3, ("ct: %Id cg", cg_pointers_found));
    BOOL passed_end_card_p = FALSE;
    foundp = FALSE;

    if (cg_pointers_found == 0)
    {
        // nothing interesting on the cards behind us - clear them
        //dprintf(3,(" Clearing cards [%Ix, %Ix[ ",
        dprintf(3,(" CC [%Ix, %Ix[ ",
                (size_t)card_address(card), (size_t)po));
        clear_cards (card, card_of(po));
        n_card_set -= (card_of (po) - card);
        n_cards_cleared += (card_of (po) - card);
    }
    n_eph +=cg_pointers_found;
    cg_pointers_found = 0;
    card = card_of (po);
    if (card >= end_card)
    {
        passed_end_card_p = TRUE;
        dprintf (3, ("card %Ix exceeding end_card %Ix",
                    (size_t)card, (size_t)end_card));
        foundp = find_card (card_table, card, card_word_end, end_card);
        if (foundp)
        {
            n_card_set+= end_card - card;
            start_address = card_address (card);
            dprintf (3, ("NewC: %Ix, start: %Ix, end: %Ix",
                        (size_t)card, (size_t)start_address,
                        (size_t)card_address (end_card)));
        }
        limit = min (end, card_address (end_card));

        // a run returned by find_card never ends on a set card
        assert (!((limit == card_address (end_card))&&
                card_set_p (end_card)));
    }

    return passed_end_card_p;
}

// Walks all small-object-heap segments, visiting only objects covered by set
// cards, and applies 'fn' to every object slot that may point into a
// condemned generation. Cards found to contain no such pointers are cleared.
// When not relocating, also recomputes generation_skip_ratio (useful
// cross-generation pointers per card-crossing pointer, in percent).
void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating)
{
#ifdef BACKGROUND_GC
    dprintf (3, ("current_sweep_pos is %Ix, saved_sweep_ephemeral_seg is %Ix(%Ix)",
                 current_sweep_pos, saved_sweep_ephemeral_seg, saved_sweep_ephemeral_start));
    heap_segment* soh_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
    PREFIX_ASSUME(soh_seg != NULL);
    while (soh_seg )
    {
        dprintf (3, ("seg %Ix, bgc_alloc: %Ix, alloc: %Ix",
            soh_seg,
            heap_segment_background_allocated (soh_seg),
            heap_segment_allocated (soh_seg)));
        soh_seg = heap_segment_next_rw (soh_seg);
    }
#endif //BACKGROUND_GC

    uint8_t* low = gc_low;
    uint8_t* high = gc_high;
    size_t end_card = 0;
    generation* oldest_gen = generation_of (max_generation);
    int curr_gen_number = max_generation;
    // boundary where we cross into the next younger generation (ephemeral seg only)
    uint8_t* gen_boundary = generation_allocation_start
        (generation_of (curr_gen_number - 1));
    uint8_t* next_boundary = (compute_next_boundary
                              (gc_low, curr_gen_number, relocating));
    heap_segment* seg = heap_segment_rw (generation_start_segment (oldest_gen));

    PREFIX_ASSUME(seg != NULL);

    uint8_t* beg = generation_allocation_start (oldest_gen);
    uint8_t* end = compute_next_end (seg, low);
    uint8_t* last_object = beg;

    size_t cg_pointers_found = 0;

    size_t card_word_end = (card_of (align_on_card_word (end)) /
                            card_word_width);

    size_t n_eph = 0;
    size_t n_gen = 0;
    size_t n_card_set = 0;
    // when relocating, plan-allocated marks the post-compaction ephemeral top
    uint8_t* nhigh = (relocating ?
                      heap_segment_plan_allocated (ephemeral_heap_segment) : high);

    BOOL foundp = FALSE;
    uint8_t* start_address = 0;
    uint8_t* limit = 0;
    size_t card = card_of (beg);
#ifdef BACKGROUND_GC
    BOOL consider_bgc_mark_p = FALSE;
    BOOL check_current_sweep_p = FALSE;
    BOOL check_saved_sweep_p = FALSE;
    should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC

    dprintf(3, ("CMs: %Ix->%Ix", (size_t)beg, (size_t)end));
    size_t total_cards_cleared = 0;

    while (1)
    {
        // scan moved past the current card: clear/account the cards behind us
        if (card_of(last_object) > card)
        {
            dprintf (3, ("Found %Id cg pointers", cg_pointers_found));
            if (cg_pointers_found == 0)
            {
                dprintf(3,(" Clearing cards [%Ix, %Ix[ ", (size_t)card_address(card), (size_t)last_object));
                clear_cards (card, card_of(last_object));
                n_card_set -= (card_of (last_object) - card);
                total_cards_cleared += (card_of (last_object) - card);
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
            card = card_of (last_object);
        }
        // need a new run of set cards
        if (card >= end_card)
        {
            foundp = find_card (card_table, card, card_word_end, end_card);
            if (foundp)
            {
                n_card_set+= end_card - card;
                start_address = max (beg, card_address (card));
            }
            limit = min (end, card_address (end_card));
        }
        // no more set cards on this segment (or run lies past its end):
        // finish the segment and move to the next one
        if ((!foundp) || (last_object >= end) || (card_address (card) >= end))
        {
            if ((foundp) && (cg_pointers_found == 0))
            {
                dprintf(3,(" Clearing cards [%Ix, %Ix[ ", (size_t)card_address(card),
                           (size_t)end));
                clear_cards (card, card_of (end));
                n_card_set -= (card_of (end) - card);
                total_cards_cleared += (card_of (end) - card);
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
            if ((seg = heap_segment_next_in_range (seg)) != 0)
            {
#ifdef BACKGROUND_GC
                should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC
                beg = heap_segment_mem (seg);
                end = compute_next_end (seg, low);
                card_word_end = card_of (align_on_card_word (end)) / card_word_width;
                card = card_of (beg);
                last_object = beg;
                end_card = 0;
                continue;
            }
            else
            {
                break;
            }
        }

        assert (card_set_p (card));
        {
            uint8_t* o = last_object;

            o = find_first_object (start_address, last_object);
            //Never visit an object twice.
            assert (o >= last_object);

            //dprintf(3,("Considering card %Ix start object: %Ix, %Ix[ boundary: %Ix",
            dprintf(3, ("c: %Ix, o: %Ix, l: %Ix[ boundary: %Ix",
                   card, (size_t)o, (size_t)limit, (size_t)gen_boundary));

            while (o < limit)
            {
                assert (Align (size (o)) >= Align (min_obj_size));
                size_t s = size (o);

                uint8_t* next_o = o + Align (s);
                Prefetch (next_o);

                // crossed a generation start on the ephemeral segment:
                // recompute the condemned-pointer boundary
                if ((o >= gen_boundary) &&
                    (seg == ephemeral_heap_segment))
                {
                    dprintf (3, ("switching gen boundary %Ix", (size_t)gen_boundary));
                    curr_gen_number--;
                    assert ((curr_gen_number > 0));
                    gen_boundary = generation_allocation_start
                        (generation_of (curr_gen_number - 1));
                    next_boundary = (compute_next_boundary
                                     (low, curr_gen_number, relocating));
                }

                dprintf (4, ("|%Ix|", (size_t)o));

                // object ends before the card's range - skip its contents
                if (next_o < start_address)
                {
                    goto end_object;
                }

#ifdef BACKGROUND_GC
                if (!fgc_should_consider_object (o, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p))
                {
                    goto end_object;
                }
#endif //BACKGROUND_GC

#ifdef COLLECTIBLE_CLASS
                if (is_collectible(o))
                {
                    // the class object itself is a reference that must be
                    // scanned, in addition to the object's fields
                    BOOL passed_end_card_p = FALSE;

                    if (card_of (o) > card)
                    {
                        passed_end_card_p = card_transition (o, end, card_word_end,
                            cg_pointers_found,
                            n_eph, n_card_set,
                            card, end_card,
                            foundp, start_address,
                            limit, total_cards_cleared);
                    }

                    if ((!passed_end_card_p || foundp) && (card_of (o) == card))
                    {
                        // card is valid and it covers the head of the object
                        if (fn == &gc_heap::relocate_address)
                        {
                            keep_card_live (o, n_gen, cg_pointers_found);
                        }
                        else
                        {
                            uint8_t* class_obj = get_class_object (o);
                            mark_through_cards_helper (&class_obj, n_gen,
                                                    cg_pointers_found, fn,
                                                    nhigh, next_boundary);
                        }
                    }

                    if (passed_end_card_p)
                    {
                        if (foundp && (card_address (card) < next_o))
                        {
                            goto go_through_refs;
                        }
                        else if (foundp && (start_address < limit))
                        {
                            next_o = find_first_object (start_address, o);
                            goto end_object;
                        }
                        else
                            goto end_limit;
                    }
                }

go_through_refs:
#endif //COLLECTIBLE_CLASS

                if (contain_pointers (o))
                {
                    dprintf(3,("Going through %Ix start_address: %Ix", (size_t)o, (size_t)start_address));

                    {
                        dprintf (4, ("normal object path"));
                        go_through_object
                            (method_table(o), o, s, poo,
                             start_address, use_start, (o + s),
                             {
                                 dprintf (4, ("<%Ix>:%Ix", (size_t)poo, (size_t)*poo));
                                 if (card_of ((uint8_t*)poo) > card)
                                 {
                                     BOOL passed_end_card_p = card_transition ((uint8_t*)poo, end,
                                            card_word_end,
                                            cg_pointers_found,
                                            n_eph, n_card_set,
                                            card, end_card,
                                            foundp, start_address,
                                            limit, total_cards_cleared);

                                     if (passed_end_card_p)
                                     {
                                         if (foundp && (card_address (card) < next_o))
                                         {
                                             // next set-card run starts inside
                                             // this object: fast-forward the
                                             // slot cursor to start_address
                                             //new_start();
                                             {
                                                 if (ppstop <= (uint8_t**)start_address)
                                                 {break;}
                                                 else if (poo < (uint8_t**)start_address)
                                                 {poo = (uint8_t**)start_address;}
                                             }
                                         }
                                         else if (foundp && (start_address < limit))
                                         {
                                             next_o = find_first_object (start_address, o);
                                             goto end_object;
                                         }
                                         else
                                             goto end_limit;
                                     }
                                 }

                                 mark_through_cards_helper (poo, n_gen,
                                                            cg_pointers_found, fn,
                                                            nhigh, next_boundary);
                             }
                            );
                    }
                }

            end_object:
                // keep the brick table consistent as we cross brick boundaries
                if (((size_t)next_o / brick_size) != ((size_t) o / brick_size))
                {
                    if (brick_table [brick_of (o)] <0)
                        fix_brick_to_highest (o, next_o);
                }
                o = next_o;
            }
        end_limit:
            last_object = o;
        }
    }
    // compute the efficiency ratio of the card table
    if (!relocating)
    {
        generation_skip_ratio = ((n_eph > 400)? (int)(((float)n_gen / (float)n_eph) * 100) : 100);
        dprintf (3, ("Msoh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d",
             n_eph, n_gen , n_card_set, total_cards_cleared, generation_skip_ratio));
    }
    else
    {
        dprintf (3, ("R: Msoh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d",
             n_gen, n_eph, n_card_set, total_cards_cleared, generation_skip_ratio));
    }
}

#ifdef SEG_REUSE_STATS
// Logs the per-power-of-2 bucket counts, returns the total item count and
// (through total_size) the summed byte size implied by the bucket indices.
size_t gc_heap::dump_buckets (size_t* ordered_indices, int count, size_t* total_size)
{
    size_t total_items = 0;
    *total_size = 0;
    for (int i = 0; i < count; i++)
    {
        total_items += ordered_indices[i];
        // bucket i holds items of size ~2^(MIN_INDEX_POWER2 + i)
        *total_size += ordered_indices[i] << (MIN_INDEX_POWER2 + i);
        dprintf (SEG_REUSE_LOG_0, ("[%d]%4d 2^%2d", heap_number, ordered_indices[i], (MIN_INDEX_POWER2 + i)));
    }
    dprintf (SEG_REUSE_LOG_0, ("[%d]Total %d items, total size is 0x%Ix", heap_number, total_items, *total_size));
    return total_items;
}
#endif // SEG_REUSE_STATS

// Accounts one plug in ordered_plug_indices (bucketed by rounded-up
// power-of-2 size, padded for allocation overheads); pinned plugs are
// skipped - they stay where they are and are dequeued instead.
void gc_heap::count_plug (size_t last_plug_size, uint8_t*& last_plug)
{
    // detect pinned plugs
    if (!pinned_plug_que_empty_p() && (last_plug == pinned_plug (oldest_pin())))
    {
        deque_pinned_plug();
        update_oldest_pinned_plug();
        dprintf (3, ("dequed pin,now oldest pin is %Ix", pinned_plug (oldest_pin())));
    }
    else
    {
        // pad with a min object so a free object can always be planted after it
        size_t plug_size = last_plug_size + Align(min_obj_size);
        BOOL is_padded = FALSE;

#ifdef SHORT_PLUGS
        plug_size += Align (min_obj_size);
        is_padded = TRUE;
#endif //SHORT_PLUGS

#ifdef RESPECT_LARGE_ALIGNMENT
        plug_size += switch_alignment_size (is_padded);
#endif //RESPECT_LARGE_ALIGNMENT

        total_ephemeral_plugs += plug_size;
        size_t plug_size_power2 = round_up_power2 (plug_size);
        ordered_plug_indices[relative_index_power2_plug (plug_size_power2)]++;
        dprintf (SEG_REUSE_LOG_1, ("[%d]count_plug: adding 0x%Ix - %Id (2^%d) to ordered plug array",
            heap_number,
            last_plug,
            plug_size,
            (relative_index_power2_plug (plug_size_power2) + MIN_INDEX_POWER2)));
    }
}

// In-order traversal of a brick's plug tree, calling count_plug for each plug
// (a plug's size is only known once the following plug's gap is seen, hence
// the one-behind 'last_plug' cursor threaded through the recursion).
void gc_heap::count_plugs_in_brick (uint8_t* tree, uint8_t*& last_plug)
{
    assert ((tree != NULL));
    if (node_left_child (tree))
    {
        count_plugs_in_brick (tree + node_left_child (tree), last_plug);
    }

    if (last_plug != 0)
    {
        uint8_t* plug = tree;
        size_t gap_size = node_gap_size (plug);
        uint8_t* gap = (plug - gap_size);
        uint8_t* last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - last_plug);
        dprintf (3, ("tree: %Ix, last plug: %Ix, gap size: %Ix, gap: %Ix, last plug size: %Ix",
            tree, last_plug, gap_size, gap, last_plug_size));

        if (tree == oldest_pinned_plug)
        {
            dprintf (3, ("tree %Ix is pinned, last plug is %Ix, size is %Ix",
                tree, last_plug, last_plug_size));
            mark* m = oldest_pin();
            if (m->has_pre_plug_info())
            {
                // the saved pre-plug info lives at the end of the previous plug
                last_plug_size += sizeof (gap_reloc_pair);
                dprintf (3, ("pin %Ix has pre plug, adjusting plug size to %Ix", tree, last_plug_size));
            }
        }
        // Can't assert here - if it's a pinned plug it can be less.
        //assert (last_plug_size >= Align (min_obj_size));

        count_plug (last_plug_size, last_plug);
    }

    last_plug = tree;

    if (node_right_child (tree))
    {
        count_plugs_in_brick (tree + node_right_child (tree), last_plug);
    }
}

// Builds ordered_plug_indices: the histogram (by power-of-2 size bucket) of
// all ephemeral plugs that would need to be re-fitted into a reused segment.
// A copy is kept in saved_ordered_plug_indices so later fitting attempts can
// restore it without re-walking the bricks.
void gc_heap::build_ordered_plug_indices ()
{
    memset (ordered_plug_indices, 0, sizeof(ordered_plug_indices));
    memset (saved_ordered_plug_indices, 0, sizeof(saved_ordered_plug_indices));

    uint8_t* start_address = generation_limit (max_generation);
    uint8_t* end_address = heap_segment_allocated (ephemeral_heap_segment);
    size_t current_brick = brick_of (start_address);
    size_t end_brick = brick_of (end_address - 1);
    uint8_t* last_plug = 0;

    //Look for the right pinned plug to start from.
    reset_pinned_queue_bos();
    while (!pinned_plug_que_empty_p())
    {
        mark* m = oldest_pin();
        if ((m->first >= start_address) && (m->first < end_address))
        {
            dprintf (3, ("found a pin %Ix between %Ix and %Ix", m->first, start_address, end_address));

            break;
        }
        else
            deque_pinned_plug();
    }

    update_oldest_pinned_plug();

    while (current_brick <= end_brick)
    {
        int brick_entry = brick_table [ current_brick ];
        if (brick_entry >= 0)
        {
            count_plugs_in_brick (brick_address (current_brick) + brick_entry -1, last_plug);
        }

        current_brick++;
    }

    // account for the final plug, which has no following gap
    if (last_plug !=0)
    {
        count_plug (end_address - last_plug, last_plug);
    }

    // we need to make sure that after fitting all the existing plugs, we
    // have big enough free space left to guarantee that the next allocation
    // will succeed.
    size_t extra_size = END_SPACE_AFTER_GC + Align (min_obj_size);
    total_ephemeral_plugs += extra_size;
    dprintf (SEG_REUSE_LOG_0, ("Making sure we can fit a large object after fitting all plugs"));
    ordered_plug_indices[relative_index_power2_plug (round_up_power2 (extra_size))]++;

    memcpy (saved_ordered_plug_indices, ordered_plug_indices, sizeof(ordered_plug_indices));

#ifdef SEG_REUSE_STATS
    dprintf (SEG_REUSE_LOG_0, ("Plugs:"));
    size_t total_plug_power2 = 0;
    dump_buckets (ordered_plug_indices, MAX_NUM_BUCKETS, &total_plug_power2);
    dprintf (SEG_REUSE_LOG_0, ("plugs: 0x%Ix (rounded up to 0x%Ix (%d%%))",
                total_ephemeral_plugs,
                total_plug_power2,
                (total_ephemeral_plugs ?
                (total_plug_power2 * 100 / total_ephemeral_plugs) :
                0)));
    dprintf (SEG_REUSE_LOG_0, ("-------------------"));
#endif // SEG_REUSE_STATS
}

// Resets the free-space histograms used by the best-fit machinery.
void gc_heap::init_ordered_free_space_indices ()
{
    memset (ordered_free_space_indices, 0, sizeof(ordered_free_space_indices));
    memset (saved_ordered_free_space_indices, 0, sizeof(saved_ordered_free_space_indices));
}

// Caps the free-space histogram at max_free_space_items entries, keeping the
// largest buckets (scanning from the big end) and zeroing everything smaller
// than the cutoff. Records the cutoff bucket in trimmed_free_space_index and
// snapshots the result into saved_ordered_free_space_indices.
void gc_heap::trim_free_spaces_indices ()
{
    trimmed_free_space_index = -1;
    size_t max_count = max_free_space_items - 1;
    size_t count = 0;
    int i = 0;
    for (i = (MAX_NUM_BUCKETS - 1); i >= 0; i--)
    {
        count += ordered_free_space_indices[i];

        if (count >= max_count)
        {
            break;
        }
    }

    ptrdiff_t extra_free_space_items = count - max_count;

    if (extra_free_space_items > 0)
    {
        // drop the overflow from the cutoff bucket itself
        ordered_free_space_indices[i] -= extra_free_space_items;
        free_space_items = max_count;
        trimmed_free_space_index = i;
    }
    else
    {
        free_space_items = count;
    }

    if (i == -1)
    {
        i = 0;
    }

    free_space_buckets = MAX_NUM_BUCKETS - i;

    for (--i; i >= 0; i--)
    {
        ordered_free_space_indices[i] = 0;
    }

    memcpy (saved_ordered_free_space_indices,
            ordered_free_space_indices,
            sizeof(ordered_free_space_indices));
}

// We fit as many plugs as we can and update the number of plugs left and the number
// of free spaces left.
// Tries to fit all plugs of bucket 'small_index' into free spaces of bucket
// 'big_index' (big_index >= small_index). Consumes the big bucket; leftover
// capacity is re-expressed as smaller free spaces (binary decomposition of
// extra_small_spaces), leftover plugs stay in ordered_blocks. Returns TRUE
// iff all plugs of the small bucket were placed.
BOOL gc_heap::can_fit_in_spaces_p (size_t* ordered_blocks, int small_index, size_t* ordered_spaces, int big_index)
{
    assert (small_index <= big_index);
    assert (big_index < MAX_NUM_BUCKETS);

    size_t small_blocks = ordered_blocks[small_index];

    if (small_blocks == 0)
    {
        return TRUE;
    }

    size_t big_spaces = ordered_spaces[big_index];

    if (big_spaces == 0)
    {
        return FALSE;
    }

    dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting %Id 2^%d plugs into %Id 2^%d free spaces",
        heap_number,
        small_blocks, (small_index + MIN_INDEX_POWER2),
        big_spaces, (big_index + MIN_INDEX_POWER2)));

    // each 2^big space holds 2^(big-small) plugs of the small bucket
    size_t big_to_small = big_spaces << (big_index - small_index);

    ptrdiff_t extra_small_spaces = big_to_small - small_blocks;
    dprintf (SEG_REUSE_LOG_1, ("[%d]%d 2^%d spaces can fit %d 2^%d blocks",
        heap_number,
        big_spaces, (big_index + MIN_INDEX_POWER2), big_to_small, (small_index + MIN_INDEX_POWER2)));
    BOOL can_fit = (extra_small_spaces >= 0);

    if (can_fit)
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Can fit with %d 2^%d extras blocks",
            heap_number,
            extra_small_spaces, (small_index + MIN_INDEX_POWER2)));
    }

    int i = 0;

    dprintf (SEG_REUSE_LOG_1, ("[%d]Setting # of 2^%d spaces to 0", heap_number, (big_index + MIN_INDEX_POWER2)));
    ordered_spaces[big_index] = 0;
    if (extra_small_spaces > 0)
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Setting # of 2^%d blocks to 0", heap_number, (small_index + MIN_INDEX_POWER2)));
        ordered_blocks[small_index] = 0;
        // redistribute the leftover capacity as one space per set bit,
        // from the small bucket upwards
        for (i = small_index; i < big_index; i++)
        {
            if (extra_small_spaces & 1)
            {
                dprintf (SEG_REUSE_LOG_1, ("[%d]Increasing # of 2^%d spaces from %d to %d",
                    heap_number,
                    (i + MIN_INDEX_POWER2), ordered_spaces[i], (ordered_spaces[i] + 1)));
                ordered_spaces[i] += 1;
            }
            extra_small_spaces >>= 1;
        }

        dprintf (SEG_REUSE_LOG_1, ("[%d]Finally increasing # of 2^%d spaces from %d to %d",
            heap_number,
            (i + MIN_INDEX_POWER2), ordered_spaces[i], (ordered_spaces[i] + extra_small_spaces)));
        ordered_spaces[i] += extra_small_spaces;
    }
    else
    {
        dprintf (SEG_REUSE_LOG_1, ("[%d]Decreasing # of 2^%d blocks from %d to %d",
            heap_number,
            (small_index + MIN_INDEX_POWER2),
            ordered_blocks[small_index],
            (ordered_blocks[small_index] - big_to_small)));
        ordered_blocks[small_index] -= big_to_small;
    }

#ifdef SEG_REUSE_STATS
    size_t temp;
    dprintf (SEG_REUSE_LOG_1, ("[%d]Plugs became:", heap_number));
    dump_buckets (ordered_blocks, MAX_NUM_BUCKETS, &temp);

    dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces became:", heap_number));
    dump_buckets (ordered_spaces, MAX_NUM_BUCKETS, &temp);
#endif //SEG_REUSE_STATS

    return can_fit;
}

// space_index gets updated to the biggest available space index.
// Fits all plugs of bucket 'block_index' using space buckets from *space_index
// downwards; FALSE once no space bucket >= block_index remains.
BOOL gc_heap::can_fit_blocks_p (size_t* ordered_blocks, int block_index, size_t* ordered_spaces, int* space_index)
{
    assert (*space_index >= block_index);

    while (!can_fit_in_spaces_p (ordered_blocks, block_index, ordered_spaces, *space_index))
    {
        (*space_index)--;
        if (*space_index < block_index)
        {
            return FALSE;
        }
    }

    return TRUE;
}

// Greedy check that every plug bucket (largest first) fits in the free-space
// buckets. Destructive on both histograms - callers work on copies.
BOOL gc_heap::can_fit_all_blocks_p (size_t* ordered_blocks, size_t* ordered_spaces, int count)
{
#ifdef FEATURE_STRUCTALIGN
    // BARTOKTODO (4841): reenable when can_fit_in_spaces_p takes alignment requirements into account
    return FALSE;
#endif // FEATURE_STRUCTALIGN
    int space_index = count - 1;
    for (int block_index = (count - 1); block_index >= 0; block_index--)
    {
        if (!can_fit_blocks_p (ordered_blocks, block_index, ordered_spaces, &space_index))
        {
            return FALSE;
        }
    }

    return TRUE;
}

// Populates bestfit_seg (the seg_free_spaces structure) with the actual free
// spaces of 'seg': the buckets built earlier, then the concrete pinned-plug
// gaps on the segment, and optionally the committed end-of-segment space.
void gc_heap::build_ordered_free_spaces (heap_segment* seg)
{
    assert (bestfit_seg);

    //bestfit_seg->add_buckets (MAX_NUM_BUCKETS - free_space_buckets + MIN_INDEX_POWER2,
    //                    ordered_free_space_indices + (MAX_NUM_BUCKETS - free_space_buckets),
    //                    free_space_buckets,
    //                    free_space_items);

    bestfit_seg->add_buckets (MIN_INDEX_POWER2,
                        ordered_free_space_indices,
                        MAX_NUM_BUCKETS,
                        free_space_items);

    assert (settings.condemned_generation == max_generation);

    uint8_t* first_address = heap_segment_mem (seg);
    uint8_t* end_address = heap_segment_reserved (seg);
    //look through the pinned plugs for relevant ones.
    //Look for the right pinned plug to start from.
    reset_pinned_queue_bos();
    mark* m = 0;
    // See comment in can_expand_into_p why we need (max_generation + 1).
    size_t eph_gen_starts = (Align (min_obj_size)) * (max_generation + 1);
    BOOL has_fit_gen_starts = FALSE;

    // skip pins until the first one on this segment whose gap can also hold
    // the generation start objects
    while (!pinned_plug_que_empty_p())
    {
        m = oldest_pin();
        if ((pinned_plug (m) >= first_address) &&
            (pinned_plug (m) < end_address) &&
            (pinned_len (m) >= eph_gen_starts))
        {

            assert ((pinned_plug (m) - pinned_len (m)) == bestfit_first_pin);
            break;
        }
        else
        {
            deque_pinned_plug();
        }
    }

    if (!pinned_plug_que_empty_p())
    {
        // first usable pin gap; TRUE,TRUE marks it as the one holding gen starts
        bestfit_seg->add ((void*)m, TRUE, TRUE);
        deque_pinned_plug();
        m = oldest_pin();
        has_fit_gen_starts = TRUE;
    }

    while (!pinned_plug_que_empty_p() &&
            ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address)))
    {
        bestfit_seg->add ((void*)m, TRUE, FALSE);
        deque_pinned_plug();
        m = oldest_pin();
    }

    if (commit_end_of_seg)
    {
        if (!has_fit_gen_starts)
        {
            assert (bestfit_first_pin == heap_segment_plan_allocated (seg));
        }
        bestfit_seg->add ((void*)seg, FALSE, (!has_fit_gen_starts));
    }

#ifdef _DEBUG
    bestfit_seg->check();
#endif //_DEBUG
}

// One best-fit attempt over the (possibly trimmed) histograms; when trying
// with the end-of-segment space the histograms were already prepared, so the
// trim is skipped.
BOOL gc_heap::try_best_fit (BOOL end_of_segment_p)
{
    if (!end_of_segment_p)
    {
        trim_free_spaces_indices ();
    }

    BOOL can_bestfit = can_fit_all_blocks_p (ordered_plug_indices,
                                             ordered_free_space_indices,
                                             MAX_NUM_BUCKETS);

    return can_bestfit;
}

// Decides whether all ephemeral plugs can be best-fitted into the candidate
// segment's free spaces ('free_space' total / 'largest_free_space' max,
// histograms built by the caller), optionally also using the uncommitted
// end-of-segment space ('additional_space'; *use_additional_space is set when
// it ends up being needed). Returns the resulting use_bestfit and adapts
// max_free_space_items for the next attempt on failure.
BOOL gc_heap::best_fit (size_t free_space,
                        size_t largest_free_space,
                        size_t additional_space,
                        BOOL* use_additional_space)
{
    dprintf (SEG_REUSE_LOG_0, ("gen%d: trying best fit mechanism",
        settings.condemned_generation));

    assert (!additional_space || (additional_space && use_additional_space));
    if (use_additional_space)
    {
        *use_additional_space = FALSE;
    }

    if (ordered_plug_indices_init == FALSE)
    {
        total_ephemeral_plugs = 0;
        build_ordered_plug_indices();
        ordered_plug_indices_init = TRUE;
    }
    else
    {
        // restore the pristine histogram saved by build_ordered_plug_indices
        memcpy (ordered_plug_indices, saved_ordered_plug_indices, sizeof(ordered_plug_indices));
    }

    // only the synthetic "extra" entry is present: no real plugs to fit
    if (total_ephemeral_plugs == (END_SPACE_AFTER_GC + Align (min_obj_size)))
    {
        dprintf (SEG_REUSE_LOG_0, ("No ephemeral plugs to realloc, done"));
        size_t empty_eph = (END_SPACE_AFTER_GC + Align (min_obj_size) + (Align (min_obj_size)) * (max_generation + 1));
        BOOL can_fit_empty_eph = (largest_free_space >= empty_eph);
        if (!can_fit_empty_eph)
        {
            can_fit_empty_eph = (additional_space >= empty_eph);

            if (can_fit_empty_eph)
            {
                *use_additional_space = TRUE;
            }
        }

        return can_fit_empty_eph;
    }

    if ((total_ephemeral_plugs + approximate_new_allocation()) >= (free_space + additional_space))
    {
        dprintf (SEG_REUSE_LOG_0, ("We won't have enough free space left in this segment after fitting, done"));
        return FALSE;
    }

    if ((free_space + additional_space) == 0)
    {
        dprintf (SEG_REUSE_LOG_0, ("No free space in this segment, done"));
        return FALSE;
    }

#ifdef SEG_REUSE_STATS
    dprintf (SEG_REUSE_LOG_0, ("Free spaces:"));
    size_t total_free_space_power2 = 0;
    size_t total_free_space_items =
        dump_buckets (ordered_free_space_indices,
                      MAX_NUM_BUCKETS,
                      &total_free_space_power2);
    dprintf (SEG_REUSE_LOG_0, ("currently max free spaces is %Id", max_free_space_items));

    dprintf (SEG_REUSE_LOG_0, ("Ephemeral plugs: 0x%Ix, free space: 0x%Ix (rounded down to 0x%Ix (%Id%%)), additional free_space: 0x%Ix",
                total_ephemeral_plugs,
                free_space,
                total_free_space_power2,
                (free_space ? (total_free_space_power2 * 100 / free_space) : 0),
                additional_space));

    size_t saved_all_free_space_indices[MAX_NUM_BUCKETS];
    memcpy (saved_all_free_space_indices,
            ordered_free_space_indices,
            sizeof(saved_all_free_space_indices));

#endif // SEG_REUSE_STATS

    if (total_ephemeral_plugs > (free_space + additional_space))
    {
        return FALSE;
    }

    use_bestfit = try_best_fit(FALSE);

    // didn't fit with the in-segment spaces alone: consider also offering the
    // end-of-segment space as one extra power-of-2 free space
    if (!use_bestfit && additional_space)
    {
        int relative_free_space_index = relative_index_power2_free_space (round_down_power2 (additional_space));

        if (relative_free_space_index != -1)
        {
            int relative_plug_index = 0;
            size_t plugs_to_fit = 0;

            // find the largest plug bucket that still has entries
            for (relative_plug_index = (MAX_NUM_BUCKETS - 1); relative_plug_index >= 0; relative_plug_index--)
            {
                plugs_to_fit = ordered_plug_indices[relative_plug_index];
                if (plugs_to_fit != 0)
                {
                    break;
                }
            }

            // end-of-seg space can help at most one plug of its own size class
            if ((relative_plug_index > relative_free_space_index) ||
                ((relative_plug_index == relative_free_space_index) &&
                (plugs_to_fit > 1)))
            {
#ifdef SEG_REUSE_STATS
                dprintf (SEG_REUSE_LOG_0, ("additional space is 2^%d but we stopped at %d 2^%d plug(s)",
                            (relative_free_space_index + MIN_INDEX_POWER2),
                            plugs_to_fit,
                            (relative_plug_index + MIN_INDEX_POWER2)));
#endif // SEG_REUSE_STATS
                goto adjust;
            }

            dprintf (SEG_REUSE_LOG_0, ("Adding end of segment (2^%d)", (relative_free_space_index + MIN_INDEX_POWER2)));
            ordered_free_space_indices[relative_free_space_index]++;
            use_bestfit = try_best_fit(TRUE);
            if (use_bestfit)
            {
                free_space_items++;
                // Since we might've trimmed away some of the free spaces we had, we should see
                // if we really need to use end of seg space - if it's the same or smaller than
                // the largest space we trimmed we can just add that one back instead of
                // using end of seg.
                if (relative_free_space_index > trimmed_free_space_index)
                {
                    *use_additional_space = TRUE;
                }
                else
                {
                    // If the addition space is <= than the last trimmed space, we
                    // should just use that last trimmed space instead.
                    saved_ordered_free_space_indices[trimmed_free_space_index]++;
                }
            }
        }
    }

adjust:

    if (!use_bestfit)
    {
        dprintf (SEG_REUSE_LOG_0, ("couldn't fit..."));

#ifdef SEG_REUSE_STATS
        size_t saved_max = max_free_space_items;
        BOOL temp_bestfit = FALSE;

        dprintf (SEG_REUSE_LOG_0, ("----Starting experiment process----"));
        dprintf (SEG_REUSE_LOG_0, ("----Couldn't fit with max free items %Id", max_free_space_items));

        // TODO: need to take the end of segment into consideration.
        while (max_free_space_items <= total_free_space_items)
        {
            max_free_space_items += max_free_space_items / 2;
            dprintf (SEG_REUSE_LOG_0, ("----Temporarily increasing max free spaces to %Id", max_free_space_items));
            memcpy (ordered_free_space_indices,
                    saved_all_free_space_indices,
                    sizeof(ordered_free_space_indices));
            if (try_best_fit(FALSE))
            {
                temp_bestfit = TRUE;
                break;
            }
        }

        if (temp_bestfit)
        {
            dprintf (SEG_REUSE_LOG_0, ("----With %Id max free spaces we could fit", max_free_space_items));
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("----Tried all free spaces and still couldn't fit, lost too much space"));
        }

        dprintf (SEG_REUSE_LOG_0, ("----Restoring max free spaces to %Id", saved_max));
        max_free_space_items = saved_max;
#endif // SEG_REUSE_STATS
        // adapt the cap for the next GC's attempt
        if (free_space_items)
        {
            max_free_space_items = min (MAX_NUM_FREE_SPACES, free_space_items * 2);
            max_free_space_items = max (max_free_space_items, MIN_NUM_FREE_SPACES);
        }
        else
        {
            max_free_space_items = MAX_NUM_FREE_SPACES;
        }
    }

    dprintf (SEG_REUSE_LOG_0, ("Adjusted number of max free spaces to %Id", max_free_space_items));
    dprintf (SEG_REUSE_LOG_0, ("------End of best fitting process------\n"));

    return use_bestfit;
}

BOOL
// (return type BOOL is on the preceding line)
// Accumulates one free-space run into the running totals used when deciding
// whether a gen2 segment can be reused for the ephemeral generations.
// seg                - segment being evaluated (only used for logging).
// free_space         - length of the free run being recorded.
// min_free_size      - total free space required for reuse without bestfit.
// min_cont_size      - largest single contiguous run required.
// total_free_space   - in/out: running total of free space seen so far.
// largest_free_space - in/out: largest single run seen so far.
// Returns TRUE as soon as both thresholds are met (segment reusable without
// bestfit); otherwise buckets the run by power of 2 into
// ordered_free_space_indices for a later bestfit attempt and returns FALSE.
gc_heap::process_free_space (heap_segment* seg,
                             size_t free_space,
                             size_t min_free_size,
                             size_t min_cont_size,
                             size_t* total_free_space,
                             size_t* largest_free_space)
{
    *total_free_space += free_space;
    *largest_free_space = max (*largest_free_space, free_space);

#ifdef SIMPLE_DPRINTF
    dprintf (SEG_REUSE_LOG_1, ("free space len: %Ix, total free space: %Ix, largest free space: %Ix",
        free_space, *total_free_space, *largest_free_space));
#endif //SIMPLE_DPRINTF

    if ((*total_free_space >= min_free_size) && (*largest_free_space >= min_cont_size))
    {
#ifdef SIMPLE_DPRINTF
        dprintf (SEG_REUSE_LOG_0, ("(gen%d)total free: %Ix(min: %Ix), largest free: %Ix(min: %Ix). Found segment %Ix to reuse without bestfit",
            settings.condemned_generation,
            *total_free_space, min_free_size, *largest_free_space, min_cont_size,
            (size_t)seg));
#else
        UNREFERENCED_PARAMETER(seg);
#endif //SIMPLE_DPRINTF
        return TRUE;
    }

    // Not enough yet - remember this run (bucketed by rounded-down power of 2)
    // so a subsequent bestfit pass can use it. -1 means the run is below the
    // smallest tracked bucket.
    int free_space_index = relative_index_power2_free_space (round_down_power2 (free_space));
    if (free_space_index != -1)
    {
        ordered_free_space_indices[free_space_index]++;
    }
    return FALSE;
}

// Returns TRUE if the last heap expansion was satisfied by reusing an
// existing segment (either via bestfit or normal reuse), as recorded in the
// per-heap GC history.
BOOL gc_heap::expand_reused_seg_p()
{
    BOOL reused_seg = FALSE;
    int heap_expand_mechanism = gc_data_per_heap.get_mechanism (gc_heap_expand);
    if ((heap_expand_mechanism == expand_reuse_bestfit) ||
        (heap_expand_mechanism == expand_reuse_normal))
    {
        reused_seg = TRUE;
    }

    return reused_seg;
}

// Decides whether 'seg' (an existing segment) has enough reusable space to
// become the new ephemeral segment during heap expansion.
// min_free_size  - total free space the plan requires.
// min_cont_size  - largest contiguous run required (padded by END_SPACE_AFTER_GC).
// gen_allocator  - gen1 free-list allocator, only consulted on the gen1 path.
// Side effects: sets use_bestfit / commit_end_of_seg / bestfit_first_pin, may
// consume entries from the pinned plug queue, may commit end-of-segment space,
// and (on the bestfit path) rebuilds ordered_free_space_indices.
BOOL gc_heap::can_expand_into_p (heap_segment* seg, size_t min_free_size, size_t min_cont_size,
                                 allocator* gen_allocator)
{
    min_cont_size += END_SPACE_AFTER_GC;
    use_bestfit = FALSE;
    commit_end_of_seg = FALSE;
    bestfit_first_pin = 0;
    uint8_t* first_address = heap_segment_mem (seg);
    uint8_t* end_address = heap_segment_reserved (seg);
    size_t end_extra_space = end_space_after_gc();

    // Reject immediately if the segment doesn't even have the mandatory
    // post-GC slack between plan_allocated and reserved.
    if ((heap_segment_reserved (seg) - end_extra_space) <= heap_segment_plan_allocated (seg))
    {
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: can't use segment [%Ix %Ix, has less than %d bytes at the end",
                                   first_address, end_address, end_extra_space));
        return FALSE;
    }

    end_address -= end_extra_space;

    dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p(gen%d): min free: %Ix, min continuous: %Ix",
                               settings.condemned_generation, min_free_size, min_cont_size));
    size_t eph_gen_starts = eph_gen_starts_size;

    if (settings.condemned_generation == max_generation)
    {
        // gen2 (full GC) path: free space comes from the gaps in front of
        // pinned plugs plus whatever is left at the end of the segment.
        size_t free_space = 0;
        size_t largest_free_space = free_space;
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: gen2: testing segment [%Ix %Ix", first_address, end_address));
        //Look through the pinned plugs for relevant ones and Look for the right pinned plug to start from.
        //We are going to allocate the generation starts in the 1st free space,
        //so start from the first free space that's big enough for gen starts and a min object size.
        // If we see a free space that is >= gen starts but < gen starts + min obj size we just don't use it -
        // we could use it by allocating the last generation start a bit bigger but
        // the complexity isn't worth the effort (those plugs are from gen2
        // already anyway).
        reset_pinned_queue_bos();
        mark* m = 0;
        BOOL has_fit_gen_starts = FALSE;

        init_ordered_free_space_indices ();
        // Skip pins until we find one whose leading gap can hold the
        // generation starts plus one minimum object.
        while (!pinned_plug_que_empty_p())
        {
            m = oldest_pin();
            if ((pinned_plug (m) >= first_address) &&
                (pinned_plug (m) < end_address) &&
                (pinned_len (m) >= (eph_gen_starts + Align (min_obj_size))))
            {
                break;
            }
            else
            {
                deque_pinned_plug();
            }
        }

        if (!pinned_plug_que_empty_p())
        {
            bestfit_first_pin = pinned_plug (m) - pinned_len (m);

            // First usable gap: charge it minus the gen-starts reservation.
            if (process_free_space (seg,
                                    pinned_len (m) - eph_gen_starts,
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            deque_pinned_plug();
            m = oldest_pin();
            has_fit_gen_starts = TRUE;
        }

        dprintf (3, ("first pin is %Ix", pinned_plug (m)));

        //tally up free space
        while (!pinned_plug_que_empty_p() &&
               ((pinned_plug (m) >= first_address) && (pinned_plug (m) < end_address)))
        {
            dprintf (3, ("looking at pin %Ix", pinned_plug (m)));
            if (process_free_space (seg,
                                    pinned_len (m),
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            deque_pinned_plug();
            m = oldest_pin();
        }

        //try to find space at the end of the segment.
        size_t end_space = (end_address - heap_segment_plan_allocated (seg));
        size_t additional_space = ((min_free_size > free_space) ? (min_free_size - free_space) : 0);
        dprintf (SEG_REUSE_LOG_0, ("end space: %Ix; additional: %Ix", end_space, additional_space));
        if (end_space >= additional_space)
        {
            BOOL can_fit = TRUE;
            commit_end_of_seg = TRUE;

            if (largest_free_space < min_cont_size)
            {
                if (end_space >= min_cont_size)
                {
                    // End-of-seg can serve as the contiguous run; make sure we
                    // commit at least that much.
                    additional_space = max (min_cont_size, additional_space);
                    dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %Ix to reuse without bestfit, with committing end of seg for eph",
                        seg));
                }
                else
                {
                    if (settings.concurrent)
                    {
                        // NOTE(review): bestfit is not attempted during a
                        // concurrent GC here - confirm against the bestfit
                        // machinery's concurrency assumptions.
                        can_fit = FALSE;
                        commit_end_of_seg = FALSE;
                    }
                    else
                    {
                        size_t additional_space_bestfit = additional_space;
                        if (!has_fit_gen_starts)
                        {
                            // Gen starts were never placed in a pin gap, so
                            // they must come out of the end-of-seg space.
                            if (additional_space_bestfit < (eph_gen_starts + Align (min_obj_size)))
                            {
                                dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, gen starts not allocated yet and end space is too small: %Id",
                                    additional_space_bestfit));
                                return FALSE;
                            }

                            bestfit_first_pin = heap_segment_plan_allocated (seg);
                            additional_space_bestfit -= eph_gen_starts;
                        }

                        can_fit = best_fit (free_space,
                                            largest_free_space,
                                            additional_space_bestfit,
                                            &commit_end_of_seg);

                        if (can_fit)
                        {
                            dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %Ix to reuse with bestfit, %s committing end of seg",
                                seg, (commit_end_of_seg ? "with" : "without")));
                        }
                        else
                        {
                            dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, total free space is %Ix", (free_space + end_space)));
                        }
                    }
                }
            }
            else
            {
                dprintf (SEG_REUSE_LOG_0, ("(gen2)Found segment %Ix to reuse without bestfit, with committing end of seg", seg));
            }

            assert (additional_space <= end_space);
            if (commit_end_of_seg)
            {
                if (!grow_heap_segment (seg, heap_segment_plan_allocated (seg) + additional_space))
                {
                    dprintf (2, ("Couldn't commit end of segment?!"));
                    use_bestfit = FALSE;

                    return FALSE;
                }

                if (use_bestfit)
                {
                    // We increase the index here because growing heap segment could create a discrepency with
                    // the additional space we used (could be bigger).
                    size_t free_space_end_of_seg =
                        heap_segment_committed (seg) - heap_segment_plan_allocated (seg);
                    int relative_free_space_index = relative_index_power2_free_space (round_down_power2 (free_space_end_of_seg));
                    saved_ordered_free_space_indices[relative_free_space_index]++;
                }
            }

            if (use_bestfit)
            {
                // Publish the bucket counts chosen by bestfit and clamp the
                // number of free-space items we will track next time.
                memcpy (ordered_free_space_indices,
                        saved_ordered_free_space_indices,
                        sizeof(ordered_free_space_indices));
                max_free_space_items = max (MIN_NUM_FREE_SPACES, free_space_items * 3 / 2);
                max_free_space_items = min (MAX_NUM_FREE_SPACES, max_free_space_items);
                dprintf (SEG_REUSE_LOG_0, ("could fit! %Id free spaces, %Id max", free_space_items, max_free_space_items));
            }

            return can_fit;
        }

        dprintf (SEG_REUSE_LOG_0, ("(gen2)Couldn't fit, total free space is %Ix", (free_space + end_space)));
        return FALSE;
    }
    else
    {
        // gen1 path: free space comes from the gen1 free list entries that
        // fall inside this segment (plus the end-of-seg run counted up front).
        assert (settings.condemned_generation == (max_generation-1));
        size_t free_space = (end_address - heap_segment_plan_allocated (seg));
        size_t largest_free_space = free_space;
        dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: gen1: testing segment [%Ix %Ix", first_address, end_address));
        //find the first free list in range of the current segment
        size_t sz_list = gen_allocator->first_bucket_size();
        unsigned int a_l_idx = 0;
        uint8_t* free_list = 0;
        for (; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
        {
            if ((eph_gen_starts <= sz_list) || (a_l_idx == (gen_allocator->number_of_buckets()-1)))
            {
                free_list = gen_allocator->alloc_list_head_of (a_l_idx);
                while (free_list)
                {
                    if ((free_list >= first_address) &&
                        (free_list < end_address) &&
                        (unused_array_size (free_list) >= eph_gen_starts))
                    {
                        goto next;
                    }
                    else
                    {
                        free_list = free_list_slot (free_list);
                    }
                }
            }
        }
next:
        if (free_list)
        {
            init_ordered_free_space_indices ();
            // First item hosts the generation starts; only the remainder
            // (plus one min-object pad) counts as free.
            if (process_free_space (seg,
                                    unused_array_size (free_list) - eph_gen_starts + Align (min_obj_size),
                                    min_free_size, min_cont_size,
                                    &free_space, &largest_free_space))
            {
                return TRUE;
            }

            free_list = free_list_slot (free_list);
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, no free list"));
            return FALSE;
        }

        //tally up free space

        while (1)
        {
            while (free_list)
            {
                if ((free_list >= first_address) && (free_list < end_address) &&
                    process_free_space (seg,
                                        unused_array_size (free_list),
                                        min_free_size, min_cont_size,
                                        &free_space, &largest_free_space))
                {
                    return TRUE;
                }

                free_list = free_list_slot (free_list);
            }
            a_l_idx++;
            if (a_l_idx < gen_allocator->number_of_buckets())
            {
                free_list = gen_allocator->alloc_list_head_of (a_l_idx);
            }
            else
                break;
        }

        dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, total free space is %Ix", free_space));
        return FALSE;

        /*
        BOOL can_fit = best_fit (free_space, 0, NULL);
        if (can_fit)
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Found segment %Ix to reuse with bestfit", seg));
        }
        else
        {
            dprintf (SEG_REUSE_LOG_0, ("(gen1)Couldn't fit, total free space is %Ix", free_space));
        }

        return can_fit;
        */
    }
}

// Re-plans a single plug onto the (reused) expansion segment: advances the
// generation boundary when crossed, accounts for pinned plugs (fixing their
// gap lengths and setting cards across the demotion fault), and otherwise
// allocates a new destination for the plug and records its relocation
// distance. last_plug/last_pinned_gap/leftp are carried between calls.
void gc_heap::realloc_plug (size_t last_plug_size, uint8_t*& last_plug,
                            generation* gen, uint8_t* start_address,
                            unsigned int& active_new_gen_number,
                            uint8_t*& last_pinned_gap, BOOL& leftp,
                            BOOL shortened_p
#ifdef SHORT_PLUGS
                            , mark* pinned_plug_entry
#endif //SHORT_PLUGS
                            )
{
    // detect generation boundaries
    // make sure that active_new_gen_number is not the youngest generation.
    // because the generation_limit wouldn't return the right thing in this case.
    if (!use_bestfit)
    {
        if ((active_new_gen_number > 1) &&
            (last_plug >= generation_limit (active_new_gen_number)))
        {
            assert (last_plug >= start_address);
            active_new_gen_number--;
            realloc_plan_generation_start (generation_of (active_new_gen_number), gen);
            assert (generation_plan_allocation_start (generation_of (active_new_gen_number)));
            leftp = FALSE;
        }
    }

    // detect pinned plugs
    if (!pinned_plug_que_empty_p() && (last_plug == pinned_plug (oldest_pin())))
    {
        size_t entry = deque_pinned_plug();
        mark* m = pinned_plug_of (entry);

        size_t saved_pinned_len = pinned_len(m);
        pinned_len(m) = last_plug - last_pinned_gap;
        //dprintf (3,("Adjusting pinned gap: [%Ix, %Ix[", (size_t)last_pinned_gap, (size_t)last_plug));

        if (m->has_post_plug_info())
        {
            last_plug_size += sizeof (gap_reloc_pair);
            // NOTE(review): no trailing ';' - appears to rely on the dprintf
            // macro's expansion; confirm before reformatting.
            dprintf (3, ("ra pinned %Ix was shortened, adjusting plug size to %Ix", last_plug, last_plug_size))
        }

        last_pinned_gap = last_plug + last_plug_size;
        dprintf (3, ("ra found pin %Ix, len: %Ix->%Ix, last_p: %Ix, last_p_size: %Ix",
            pinned_plug (m), saved_pinned_len, pinned_len (m), last_plug, last_plug_size));
        leftp = FALSE;

        //we are creating a generation fault. set the cards.
        {
            size_t end_card = card_of (align_on_card (last_plug + last_plug_size));
            size_t card = card_of (last_plug);
            while (card != end_card)
            {
                set_card (card);
                card++;
            }
        }
    }
    else if (last_plug >= start_address)
    {
#ifdef FEATURE_STRUCTALIGN
        int requiredAlignment;
        ptrdiff_t pad;
        node_aligninfo (last_plug, requiredAlignment, pad);

        // from how we previously aligned the plug's destination address,
        // compute the actual alignment offset.
        uint8_t* reloc_plug = last_plug + node_relocation_distance (last_plug);
        ptrdiff_t alignmentOffset = ComputeStructAlignPad(reloc_plug, requiredAlignment, 0);
        if (!alignmentOffset)
        {
            // allocate_in_expanded_heap doesn't expect alignmentOffset to be zero.
            alignmentOffset = requiredAlignment;
        }

        //clear the alignment info because we are reallocating
        clear_node_aligninfo (last_plug);
#else // FEATURE_STRUCTALIGN
        //clear the realignment flag because we are reallocating
        clear_node_realigned (last_plug);
#endif // FEATURE_STRUCTALIGN
        BOOL adjacentp = FALSE;
        BOOL set_padding_on_saved_p = FALSE;

        if (shortened_p)
        {
            // Plug was shortened by a following pinned plug; restore the
            // saved gap/reloc pair into its size.
            last_plug_size += sizeof (gap_reloc_pair);

#ifdef SHORT_PLUGS
            assert (pinned_plug_entry != NULL);
            if (last_plug_size <= sizeof (plug_and_gap))
            {
                set_padding_on_saved_p = TRUE;
            }
#endif //SHORT_PLUGS

            // NOTE(review): missing ';' here as well - same dprintf macro reliance.
            dprintf (3, ("ra plug %Ix was shortened, adjusting plug size to %Ix", last_plug, last_plug_size))
        }

#ifdef SHORT_PLUGS
        clear_padding_in_expand (last_plug, set_padding_on_saved_p, pinned_plug_entry);
#endif //SHORT_PLUGS

        uint8_t* new_address = allocate_in_expanded_heap(gen, last_plug_size, adjacentp, last_plug,
#ifdef SHORT_PLUGS
                                                         set_padding_on_saved_p,
                                                         pinned_plug_entry,
#endif //SHORT_PLUGS
                                                         TRUE, active_new_gen_number REQD_ALIGN_AND_OFFSET_ARG);

        dprintf (3, ("ra NA: [%Ix, %Ix[: %Ix", new_address, (new_address + last_plug_size), last_plug_size));
        assert (new_address);
        set_node_relocation_distance (last_plug, new_address - last_plug);
#ifdef FEATURE_STRUCTALIGN
        if (leftp && node_alignpad (last_plug) == 0)
#else // FEATURE_STRUCTALIGN
        if (leftp && !node_realigned (last_plug))
#endif // FEATURE_STRUCTALIGN
        {
            // TODO - temporarily disable L optimization because of a bug in it.
            //set_node_left (last_plug);
        }
        dprintf (3,(" Re-allocating %Ix->%Ix len %Id", (size_t)last_plug, (size_t)new_address, last_plug_size));
        leftp = adjacentp;
    }
}

// In-order walk of a brick's plug tree, re-planning each plug via
// realloc_plug. last_plug trails one behind so each plug's size can be
// derived from the gap in front of its successor.
void gc_heap::realloc_in_brick (uint8_t* tree, uint8_t*& last_plug,
                                uint8_t* start_address,
                                generation* gen,
                                unsigned int& active_new_gen_number,
                                uint8_t*& last_pinned_gap, BOOL& leftp)
{
    assert (tree != NULL);
    int left_node = node_left_child (tree);
    int right_node = node_right_child (tree);

    dprintf (3, ("ra: tree: %Ix, last_pin_gap: %Ix, last_p: %Ix, L: %d, R: %d",
        tree, last_pinned_gap, last_plug, left_node, right_node));

    if (left_node)
    {
        dprintf (3, ("LN: realloc %Ix(%Ix)", (tree + left_node), last_plug));
        realloc_in_brick ((tree + left_node), last_plug, start_address,
                          gen, active_new_gen_number, last_pinned_gap,
                          leftp);
    }

    if (last_plug != 0)
    {
        uint8_t* plug = tree;

        BOOL has_pre_plug_info_p = FALSE;
        BOOL has_post_plug_info_p = FALSE;
        mark* pinned_plug_entry = get_next_pinned_entry (tree,
                                                         &has_pre_plug_info_p,
                                                         &has_post_plug_info_p,
                                                         FALSE);

        // We only care about the pre plug info 'cause that's what decides if the last plug is shortened.
        // The pinned plugs are handled in realloc_plug.
        size_t gap_size = node_gap_size (plug);
        uint8_t* gap = (plug - gap_size);
        uint8_t* last_plug_end = gap;
        size_t last_plug_size = (last_plug_end - last_plug);
        // Cannot assert this - a plug could be less than that due to the shortened ones.
        //assert (last_plug_size >= Align (min_obj_size));
        dprintf (3, ("ra: plug %Ix, gap size: %Ix, last_pin_gap: %Ix, last_p: %Ix, last_p_end: %Ix, shortened: %d",
            plug, gap_size, last_pinned_gap, last_plug, last_plug_end, (has_pre_plug_info_p ? 1 : 0)));
        realloc_plug (last_plug_size, last_plug, gen, start_address,
                      active_new_gen_number, last_pinned_gap,
                      leftp, has_pre_plug_info_p
#ifdef SHORT_PLUGS
                      , pinned_plug_entry
#endif //SHORT_PLUGS
                      );
    }

    last_plug = tree;

    if (right_node)
    {
        dprintf (3, ("RN: realloc %Ix(%Ix)", (tree + right_node), last_plug));
        realloc_in_brick ((tree + right_node), last_plug, start_address,
                          gen, active_new_gen_number, last_pinned_gap,
                          leftp);
    }
}

// Re-plans every plug in [start_address, end_address) of the old ephemeral
// segment 'seg' onto the reused segment by walking the brick table and each
// brick's plug tree (realloc_in_brick). On the bestfit path it first makes
// sure every ephemeral generation has a planned allocation start. Finishes by
// fixing seg's plan_allocated to the last pinned gap.
void
gc_heap::realloc_plugs (generation* consing_gen, heap_segment* seg,
                        uint8_t* start_address, uint8_t* end_address,
                        unsigned active_new_gen_number)
{
    dprintf (3, ("--- Reallocing ---"));

    if (use_bestfit)
    {
        //make sure that every generation has a planned allocation start
        int gen_number = max_generation - 1;
        while (gen_number >= 0)
        {
            generation* gen = generation_of (gen_number);
            if (0 == generation_plan_allocation_start (gen))
            {
                // Stack the missing gen starts (one min-object each) right
                // after bestfit_first_pin, oldest generation first.
                generation_plan_allocation_start (gen) =
                    bestfit_first_pin + (max_generation - gen_number - 1) * Align (min_obj_size);
                generation_plan_allocation_start_size (gen) = Align (min_obj_size);
                assert (generation_plan_allocation_start (gen));
            }
            gen_number--;
        }
    }

    uint8_t* first_address = start_address;
    //Look for the right pinned plug to start from.
    reset_pinned_queue_bos();
    uint8_t* planned_ephemeral_seg_end = heap_segment_plan_allocated (seg);
    while (!pinned_plug_que_empty_p())
    {
        mark* m = oldest_pin();
        if ((pinned_plug (m) >= planned_ephemeral_seg_end) && (pinned_plug (m) < end_address))
        {
            // A pin can precede start_address; widen the walk to include it.
            if (pinned_plug (m) < first_address)
            {
                first_address = pinned_plug (m);
            }
            break;
        }
        else
            deque_pinned_plug();
    }

    size_t current_brick = brick_of (first_address);
    size_t end_brick = brick_of (end_address-1);
    uint8_t* last_plug = 0;

    uint8_t* last_pinned_gap = heap_segment_plan_allocated (seg);
    BOOL leftp = FALSE;

    dprintf (3, ("start addr: %Ix, first addr: %Ix, current oldest pin: %Ix",
        start_address, first_address, pinned_plug (oldest_pin())));

    while (current_brick <= end_brick)
    {
        int brick_entry = brick_table [ current_brick ];
        if (brick_entry >= 0)
        {
            realloc_in_brick ((brick_address (current_brick) + brick_entry - 1),
                              last_plug, start_address, consing_gen,
                              active_new_gen_number, last_pinned_gap,
                              leftp);
        }
        current_brick++;
    }

    if (last_plug != 0)
    {
        // Flush the trailing plug; its size runs to end_address.
        realloc_plug (end_address - last_plug, last_plug, consing_gen,
                      start_address,
                      active_new_gen_number, last_pinned_gap,
                      leftp, FALSE
#ifdef SHORT_PLUGS
                      , NULL
#endif //SHORT_PLUGS
                      );
    }

    //Fix the old segment allocated size
    assert (last_pinned_gap >= heap_segment_mem (seg));
    assert (last_pinned_gap <= heap_segment_committed (seg));
    heap_segment_plan_allocated (seg) = last_pinned_gap;
}

// Debug-only check (HEAPVERIFY_GC): fails fast if any entry on the mark
// stack pins an object inside [start, end). No-op without VERIFY_HEAP.
void gc_heap::verify_no_pins (uint8_t* start, uint8_t* end)
{
#ifdef VERIFY_HEAP
    if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC)
    {
        BOOL contains_pinned_plugs = FALSE;
        size_t mi = 0;
        mark* m = 0;
        while (mi != mark_stack_tos)
        {
            m = pinned_plug_of (mi);
            if ((pinned_plug (m) >= start) && (pinned_plug (m) < end))
            {
                contains_pinned_plugs = TRUE;
                break;
            }
            else
                mi++;
        }

        if (contains_pinned_plugs)
        {
            FATAL_GC_ERROR();
        }
    }
#endif //VERIFY_HEAP
}

// Requests expansion on the next full GC after an ephemeral GC, unless a
// low-latency pause mode forbids it. Sticky: never clears the flag.
void gc_heap::set_expand_in_full_gc (int condemned_gen_number)
{
    if (!should_expand_in_full_gc)
    {
        if ((condemned_gen_number != max_generation) &&
            (settings.pause_mode != pause_low_latency) &&
            (settings.pause_mode != pause_sustained_low_latency))
        {
            should_expand_in_full_gc = TRUE;
        }
    }
}

// Snapshots each ephemeral generation's planned allocation start (and its
// size) so they can be restored/re-applied during ephemeral promotion.
void gc_heap::save_ephemeral_generation_starts()
{
    for (int ephemeral_generation = 0; ephemeral_generation < max_generation; ephemeral_generation++)
    {
        saved_ephemeral_plan_start[ephemeral_generation] =
            generation_plan_allocation_start (generation_of (ephemeral_generation));
        saved_ephemeral_plan_start_size[ephemeral_generation] =
            generation_plan_allocation_start_size (generation_of (ephemeral_generation));
    }
}

// Performs heap expansion: moves the ephemeral generations onto
// new_heap_segment (a brand-new segment or a reused one), re-planning plugs
// as needed. Returns the (possibly replaced) consing generation; returns the
// input consing_gen unchanged on failure (no segment / commit failure).
generation* gc_heap::expand_heap (int condemned_generation,
                                  generation* consing_gen,
                                  heap_segment* new_heap_segment)
{
    UNREFERENCED_PARAMETER(condemned_generation);
    assert (condemned_generation >= (max_generation -1));
    unsigned int active_new_gen_number = max_generation; //Set one too high to get generation gap
    uint8_t* start_address = generation_limit (max_generation);
    uint8_t* end_address = heap_segment_allocated (ephemeral_heap_segment);
    BOOL should_promote_ephemeral = FALSE;
    ptrdiff_t eph_size = total_ephemeral_size;
#ifdef BACKGROUND_GC
    dprintf(2,("%s: ---- Heap Expansion ----", (recursive_gc_sync::background_running_p() ? "FGC" : "NGC")));
#endif //BACKGROUND_GC
    settings.heap_expansion = TRUE;

#ifdef BACKGROUND_GC
    if (cm_in_progress)
    {
        // Remember that a foreground GC expanded while concurrent marking
        // was in progress.
        if (!expanded_in_fgc)
        {
            expanded_in_fgc = TRUE;
        }
    }
#endif //BACKGROUND_GC

    //reset the elevation state for next time.
    dprintf (2, ("Elevation: elevation = el_none"));
    if (settings.should_lock_elevation && !expand_reused_seg_p())
        settings.should_lock_elevation = FALSE;

    heap_segment* new_seg = new_heap_segment;

    if (!new_seg)
        return consing_gen;

    //copy the card and brick tables
    if (g_card_table!= card_table)
        copy_brick_card_table();

    // A reused segment is already threaded into the chain, so next != 0
    // distinguishes "brand new" from "reused".
    BOOL new_segment_p = (heap_segment_next (new_seg) == 0);
    dprintf (2, ("new_segment_p %Ix", (size_t)new_segment_p));

    assert (generation_plan_allocation_start (generation_of (max_generation-1)));
    assert (generation_plan_allocation_start (generation_of (max_generation-1)) >=
            heap_segment_mem (ephemeral_heap_segment));
    assert (generation_plan_allocation_start (generation_of (max_generation-1)) <=
            heap_segment_committed (ephemeral_heap_segment));

    assert (generation_plan_allocation_start (youngest_generation));
    assert (generation_plan_allocation_start (youngest_generation) <
            heap_segment_plan_allocated (ephemeral_heap_segment));

    if (settings.pause_mode == pause_no_gc)
    {
        // We don't reuse for no gc, so the size used on the new eph seg is eph_size.
        if ((size_t)(heap_segment_reserved (new_seg) - heap_segment_mem (new_seg)) < (eph_size + soh_allocation_no_gc))
            should_promote_ephemeral = TRUE;
    }
    else
    {
        if (!use_bestfit)
        {
            should_promote_ephemeral = dt_low_ephemeral_space_p (tuning_deciding_promote_ephemeral);
        }
    }

    if (should_promote_ephemeral)
    {
        // Not enough room to carry the ephemeral generations over; promote
        // them instead and remember their planned starts.
        ephemeral_promotion = TRUE;
        get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_new_seg_ep);
        dprintf (2, ("promoting ephemeral"));
        save_ephemeral_generation_starts();
    }
    else
    {
        // commit the new ephemeral segment all at once if it is a new one.
        if ((eph_size > 0) && new_segment_p)
        {
#ifdef FEATURE_STRUCTALIGN
            // The destination may require a larger alignment padding than the source.
            // Assume the worst possible alignment padding.
            eph_size += ComputeStructAlignPad(heap_segment_mem (new_seg), MAX_STRUCTALIGN, OBJECT_ALIGNMENT_OFFSET);
#endif // FEATURE_STRUCTALIGN
#ifdef RESPECT_LARGE_ALIGNMENT
            //Since the generation start can be larger than min_obj_size
            //The alignment could be switched.
            eph_size += switch_alignment_size(FALSE);
#endif //RESPECT_LARGE_ALIGNMENT
            //Since the generation start can be larger than min_obj_size
            //Compare the alignemnt of the first object in gen1
            if (grow_heap_segment (new_seg, heap_segment_mem (new_seg) + eph_size) == 0)
            {
                fgm_result.set_fgm (fgm_commit_eph_segment, eph_size, FALSE);
                return consing_gen;
            }
            heap_segment_used (new_seg) = heap_segment_committed (new_seg);
        }

        //Fix the end of the old ephemeral heap segment
        heap_segment_plan_allocated (ephemeral_heap_segment) =
            generation_plan_allocation_start (generation_of (max_generation-1));

        dprintf (3, ("Old ephemeral allocated set to %Ix",
            (size_t)heap_segment_plan_allocated (ephemeral_heap_segment)));
    }

    if (new_segment_p)
    {
        // TODO - Is this really necessary? We should think about it.
        //initialize the first brick
        size_t first_brick = brick_of (heap_segment_mem (new_seg));
        set_brick (first_brick,
                   heap_segment_mem (new_seg) - brick_address (first_brick));
    }

    //From this point on, we cannot run out of memory

    //reset the allocation of the consing generation back to the end of the
    //old ephemeral segment
    generation_allocation_limit (consing_gen) =
        heap_segment_plan_allocated (ephemeral_heap_segment);
    generation_allocation_pointer (consing_gen) = generation_allocation_limit (consing_gen);
    generation_allocation_segment (consing_gen) = ephemeral_heap_segment;

    //clear the generation gap for all of the ephemeral generations
    {
        int generation_num = max_generation-1;
        while (generation_num >= 0)
        {
            generation* gen = generation_of (generation_num);
            generation_plan_allocation_start (gen) = 0;
            generation_num--;
        }
    }

    heap_segment* old_seg = ephemeral_heap_segment;
    ephemeral_heap_segment = new_seg;

    //Note: the ephemeral segment shouldn't be threaded onto the segment chain
    //because the relocation and compact phases shouldn't see it

    // set the generation members used by allocate_in_expanded_heap
    // and switch to ephemeral generation
    consing_gen = ensure_ephemeral_heap_segment (consing_gen);

    if (!should_promote_ephemeral)
    {
        realloc_plugs (consing_gen, old_seg, start_address, end_address,
                       active_new_gen_number);
    }

    if (!use_bestfit)
    {
        repair_allocation_in_expanded_heap (consing_gen);
    }

    // assert that the generation gap for all of the ephemeral generations were allocated.
#ifdef _DEBUG
    {
        int generation_num = max_generation-1;
        while (generation_num >= 0)
        {
            generation* gen = generation_of (generation_num);
            assert (generation_plan_allocation_start (gen));
            generation_num--;
        }
    }
#endif // _DEBUG

    if (!new_segment_p)
    {
        dprintf (2, ("Demoting ephemeral segment"));
        //demote the entire segment.
        settings.demotion = TRUE;
        get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
        demotion_low = heap_segment_mem (ephemeral_heap_segment);
        demotion_high = heap_segment_reserved (ephemeral_heap_segment);
    }
    else
    {
        demotion_low = MAX_PTR;
        demotion_high = 0;
#ifndef MULTIPLE_HEAPS
        settings.demotion = FALSE;
        get_gc_data_per_heap()->clear_mechanism_bit (gc_demotion_bit);
#endif //!MULTIPLE_HEAPS
    }
    ptrdiff_t eph_size1 = total_ephemeral_size;
    MAYBE_UNUSED_VAR(eph_size1);

    if (!should_promote_ephemeral && new_segment_p)
    {
        assert (eph_size1 <= eph_size);
    }

    if (heap_segment_mem (old_seg) == heap_segment_plan_allocated (old_seg))
    {
        // This is to catch when we accidently delete a segment that has pins.
        verify_no_pins (heap_segment_mem (old_seg), heap_segment_reserved (old_seg));
    }

    verify_no_pins (heap_segment_plan_allocated (old_seg), heap_segment_reserved(old_seg));

    dprintf(2,("---- End of Heap Expansion ----"));
    return consing_gen;
}

// One-time initialization of the per-generation dynamic tuning data
// (budgets, growth limits, fragmentation limits) for gen0..gen2 and the
// large object heap. Returns true (no failure paths).
bool gc_heap::init_dynamic_data()
{
    qpf = GCToOSInterface::QueryPerformanceFrequency();

    uint32_t now = (uint32_t)GetHighPrecisionTimeStamp();

    //clear some fields
    for (int i = 0; i < max_generation+1; i++)
    {
        dynamic_data* dd = dynamic_data_of (i);
        dd->gc_clock = 0;
        dd->time_clock = now;
    }

#ifdef GC_CONFIG_DRIVEN
    if (heap_number == 0)
        time_init = now;
#endif //GC_CONFIG_DRIVEN

    // get the registry setting for generation 0 size
    size_t gen0size = GCHeap::GetValidGen0MaxSize(get_valid_segment_size());

    dprintf (2, ("gen 0 size: %Id", gen0size));

    // ---- gen0 tuning ----
    dynamic_data* dd = dynamic_data_of (0);
    dd->current_size = 0;
    dd->promoted_size = 0;
    dd->collection_count = 0;
//  dd->limit = 3.0f;
#ifdef MULTIPLE_HEAPS
    dd->limit = 20.0f; // be more aggressive on server gc
    dd->max_limit = 40.0f;
#else
    dd->limit = 9.0f;
//  dd->max_limit = 15.0f; //10.0f;
    dd->max_limit = 20.0f;
#endif //MULTIPLE_HEAPS
    dd->min_gc_size = Align(gen0size / 8 * 5);
    dd->min_size = dd->min_gc_size;
    //dd->max_size = Align (gen0size);

#ifdef BACKGROUND_GC
    //gc_can_use_concurrent is not necessarily 0 for server builds
    bool can_use_concurrent = gc_can_use_concurrent;
#else // !BACKGROUND_GC
    bool can_use_concurrent = false;
#endif // BACKGROUND_GC

#ifdef MULTIPLE_HEAPS
    dd->max_size = max (6*1024*1024, min ( Align(get_valid_segment_size()/2), 200*1024*1024));
#else //MULTIPLE_HEAPS
    dd->max_size = (can_use_concurrent ?
                    6*1024*1024 :
                    max (6*1024*1024, min ( Align(get_valid_segment_size()/2), 200*1024*1024)));
#endif //MULTIPLE_HEAPS
    dd->new_allocation = dd->min_gc_size;
    dd->gc_new_allocation = dd->new_allocation;
    dd->desired_allocation = dd->new_allocation;
    dd->default_new_allocation = dd->min_gc_size;
    dd->fragmentation = 0;
    dd->fragmentation_limit = 40000;
    dd->fragmentation_burden_limit = 0.5f;

    // ---- gen1 tuning ----
    dd = dynamic_data_of (1);
    dd->current_size = 0;
    dd->promoted_size = 0;
    dd->collection_count = 0;
    dd->limit = 2.0f;
//  dd->max_limit = 15.0f;
    dd->max_limit = 7.0f;
    dd->min_gc_size = 9*32*1024;
    dd->min_size = dd->min_gc_size;
//  dd->max_size = 2397152;
#ifdef MULTIPLE_HEAPS
    dd->max_size = max (6*1024*1024, Align(get_valid_segment_size()/2));
#else //MULTIPLE_HEAPS
    dd->max_size = (can_use_concurrent ?
                    6*1024*1024 :
                    max (6*1024*1024, Align(get_valid_segment_size()/2)));
#endif //MULTIPLE_HEAPS
    dd->new_allocation = dd->min_gc_size;
    dd->gc_new_allocation = dd->new_allocation;
    dd->desired_allocation = dd->new_allocation;
    dd->default_new_allocation = dd->min_gc_size;
    dd->fragmentation = 0;
    dd->fragmentation_limit = 80000;
    dd->fragmentation_burden_limit = 0.5f;

    // ---- gen2 tuning ----
    dd = dynamic_data_of (2);
    dd->current_size = 0;
    dd->promoted_size = 0;
    dd->collection_count = 0;
    dd->limit = 1.2f;
    dd->max_limit = 1.8f;
    dd->min_gc_size = 256*1024;
    dd->min_size = dd->min_gc_size;
    dd->max_size = SSIZE_T_MAX;
    dd->new_allocation = dd->min_gc_size;
    dd->gc_new_allocation = dd->new_allocation;
    dd->desired_allocation = dd->new_allocation;
    dd->default_new_allocation = dd->min_gc_size;
    dd->fragmentation = 0;
    dd->fragmentation_limit = 200000;
    dd->fragmentation_burden_limit = 0.25f;

    //dynamic data for large objects
    dd = dynamic_data_of (3);
    dd->current_size = 0;
    dd->promoted_size = 0;
    dd->collection_count = 0;
    dd->limit = 1.25f;
    dd->max_limit = 4.5f;
    dd->min_gc_size = 3*1024*1024;
    dd->min_size = dd->min_gc_size;
    dd->max_size = SSIZE_T_MAX;
    dd->new_allocation = dd->min_gc_size;
    dd->gc_new_allocation = dd->new_allocation;
    dd->desired_allocation = dd->new_allocation;
    dd->default_new_allocation = dd->min_gc_size;
    dd->fragmentation = 0;
    dd->fragmentation_limit = 0;
    dd->fragmentation_burden_limit = 0.0f;

    return true;
}

// Maps a survival rate 'cst' (0..1) to a growth factor, clamped to
// max_limit. Below the crossover point the factor decreases as survival
// increases (hyperbolic curve); above it the cap applies.
float gc_heap::surv_to_growth (float cst, float limit, float max_limit)
{
    if (cst < ((max_limit - limit ) / (limit * (max_limit-1.0f))))
        return ((limit - limit*cst) / (1.0f - (cst * limit)));
    else
        return max_limit;
}


//if the allocation budget wasn't exhausted, the new budget may be wrong because the survival may
//not be correct (collection happened too soon). Correct with a linear estimation based on the previous
//value of the budget
// allocation_fraction - fraction of the previous budget actually consumed.
// Blends new_allocation with previous_desired_allocation weighted by that
// fraction; outside (0, 0.95) the new value is kept as-is.
static size_t linear_allocation_model (float allocation_fraction, size_t new_allocation,
                                       size_t previous_desired_allocation, size_t collection_count)
{
    if ((allocation_fraction < 0.95) && (allocation_fraction > 0.0))
    {
        dprintf (2, ("allocation fraction: %d", (int)(allocation_fraction/100.0)));
        new_allocation = (size_t)(allocation_fraction*new_allocation + (1.0-allocation_fraction)*previous_desired_allocation);
    }
#if 0
    size_t smoothing = 3; // exponential smoothing factor
    if (smoothing  > collection_count)
        smoothing  = collection_count;
    new_allocation = new_allocation / smoothing + ((previous_desired_allocation / smoothing) * (smoothing-1));
#else
    UNREFERENCED_PARAMETER(collection_count);
#endif //0
    return new_allocation;
}

// Computes the next allocation budget for a generation after a GC.
// dd         - the generation's dynamic tuning data.
// out        - bytes that survived (promoted out of) this generation.
// gen_number - generation index (max_generation+1 == large object heap).
// pass       - gen0 only: pass 0 also updates gen0_reduction_count.
// Returns the aligned budget and records it in the per-heap GC history.
size_t gc_heap::desired_new_allocation (dynamic_data* dd,
                                        size_t out, int gen_number,
                                        int pass)
{
    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();

    if (dd_begin_data_size (dd) == 0)
    {
        // No data from a prior GC yet - fall back to the static default.
        size_t new_allocation = dd_default_new_allocation (dd);
        current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation;
        return new_allocation;
    }
    else
    {
        float cst;
        size_t previous_desired_allocation = dd_desired_allocation (dd);
        size_t current_size = dd_current_size (dd);
        float max_limit = dd_max_limit (dd);
        float limit = dd_limit (dd);
        size_t min_gc_size = dd_min_gc_size (dd);
        float f = 0;
        size_t max_size = dd_max_size (dd);
        size_t new_allocation = 0;
        float allocation_fraction = (float) (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)) / (float) (dd_desired_allocation (dd));
        if (gen_number >= max_generation)
        {
            // gen2 and large object heap: budget from target size growth.
            size_t new_size = 0;

            cst = min (1.0f, float (out) / float (dd_begin_data_size (dd)));

            f = surv_to_growth (cst, limit, max_limit);
            size_t max_growth_size = (size_t)(max_size / f);
            if (current_size >= max_growth_size)
            {
                new_size = max_size;
            }
            else
            {
                new_size = (size_t) min (max ( (f * current_size), min_gc_size), max_size);
            }

            assert ((new_size >= current_size) || (new_size == max_size));

            if (gen_number == max_generation)
            {
                new_allocation = max((new_size - current_size), min_gc_size);

                new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                          dd_desired_allocation (dd), dd_collection_count (dd));

                if ((dd_fragmentation (dd) > ((size_t)((f-1)*current_size))))
                {
                    //reducing allocation in case of fragmentation
                    size_t new_allocation1 = max (min_gc_size,
                                                  // CAN OVERFLOW
                                                  (size_t)((float)new_allocation * current_size /
                                                           ((float)current_size + 2*dd_fragmentation (dd))));
                    dprintf (2, ("Reducing max_gen allocation due to fragmentation from %Id to %Id",
                                 new_allocation, new_allocation1));
                    new_allocation = new_allocation1;
                }
            }
            else //large object heap
            {
                // Cap the LOH budget at available physical memory (minus 1MB
                // head-room) plus its own free-list space.
                uint64_t available_physical = 0;
                get_memory_info (NULL, &available_physical);
                if (available_physical > 1024*1024)
                    available_physical -= 1024*1024;

                uint64_t available_free = available_physical + (uint64_t)generation_free_list_space (generation_of (gen_number));
                if (available_free > (uint64_t)MAX_PTR)
                {
                    available_free = (uint64_t)MAX_PTR;
                }

                //try to avoid OOM during large object allocation
                new_allocation = max (min(max((new_size - current_size), dd_desired_allocation (dynamic_data_of (max_generation))),
                                          (size_t)available_free),
                                      max ((current_size/4), min_gc_size));

                new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                          dd_desired_allocation (dd), dd_collection_count (dd));

            }
        }
        else
        {
            // Ephemeral generations: budget scales with survivors.
            size_t survivors = out;
            cst = float (survivors) / float (dd_begin_data_size (dd));
            f = surv_to_growth (cst, limit, max_limit);
            new_allocation = (size_t) min (max ((f * (survivors)), min_gc_size), max_size);

            new_allocation = linear_allocation_model (allocation_fraction, new_allocation,
                                                      dd_desired_allocation (dd), dd_collection_count (dd));

            if (gen_number == 0)
            {
                if (pass == 0)
                {

                    //printf ("%f, %Id\n", cst, new_allocation);
                    size_t free_space = generation_free_list_space (generation_of (gen_number));
                    // DTREVIEW - is min_gc_size really a good choice?
                    // on 64-bit this will almost always be true.
                    dprintf (GTC_LOG, ("frag: %Id, min: %Id", free_space, min_gc_size));
                    if (free_space > min_gc_size)
                    {
                        settings.gen0_reduction_count = 2;
                    }
                    else
                    {
                        if (settings.gen0_reduction_count > 0)
                            settings.gen0_reduction_count--;
                    }
                }
                if (settings.gen0_reduction_count > 0)
                {
                    dprintf (2, ("Reducing new allocation based on fragmentation"));
                    new_allocation = min (new_allocation,
                                          max (min_gc_size, (max_size/3)));
                }
            }
        }

        size_t new_allocation_ret =
            Align (new_allocation, get_alignment_constant (!(gen_number == (max_generation+1))));
        int gen_data_index = gen_number;
        gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_data_index]);
        gen_data->new_allocation = new_allocation_ret;

        dd_surv (dd) = cst;

#ifdef SIMPLE_DPRINTF
        dprintf (1, ("h%d g%d surv: %Id current: %Id alloc: %Id (%d%%) f: %d%% new-size: %Id new-alloc: %Id",
                     heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
                     (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
#else
        dprintf (1,("gen: %d in: %Id out: %Id ", gen_number, generation_allocation_size (generation_of (gen_number)), out));
        dprintf (1,("current: %Id alloc: %Id ", current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd))));
        dprintf (1,(" surv: %d%% f: %d%% new-size: %Id new-alloc: %Id",
                    (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
#endif //SIMPLE_DPRINTF

        return new_allocation_ret;
    }
}

//returns the planned size of a generation (including free list element)
size_t gc_heap::generation_plan_size (int gen_number)
{
    if (0 == gen_number)
        return
max((heap_segment_plan_allocated (ephemeral_heap_segment) - + generation_plan_allocation_start (generation_of (gen_number))), + (int)Align (min_obj_size)); + else + { + generation* gen = generation_of (gen_number); + if (heap_segment_rw (generation_start_segment (gen)) == ephemeral_heap_segment) + return (generation_plan_allocation_start (generation_of (gen_number - 1)) - + generation_plan_allocation_start (generation_of (gen_number))); + else + { + size_t gensize = 0; + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while (seg && (seg != ephemeral_heap_segment)) + { + gensize += heap_segment_plan_allocated (seg) - + heap_segment_mem (seg); + seg = heap_segment_next_rw (seg); + } + if (seg) + { + gensize += (generation_plan_allocation_start (generation_of (gen_number - 1)) - + heap_segment_mem (ephemeral_heap_segment)); + } + return gensize; + } + } + +} + +//returns the size of a generation (including free list element) +size_t gc_heap::generation_size (int gen_number) +{ + if (0 == gen_number) + return max((heap_segment_allocated (ephemeral_heap_segment) - + generation_allocation_start (generation_of (gen_number))), + (int)Align (min_obj_size)); + else + { + generation* gen = generation_of (gen_number); + if (heap_segment_rw (generation_start_segment (gen)) == ephemeral_heap_segment) + return (generation_allocation_start (generation_of (gen_number - 1)) - + generation_allocation_start (generation_of (gen_number))); + else + { + size_t gensize = 0; + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while (seg && (seg != ephemeral_heap_segment)) + { + gensize += heap_segment_allocated (seg) - + heap_segment_mem (seg); + seg = heap_segment_next_rw (seg); + } + if (seg) + { + gensize += (generation_allocation_start (generation_of (gen_number - 1)) - + heap_segment_mem (ephemeral_heap_segment)); + } + + return gensize; + } + } + +} + +size_t 
gc_heap::compute_in (int gen_number) +{ + assert (gen_number != 0); + dynamic_data* dd = dynamic_data_of (gen_number); + + size_t in = generation_allocation_size (generation_of (gen_number)); + + if (gen_number == max_generation && ephemeral_promotion) + { + in = 0; + for (int i = 0; i <= max_generation; i++) + { + dynamic_data* dd = dynamic_data_of (i); + in += dd_survived_size (dd); + if (i != max_generation) + { + generation_condemned_allocated (generation_of (gen_number)) += dd_survived_size (dd); + } + } + } + + dd_gc_new_allocation (dd) -= in; + dd_new_allocation (dd) = dd_gc_new_allocation (dd); + + gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); + gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_number]); + gen_data->in = in; + + generation_allocation_size (generation_of (gen_number)) = 0; + return in; +} + +void gc_heap::compute_promoted_allocation (int gen_number) +{ + compute_in (gen_number); +} + +#ifdef BIT64 +inline +size_t gc_heap::trim_youngest_desired (uint32_t memory_load, + size_t total_new_allocation, + size_t total_min_allocation) +{ + if (memory_load < MAX_ALLOWED_MEM_LOAD) + { + // If the total of memory load and gen0 budget exceeds + // our max memory load limit, trim the gen0 budget so the total + // is the max memory load limit. 
+ size_t remain_memory_load = (MAX_ALLOWED_MEM_LOAD - memory_load) * mem_one_percent; + return min (total_new_allocation, remain_memory_load); + } + else + { + return max (mem_one_percent, total_min_allocation); + } +} + +size_t gc_heap::joined_youngest_desired (size_t new_allocation) +{ + dprintf (2, ("Entry memory load: %d; gen0 new_alloc: %Id", settings.entry_memory_load, new_allocation)); + + size_t final_new_allocation = new_allocation; + if (new_allocation > MIN_YOUNGEST_GEN_DESIRED) + { + uint32_t num_heaps = 1; + +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + + size_t total_new_allocation = new_allocation * num_heaps; + size_t total_min_allocation = MIN_YOUNGEST_GEN_DESIRED * num_heaps; + + if ((settings.entry_memory_load >= MAX_ALLOWED_MEM_LOAD) || + (total_new_allocation > max (youngest_gen_desired_th, total_min_allocation))) + { + uint32_t memory_load = 0; + get_memory_info (&memory_load); + dprintf (2, ("Current emory load: %d", memory_load)); + + size_t final_total = + trim_youngest_desired (memory_load, total_new_allocation, total_min_allocation); + size_t max_new_allocation = +#ifdef MULTIPLE_HEAPS + dd_max_size (g_heaps[0]->dynamic_data_of (0)); +#else //MULTIPLE_HEAPS + dd_max_size (dynamic_data_of (0)); +#endif //MULTIPLE_HEAPS + + final_new_allocation = min (Align ((final_total / num_heaps), get_alignment_constant (TRUE)), max_new_allocation); + } + } + + if (final_new_allocation < new_allocation) + { + settings.gen0_reduction_count = 2; + } + + return final_new_allocation; +} +#endif // BIT64 + +inline +gc_history_per_heap* gc_heap::get_gc_data_per_heap() +{ +#ifdef BACKGROUND_GC + return (settings.concurrent ? 
&bgc_data_per_heap : &gc_data_per_heap); +#else + return &gc_data_per_heap; +#endif //BACKGROUND_GC +} + +void gc_heap::compute_new_dynamic_data (int gen_number) +{ + PREFIX_ASSUME(gen_number >= 0); + PREFIX_ASSUME(gen_number <= max_generation); + + dynamic_data* dd = dynamic_data_of (gen_number); + generation* gen = generation_of (gen_number); + size_t in = (gen_number==0) ? 0 : compute_in (gen_number); + + size_t total_gen_size = generation_size (gen_number); + //keep track of fragmentation + dd_fragmentation (dd) = generation_free_list_space (gen) + generation_free_obj_space (gen); + dd_current_size (dd) = total_gen_size - dd_fragmentation (dd); + + gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); + + size_t out = dd_survived_size (dd); + + gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_number]); + gen_data->size_after = total_gen_size; + gen_data->free_list_space_after = generation_free_list_space (gen); + gen_data->free_obj_space_after = generation_free_obj_space (gen); + + if ((settings.pause_mode == pause_low_latency) && (gen_number <= 1)) + { + // When we are in the low latency mode, we can still be + // condemning more than gen1's 'cause of induced GCs. + dd_desired_allocation (dd) = low_latency_alloc; + } + else + { + if (gen_number == 0) + { + //compensate for dead finalizable objects promotion. + //they shoudn't be counted for growth. + size_t final_promoted = 0; + final_promoted = min (promoted_bytes (heap_number), out); + // Prefast: this is clear from above but prefast needs to be told explicitly + PREFIX_ASSUME(final_promoted <= out); + + dprintf (2, ("gen: %d final promoted: %Id", gen_number, final_promoted)); + dd_freach_previous_promotion (dd) = final_promoted; + size_t lower_bound = desired_new_allocation (dd, out-final_promoted, gen_number, 0); + + if (settings.condemned_generation == 0) + { + //there is no noise. 
+ dd_desired_allocation (dd) = lower_bound; + } + else + { + size_t higher_bound = desired_new_allocation (dd, out, gen_number, 1); + + // <TODO>This assert was causing AppDomains\unload\test1n\test1nrun.bat to fail</TODO> + //assert ( lower_bound <= higher_bound); + + //discount the noise. Change the desired allocation + //only if the previous value is outside of the range. + if (dd_desired_allocation (dd) < lower_bound) + { + dd_desired_allocation (dd) = lower_bound; + } + else if (dd_desired_allocation (dd) > higher_bound) + { + dd_desired_allocation (dd) = higher_bound; + } +#if defined (BIT64) && !defined (MULTIPLE_HEAPS) + dd_desired_allocation (dd) = joined_youngest_desired (dd_desired_allocation (dd)); +#endif // BIT64 && !MULTIPLE_HEAPS + trim_youngest_desired_low_memory(); + dprintf (2, ("final gen0 new_alloc: %Id", dd_desired_allocation (dd))); + } + } + else + { + dd_desired_allocation (dd) = desired_new_allocation (dd, out, gen_number, 0); + } + } + + gen_data->pinned_surv = dd_pinned_survived_size (dd); + gen_data->npinned_surv = dd_survived_size (dd) - dd_pinned_survived_size (dd); + + dd_gc_new_allocation (dd) = dd_desired_allocation (dd); + dd_new_allocation (dd) = dd_gc_new_allocation (dd); + + //update counter + dd_promoted_size (dd) = out; + if (gen_number == max_generation) + { + dd = dynamic_data_of (max_generation+1); + total_gen_size = generation_size (max_generation + 1); + dd_fragmentation (dd) = generation_free_list_space (large_object_generation) + + generation_free_obj_space (large_object_generation); + dd_current_size (dd) = total_gen_size - dd_fragmentation (dd); + dd_survived_size (dd) = dd_current_size (dd); + in = 0; + out = dd_current_size (dd); + dd_desired_allocation (dd) = desired_new_allocation (dd, out, max_generation+1, 0); + dd_gc_new_allocation (dd) = Align (dd_desired_allocation (dd), + get_alignment_constant (FALSE)); + dd_new_allocation (dd) = dd_gc_new_allocation (dd); + + gen_data = 
&(current_gc_data_per_heap->gen_data[max_generation+1]); + gen_data->size_after = total_gen_size; + gen_data->free_list_space_after = generation_free_list_space (large_object_generation); + gen_data->free_obj_space_after = generation_free_obj_space (large_object_generation); + gen_data->npinned_surv = out; +#ifdef BACKGROUND_GC + end_loh_size = total_gen_size; +#endif //BACKGROUND_GC + //update counter + dd_promoted_size (dd) = out; + } +} + +void gc_heap::trim_youngest_desired_low_memory() +{ + if (g_low_memory_status) + { + size_t committed_mem = 0; + heap_segment* seg = generation_start_segment (generation_of (max_generation)); + while (seg) + { + committed_mem += heap_segment_committed (seg) - heap_segment_mem (seg); + seg = heap_segment_next (seg); + } + seg = generation_start_segment (generation_of (max_generation + 1)); + while (seg) + { + committed_mem += heap_segment_committed (seg) - heap_segment_mem (seg); + seg = heap_segment_next (seg); + } + + dynamic_data* dd = dynamic_data_of (0); + size_t current = dd_desired_allocation (dd); + size_t candidate = max (Align ((committed_mem / 10), get_alignment_constant(FALSE)), dd_min_gc_size (dd)); + + dd_desired_allocation (dd) = min (current, candidate); + } +} + +void gc_heap::decommit_ephemeral_segment_pages() +{ + if (settings.concurrent) + { + return; + } + + size_t slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); + dynamic_data* dd = dynamic_data_of (0); + +#ifndef MULTIPLE_HEAPS + size_t extra_space = (g_low_memory_status ? 0 : (512 * 1024)); + size_t decommit_timeout = (g_low_memory_status ? 
0 : GC_EPHEMERAL_DECOMMIT_TIMEOUT); + size_t ephemeral_elapsed = dd_time_clock(dd) - gc_last_ephemeral_decommit_time; + + if (dd_desired_allocation (dd) > gc_gen0_desired_high) + { + gc_gen0_desired_high = dd_desired_allocation (dd) + extra_space; + } + + if (ephemeral_elapsed >= decommit_timeout) + { + slack_space = min (slack_space, gc_gen0_desired_high); + + gc_last_ephemeral_decommit_time = dd_time_clock(dd); + gc_gen0_desired_high = 0; + } +#endif //!MULTIPLE_HEAPS + + if (settings.condemned_generation >= (max_generation-1)) + { + size_t new_slack_space = +#ifdef BIT64 + max(min(min(get_valid_segment_size()/32, dd_max_size(dd)), (generation_size (max_generation) / 10)), dd_desired_allocation(dd)); +#else +#ifdef FEATURE_CORECLR + dd_desired_allocation (dd); +#else + dd_max_size (dd); +#endif //FEATURE_CORECLR +#endif // BIT64 + + slack_space = min (slack_space, new_slack_space); + } + + decommit_heap_segment_pages (ephemeral_heap_segment, slack_space); + + gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); + current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); +} + +size_t gc_heap::new_allocation_limit (size_t size, size_t free_size, int gen_number) +{ + dynamic_data* dd = dynamic_data_of (gen_number); + ptrdiff_t new_alloc = dd_new_allocation (dd); + assert (new_alloc == (ptrdiff_t)Align (new_alloc, + get_alignment_constant (!(gen_number == (max_generation+1))))); + size_t limit = min (max (new_alloc, (ptrdiff_t)size), (ptrdiff_t)free_size); + assert (limit == Align (limit, get_alignment_constant (!(gen_number == (max_generation+1))))); + dd_new_allocation (dd) = (new_alloc - limit ); + return limit; +} + +//This is meant to be called by decide_on_compacting. 
+
+// Estimates the total fragmentation (unused space) that compacting the
+// condemned generation could reclaim.  Meant to be called by
+// decide_on_compacting, which compares the result (and the derived
+// fragmentation burden) against the dynamic-data limits to choose between
+// compacting and sweeping.  The estimate is the sum of three parts:
+//   1. the tail of the ephemeral segment past the consing generation's
+//      allocation pointer (or the whole ephemeral segment when planning
+//      never reached it),
+//   2. for every segment of gen's chain in front of the ephemeral one, the
+//      gap between its currently allocated end and its planned allocated
+//      end, and
+//   3. the free-space lengths recorded in front of the dequeued pinned
+//      plugs (mark stack entries below mark_stack_bos).
+// gen         - generation whose segment chain is walked
+// consing_gen - generation whose allocation pointer marks how far planned
+//               allocation got (presumably the generation used for consing
+//               during the plan phase - confirm against plan_phase)
+// end         - end of the planned area in the ephemeral segment, used to
+//               size the leftover tail
+size_t gc_heap::generation_fragmentation (generation* gen,
+ generation* consing_gen,
+ uint8_t* end)
+{
+ size_t frag;
+ uint8_t* alloc = generation_allocation_pointer (consing_gen);
+ // If the allocation pointer has reached the ephemeral segment
+ // fine, otherwise the whole ephemeral segment is considered
+ // fragmentation
+ if (in_range_for_segment (alloc, ephemeral_heap_segment))
+ {
+ if (alloc <= heap_segment_allocated(ephemeral_heap_segment))
+ frag = end - alloc;
+ else
+ {
+ // case when no survivors, allocated set to beginning
+ frag = 0;
+ }
+ dprintf (3, ("ephemeral frag: %Id", frag));
+ }
+ else
+ frag = (heap_segment_allocated (ephemeral_heap_segment) -
+ heap_segment_mem (ephemeral_heap_segment));
+ // Walk the non-ephemeral segments: anything allocated beyond the planned
+ // end of a segment is space a compacting GC would give back.
+ heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
+
+ PREFIX_ASSUME(seg != NULL);
+
+ while (seg != ephemeral_heap_segment)
+ {
+ frag += (heap_segment_allocated (seg) -
+ heap_segment_plan_allocated (seg));
+ dprintf (3, ("seg: %Ix, frag: %Id", (size_t)seg,
+ (heap_segment_allocated (seg) -
+ heap_segment_plan_allocated (seg))));
+
+ seg = heap_segment_next_rw (seg);
+ // the ephemeral segment must terminate the chain walk
+ assert (seg);
+ }
+ dprintf (3, ("frag: %Id discounting pinned plugs", frag));
+ //add the length of the dequeued plug free space
+ size_t bos = 0;
+ while (bos < mark_stack_bos)
+ {
+ frag += (pinned_len (pinned_plug_of (bos)));
+ bos++;
+ }
+
+ return frag;
+}
+
+// for SOH this returns the total sizes of the generation and its
+// younger generation(s).
+// for LOH this returns just LOH size.
+size_t gc_heap::generation_sizes (generation* gen) +{ + size_t result = 0; + if (generation_start_segment (gen ) == ephemeral_heap_segment) + result = (heap_segment_allocated (ephemeral_heap_segment) - + generation_allocation_start (gen)); + else + { + heap_segment* seg = heap_segment_in_range (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while (seg) + { + result += (heap_segment_allocated (seg) - + heap_segment_mem (seg)); + seg = heap_segment_next_in_range (seg); + } + } + + return result; +} + +BOOL gc_heap::decide_on_compacting (int condemned_gen_number, + size_t fragmentation, + BOOL& should_expand) +{ + BOOL should_compact = FALSE; + should_expand = FALSE; + generation* gen = generation_of (condemned_gen_number); + dynamic_data* dd = dynamic_data_of (condemned_gen_number); + size_t gen_sizes = generation_sizes(gen); + float fragmentation_burden = ( ((0 == fragmentation) || (0 == gen_sizes)) ? (0.0f) : + (float (fragmentation) / gen_sizes) ); + + dprintf (GTC_LOG, ("fragmentation: %Id (%d%%)", fragmentation, (int)(fragmentation_burden * 100.0))); + +#ifdef STRESS_HEAP + // for pure GC stress runs we need compaction, for GC stress "mix" + // we need to ensure a better mix of compacting and sweeping collections + if (GCStress<cfg_any>::IsEnabled() && !settings.concurrent + && !g_pConfig->IsGCStressMix()) + should_compact = TRUE; + +#ifdef GC_STATS + // in GC stress "mix" mode, for stress induced collections make sure we + // keep sweeps and compactions relatively balanced. do not (yet) force sweeps + // against the GC's determination, as it may lead to premature OOMs. 
+ if (g_pConfig->IsGCStressMix() && settings.stress_induced) + { + int compactions = g_GCStatistics.cntCompactFGC+g_GCStatistics.cntCompactNGC; + int sweeps = g_GCStatistics.cntFGC + g_GCStatistics.cntNGC - compactions; + if (compactions < sweeps / 10) + { + should_compact = TRUE; + } + } +#endif // GC_STATS +#endif //STRESS_HEAP + + if (g_pConfig->GetGCForceCompact()) + should_compact = TRUE; + + if ((condemned_gen_number == max_generation) && last_gc_before_oom) + { + should_compact = TRUE; + last_gc_before_oom = FALSE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_last_gc); + } + + if (settings.reason == reason_induced_compacting) + { + dprintf (2, ("induced compacting GC")); + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_induced_compacting); + } + + dprintf (2, ("Fragmentation: %d Fragmentation burden %d%%", + fragmentation, (int) (100*fragmentation_burden))); + + if (!should_compact) + { + if (dt_low_ephemeral_space_p (tuning_deciding_compaction)) + { + dprintf(GTC_LOG, ("compacting due to low ephemeral")); + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_low_ephemeral); + } + } + + if (should_compact) + { + if ((condemned_gen_number >= (max_generation - 1))) + { + if (dt_low_ephemeral_space_p (tuning_deciding_expansion)) + { + dprintf (GTC_LOG,("Not enough space for all ephemeral generations with compaction")); + should_expand = TRUE; + } + } + } + +#ifdef BIT64 + BOOL high_memory = FALSE; +#endif // BIT64 + + if (!should_compact) + { + // We are not putting this in dt_high_frag_p because it's not exactly + // high fragmentation - it's just enough planned fragmentation for us to + // want to compact. Also the "fragmentation" we are talking about here + // is different from anywhere else. 
+ BOOL frag_exceeded = ((fragmentation >= dd_fragmentation_limit (dd)) && + (fragmentation_burden >= dd_fragmentation_burden_limit (dd))); + + if (frag_exceeded) + { +#ifdef BACKGROUND_GC + // do not force compaction if this was a stress-induced GC + IN_STRESS_HEAP(if (!settings.stress_induced)) + { +#endif // BACKGROUND_GC + assert (settings.concurrent == FALSE); + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_high_frag); +#ifdef BACKGROUND_GC + } +#endif // BACKGROUND_GC + } + +#ifdef BIT64 + // check for high memory situation + if(!should_compact) + { + uint32_t num_heaps = 1; +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif // MULTIPLE_HEAPS + + ptrdiff_t reclaim_space = generation_size(max_generation) - generation_plan_size(max_generation); + if((settings.entry_memory_load >= high_memory_load_th) && (settings.entry_memory_load < v_high_memory_load_th)) + { + if(reclaim_space > (int64_t)(min_high_fragmentation_threshold (entry_available_physical_mem, num_heaps))) + { + dprintf(GTC_LOG,("compacting due to fragmentation in high memory")); + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_high_mem_frag); + } + high_memory = TRUE; + } + else if(settings.entry_memory_load >= v_high_memory_load_th) + { + if(reclaim_space > (ptrdiff_t)(min_reclaim_fragmentation_threshold (num_heaps))) + { + dprintf(GTC_LOG,("compacting due to fragmentation in very high memory")); + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_vhigh_mem_frag); + } + high_memory = TRUE; + } + } +#endif // BIT64 + } + + // The purpose of calling ensure_gap_allocation here is to make sure + // that we actually are able to commit the memory to allocate generation + // starts. 
+ if ((should_compact == FALSE) && + (ensure_gap_allocation (condemned_gen_number) == FALSE)) + { + should_compact = TRUE; + get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_no_gaps); + } + + if (settings.condemned_generation == max_generation) + { + //check the progress + if ( +#ifdef BIT64 + (high_memory && !should_compact) || +#endif // BIT64 + (generation_plan_allocation_start (generation_of (max_generation - 1)) >= + generation_allocation_start (generation_of (max_generation - 1)))) + { + dprintf (2, (" Elevation: gen2 size: %d, gen2 plan size: %d, no progress, elevation = locked", + generation_size (max_generation), + generation_plan_size (max_generation))); + //no progress -> lock + settings.should_lock_elevation = TRUE; + } + } + + if (settings.pause_mode == pause_no_gc) + { + should_compact = TRUE; + if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_plan_allocated (ephemeral_heap_segment)) + < soh_allocation_no_gc) + { + should_expand = TRUE; + } + } + + dprintf (2, ("will %s", (should_compact ? "compact" : "sweep"))); + return should_compact; +} + +size_t align_lower_good_size_allocation (size_t size) +{ + return (size/64)*64; +} + +size_t gc_heap::approximate_new_allocation() +{ + dynamic_data* dd0 = dynamic_data_of (0); + return max (2*dd_min_size (dd0), ((dd_desired_allocation (dd0)*2)/3)); +} + +// After we did a GC we expect to have at least this +// much space at the end of the segment to satisfy +// a reasonable amount of allocation requests. +size_t gc_heap::end_space_after_gc() +{ + return max ((dd_min_gc_size (dynamic_data_of (0))/2), (END_SPACE_AFTER_GC + Align (min_obj_size))); +} + +BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp) +{ + uint8_t* start = 0; + + if ((tp == tuning_deciding_condemned_gen) || + (tp == tuning_deciding_compaction)) + { + start = (settings.concurrent ? 
alloc_allocated : heap_segment_allocated (ephemeral_heap_segment)); + if (settings.concurrent) + { + dprintf (GTC_LOG, ("%Id left at the end of ephemeral segment (alloc_allocated)", + (size_t)(heap_segment_reserved (ephemeral_heap_segment) - alloc_allocated))); + } + else + { + dprintf (GTC_LOG, ("%Id left at the end of ephemeral segment (allocated)", + (size_t)(heap_segment_reserved (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)))); + } + } + else if (tp == tuning_deciding_expansion) + { + start = heap_segment_plan_allocated (ephemeral_heap_segment); + dprintf (GTC_LOG, ("%Id left at the end of ephemeral segment based on plan", + (size_t)(heap_segment_reserved (ephemeral_heap_segment) - start))); + } + else + { + assert (tp == tuning_deciding_full_gc); + dprintf (GTC_LOG, ("FGC: %Id left at the end of ephemeral segment (alloc_allocated)", + (size_t)(heap_segment_reserved (ephemeral_heap_segment) - alloc_allocated))); + start = alloc_allocated; + } + + if (start == 0) // empty ephemeral generations + { + assert (tp == tuning_deciding_expansion); + // if there are no survivors in the ephemeral segment, + // this should be the beginning of ephemeral segment. 
+ start = generation_allocation_pointer (generation_of (max_generation)); + assert (start == heap_segment_mem (ephemeral_heap_segment)); + } + + if (tp == tuning_deciding_expansion) + { + assert (settings.condemned_generation >= (max_generation-1)); + size_t gen0size = approximate_new_allocation(); + size_t eph_size = gen0size; + + for (int j = 1; j <= max_generation-1; j++) + { + eph_size += 2*dd_min_size (dynamic_data_of(j)); + } + + // We must find room for one large object and enough room for gen0size + if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > eph_size) + { + dprintf (3, ("Enough room before end of segment")); + return TRUE; + } + else + { + size_t room = align_lower_good_size_allocation + (heap_segment_reserved (ephemeral_heap_segment) - start); + size_t end_seg = room; + + //look at the plug free space + size_t largest_alloc = END_SPACE_AFTER_GC + Align (min_obj_size); + bool large_chunk_found = FALSE; + size_t bos = 0; + uint8_t* gen0start = generation_plan_allocation_start (youngest_generation); + dprintf (3, ("ephemeral_gen_fit_p: gen0 plan start: %Ix", (size_t)gen0start)); + if (gen0start == 0) + return FALSE; + dprintf (3, ("ephemeral_gen_fit_p: room before free list search %Id, needed: %Id", + room, gen0size)); + while ((bos < mark_stack_bos) && + !((room >= gen0size) && large_chunk_found)) + { + uint8_t* plug = pinned_plug (pinned_plug_of (bos)); + if (in_range_for_segment (plug, ephemeral_heap_segment)) + { + if (plug >= gen0start) + { + size_t chunk = align_lower_good_size_allocation (pinned_len (pinned_plug_of (bos))); + room += chunk; + if (!large_chunk_found) + { + large_chunk_found = (chunk >= largest_alloc); + } + dprintf (3, ("ephemeral_gen_fit_p: room now %Id, large chunk: %Id", + room, large_chunk_found)); + } + } + bos++; + } + + if (room >= gen0size) + { + if (large_chunk_found) + { + dprintf (3, ("Enough room")); + return TRUE; + } + else + { + // now we need to find largest_alloc at the end of the segment. 
                    if (end_seg >= end_space_after_gc())
                    {
                        dprintf (3, ("Enough room (may need end of seg)"));
                        return TRUE;
                    }
                }
            }

            dprintf (3, ("Not enough room"));
            return FALSE;
        }
    }
    else
    {
        size_t end_space = 0;
        dynamic_data* dd = dynamic_data_of (0);
        if ((tp == tuning_deciding_condemned_gen) ||
            (tp == tuning_deciding_full_gc))
        {
            end_space = 2*dd_min_size (dd);
        }
        else
        {
            assert (tp == tuning_deciding_compaction);
            end_space = approximate_new_allocation();
        }

        if (!((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > end_space))
        {
            dprintf (GTC_LOG, ("ephemeral_gen_fit_p: does not fit without compaction"));
        }
        return ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > end_space);
    }
}

// Allocates a large object of jsize bytes on the LOH (generation max_generation+1).
// Returns the new object header, or 0 on OOM (FEATURE_REDHAWK builds only; otherwise
// throws via ThrowOutOfMemoryDimensionsExceeded). Accumulates the bytes allocated
// into alloc_bytes. Uses a private alloc_context because the shared gen3 context
// cannot be used concurrently.
CObjectHeader* gc_heap::allocate_large_object (size_t jsize, int64_t& alloc_bytes)
{
    //create a new alloc context because gen3context is shared.
    alloc_context acontext;
    acontext.alloc_ptr = 0;
    acontext.alloc_limit = 0;
    acontext.alloc_bytes = 0;
#ifdef MULTIPLE_HEAPS
    acontext.alloc_heap = vm_heap;
#endif //MULTIPLE_HEAPS

#ifdef MARK_ARRAY
    // Snapshot the address range covered by the mark array; during a background GC
    // the saved (bgc-time) bounds must be used instead of the current ones.
    uint8_t* current_lowest_address = lowest_address;
    uint8_t* current_highest_address = highest_address;
#ifdef BACKGROUND_GC
    if (recursive_gc_sync::background_running_p())
    {
        current_lowest_address = background_saved_lowest_address;
        current_highest_address = background_saved_highest_address;
    }
#endif //BACKGROUND_GC
#endif // MARK_ARRAY

    // Default cap keeps the object size within a 32-bit signed range (minus header
    // slack); on 64-bit it is raised to the 64-bit range when the config allows
    // very large objects.
    size_t maxObjectSize = (INT32_MAX - 7 - Align(min_obj_size));

#ifdef BIT64
    if (g_pConfig->GetGCAllowVeryLargeObjects())
    {
        maxObjectSize = (INT64_MAX - 7 - Align(min_obj_size));
    }
#endif

    if (jsize >= maxObjectSize)
    {
        if (g_pConfig->IsGCBreakOnOOMEnabled())
        {
            GCToOSInterface::DebugBreak();
        }

#ifndef FEATURE_REDHAWK
        ThrowOutOfMemoryDimensionsExceeded();
#else
        return 0;
#endif
    }

    size_t size = AlignQword (jsize);
    int align_const = get_alignment_constant (FALSE);
#ifdef FEATURE_LOH_COMPACTION
    // Reserve room for the padding object LOH compaction inserts before each plug.
    size_t pad = Align (loh_padding_obj_size, align_const);
#else
    size_t pad = 0;
#endif //FEATURE_LOH_COMPACTION

    assert (size >= Align (min_obj_size, align_const));
#ifdef _MSC_VER
#pragma inline_depth(0)
#endif //_MSC_VER
    if (! allocate_more_space (&acontext, (size + pad), max_generation+1))
    {
        return 0;
    }

#ifdef _MSC_VER
#pragma inline_depth(20)
#endif //_MSC_VER

#ifdef FEATURE_LOH_COMPACTION
    // The GC allocator made a free object already in this alloc context and
    // adjusted the alloc_ptr accordingly.
#endif //FEATURE_LOH_COMPACTION

    uint8_t* result = acontext.alloc_ptr;

    assert ((size_t)(acontext.alloc_limit - acontext.alloc_ptr) == size);

    CObjectHeader* obj = (CObjectHeader*)result;

#ifdef MARK_ARRAY
    if (recursive_gc_sync::background_running_p())
    {
        if ((result < current_highest_address) && (result >= current_lowest_address))
        {
            dprintf (3, ("Clearing mark bit at address %Ix",
                     (size_t)(&mark_array [mark_word_of (result)])));

            mark_array_clear_marked (result);
        }
#ifdef BACKGROUND_GC
        //the object has to cover one full mark uint32_t
        assert (size > mark_word_size);
        if (current_c_gc_state == c_gc_state_marking)
        {
            dprintf (3, ("Concurrent allocation of a large object %Ix",
                     (size_t)obj));
            //mark the new block specially so we know it is a new object
            if ((result < current_highest_address) && (result >= current_lowest_address))
            {
                dprintf (3, ("Setting mark bit at address %Ix",
                         (size_t)(&mark_array [mark_word_of (result)])));

                mark_array_set_marked (result);
            }
        }
#endif //BACKGROUND_GC
    }
#endif //MARK_ARRAY

    assert (obj != 0);
    assert ((size_t)obj == Align ((size_t)obj, align_const));

    alloc_bytes += acontext.alloc_bytes;
    return obj;
}

// Lets the OS discard the backing store of a free object's pages (MEM_RESET-style),
// skipping the leading bytes that still hold useful free-object data. Only done for
// regions larger than 128KB and never on FEATURE_PAL builds.
void reset_memory (uint8_t* o, size_t sizeo)
{
#ifndef FEATURE_PAL
    if (sizeo > 128 * 1024)
    {
        // We cannot reset the memory for the useful part of a free object.
        size_t size_to_skip = min_free_list - plug_skew;

        size_t page_start = align_on_page ((size_t)(o + size_to_skip));
        size_t size = align_lower_page ((size_t)o + sizeo - size_to_skip - plug_skew) - page_start;
        // Note we need to compensate for an OS bug here. This bug would cause the MEM_RESET to fail
        // on write watched memory.
        if (reset_mm_p)
        {
            reset_mm_p = GCToOSInterface::VirtualReset((void*)page_start, size, true /* unlock */);
        }
    }
#endif //!FEATURE_PAL
}

void gc_heap::reset_large_object (uint8_t* o)
{
    // If it's a large object, allow the O/S to discard the backing store for these pages.
    reset_memory (o, size(o));
}

// Tests (and optionally clears, when clearp) the blocking-GC mark bit of a large
// object. Out-of-range addresses are reported as marked (TRUE) so they are treated
// as live.
BOOL gc_heap::large_object_marked (uint8_t* o, BOOL clearp)
{
    BOOL m = FALSE;
    // It shouldn't be necessary to do these comparisons because this is only used for blocking
    // GCs and LOH segments cannot be out of range.
    if ((o >= lowest_address) && (o < highest_address))
    {
        if (marked (o))
        {
            if (clearp)
            {
                clear_marked (o);
                if (pinned (o))
                    clear_pinned(o);
            }
            m = TRUE;
        }
        else
            m = FALSE;
    }
    else
        m = TRUE;
    return m;
}

#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
// Reports object movement (relocation) to the profiler/ETW for the condemned
// generations, bracketed by Begin/EndMovedReferences.
void gc_heap::record_survived_for_profiler(int condemned_gen_number, uint8_t * start_address)
{
    size_t profiling_context = 0;

    ETW::GCLog::BeginMovedReferences(&profiling_context);

    // Now walk the portion of memory that is actually being relocated.
    walk_relocation(condemned_gen_number, start_address, profiling_context);

#ifdef FEATURE_LOH_COMPACTION
    if (loh_compacted_p)
    {
        walk_relocation_loh (profiling_context);
    }
#endif //FEATURE_LOH_COMPACTION

    // Notify the EE-side profiling code that all the references have been traced for
    // this heap, and that it needs to flush all cached data it hasn't sent to the
    // profiler and release resources it no longer needs.
    ETW::GCLog::EndMovedReferences(profiling_context);
}

// Walks the LOH and reports each run of surviving (marked) large objects as a
// non-moving plug to the profiler/ETW.
void gc_heap::notify_profiler_of_surviving_large_objects ()
{
    size_t profiling_context = 0;

    ETW::GCLog::BeginMovedReferences(&profiling_context);

    generation* gen = large_object_generation;
    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));;

    PREFIX_ASSUME(seg != NULL);

    uint8_t* o = generation_allocation_start (gen);
    uint8_t* plug_end = o;
    uint8_t* plug_start = o;

    // Generally, we can only get here if this is TRUE:
    // (CORProfilerTrackGC() || ETW::GCLog::ShouldTrackMovementForEtw())
    // But we can't always assert that, as races could theoretically cause GC profiling
    // or ETW to turn off just before we get here. This is harmless (we do checks later
    // on, under appropriate locks, before actually calling into profilers), though it's
    // a slowdown to determine these plugs for nothing.

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next (seg);
            if (seg == 0)
                break;
            else
                o = heap_segment_mem (seg);
        }
        if (large_object_marked(o, FALSE))
        {
            plug_start = o;

            BOOL m = TRUE;
            while (m)
            {
                o = o + AlignQword (size (o));
                if (o >= heap_segment_allocated (seg))
                {
                    break;
                }
                m = large_object_marked (o, FALSE);
            }

            plug_end = o;

            ETW::GCLog::MovedReference(
                plug_start,
                plug_end,
                0,              // reloc distance: LOH plugs do not move here
                profiling_context,
                FALSE);
        }
        else
        {
            // Skip over the unmarked (dead) run.
            while (o < heap_segment_allocated (seg) && !large_object_marked(o, FALSE))
            {
                o = o + AlignQword (size (o));
            }
        }
    }
    ETW::GCLog::EndMovedReferences(profiling_context);
}
#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)

#ifdef BACKGROUND_GC

// Tests (and optionally clears, when clearp) the background-GC mark array bit of an
// object. Addresses outside the bgc saved range are reported as marked (TRUE).
BOOL gc_heap::background_object_marked (uint8_t* o, BOOL clearp)
{
    BOOL m = FALSE;
    if ((o >= background_saved_lowest_address) && (o < background_saved_highest_address))
    {
        if (mark_array_marked (o))
        {
            if (clearp)
            {
                mark_array_clear_marked (o);
                //dprintf (3, ("mark array bit for object %Ix is cleared", o));
                dprintf (3, ("CM: %Ix", o));
            }
            m = TRUE;
        }
        else
            m = FALSE;
    }
    else
        m = TRUE;

    dprintf (3, ("o %Ix(%d) %s", o, size(o), (m ? "was bm" : "was NOT bm")));
    return m;
}

// Upper bound for the bgc sweep within a segment: the full allocated range for LOH
// segments, the background (bgc-time) allocated range for SOH segments.
uint8_t* gc_heap::background_next_end (heap_segment* seg, BOOL large_objects_p)
{
    return
        (large_objects_p ? heap_segment_allocated (seg) : heap_segment_background_allocated (seg));
}

// Debug-only: fills [start, end) with byte b so stale heap contents are easy to
// spot, gated on the heap-verification config flags.
void gc_heap::set_mem_verify (uint8_t* start, uint8_t* end, uint8_t b)
{
#ifdef VERIFY_HEAP
    if (end > start)
    {
        if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) &&
            !(g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_NO_MEM_FILL))
        {
            dprintf (3, ("setting mem to %c [%Ix, [%Ix", b, start, end));
            memset (start, b, (end - start));
        }
    }
#endif //VERIFY_HEAP
}

// Unlinks an empty segment from its generation's segment list during bgc sweep,
// queues it on the appropriate freeable list, and decommits it.
void gc_heap::generation_delete_heap_segment (generation* gen,
                                              heap_segment* seg,
                                              heap_segment* prev_seg,
                                              heap_segment* next_seg)
{
    dprintf (3, ("bgc sweep: deleting seg %Ix", seg));
    if (gen == large_object_generation)
    {
        heap_segment_next (prev_seg) = next_seg;

        dprintf (3, ("Preparing empty large segment %Ix for deletion", (size_t)seg));

        heap_segment_next (seg) = freeable_large_heap_segment;
        freeable_large_heap_segment = seg;
    }
    else
    {
        if (seg == ephemeral_heap_segment)
        {
            FATAL_GC_ERROR();
        }

        // NOTE(review): next_seg/prev_seg look inverted here compared to the LOH
        // branch — presumably because bgc sweep walks SOH segments backwards (see
        // heap_segment_prev usage in background_sweep), so the caller's "next" is
        // the list-order predecessor. Confirm against background_sweep before
        // touching this.
        heap_segment_next (next_seg) = prev_seg;

        dprintf (3, ("Preparing empty small segment %Ix for deletion", (size_t)seg));
        heap_segment_next (seg) = freeable_small_heap_segment;
        freeable_small_heap_segment = seg;
    }

    decommit_heap_segment (seg);
    seg->flags |= heap_segment_flags_decommitted;

    set_mem_verify (heap_segment_allocated (seg) - plug_skew, heap_segment_used (seg), 0xbb);
}

// Finishes bgc sweep for one segment: either threads a gap over the region newly
// promoted by foreground GCs (allocated != background_allocated), or trims/flags
// the segment for deletion (*delete_p) when everything after last_plug_end is dead.
void gc_heap::process_background_segment_end (heap_segment* seg,
                                              generation* gen,
                                              uint8_t* last_plug_end,
                                              heap_segment* start_seg,
                                              BOOL* delete_p)
{
    *delete_p = FALSE;
    uint8_t* allocated = heap_segment_allocated (seg);
    uint8_t* background_allocated = heap_segment_background_allocated (seg);

    dprintf (3, ("Processing end of background segment [%Ix, %Ix[(%Ix[)",
                 (size_t)heap_segment_mem (seg), background_allocated, allocated));


    if (allocated != background_allocated)
    {
        // Only SOH segments can grow behind bgc's back (LOH allocation is blocked
        // during bgc sweep).
        if (gen == large_object_generation)
        {
            FATAL_GC_ERROR();
        }

        dprintf (3, ("Make a free object before newly promoted objects [%Ix, %Ix[",
                     (size_t)last_plug_end, background_allocated));
        thread_gap (last_plug_end, background_allocated - last_plug_end, generation_of (max_generation));

        fix_brick_to_highest (last_plug_end, background_allocated);

        // When we allowed fgc's during going through gaps, we could have erased the brick
        // that corresponds to bgc_allocated 'cause we had to update the brick there,
        // recover it here.
        fix_brick_to_highest (background_allocated, background_allocated);
    }
    else
    {
        // by default, if allocated == background_allocated, it can't
        // be the ephemeral segment.
        if (seg == ephemeral_heap_segment)
        {
            FATAL_GC_ERROR();
        }

        if (allocated == heap_segment_mem (seg))
        {
            // this can happen with LOH segments when multiple threads
            // allocate new segments and not all of them were needed to
            // satisfy allocation requests.
            assert (gen == large_object_generation);
        }

        if (last_plug_end == heap_segment_mem (seg))
        {
            // Nothing survived on this segment; delete it unless it is the
            // generation's first segment.
            dprintf (3, ("Segment allocated is %Ix (beginning of this seg) - %s be deleted",
                         (size_t)allocated, (*delete_p ? "should" : "should not")));

            if (seg != start_seg)
            {
                *delete_p = TRUE;
            }
        }
        else
        {
            dprintf (3, ("Trimming seg to %Ix[", (size_t)last_plug_end));
            heap_segment_allocated (seg) = last_plug_end;
            set_mem_verify (heap_segment_allocated (seg) - plug_skew, heap_segment_used (seg), 0xbb);

            decommit_heap_segment_pages (seg, 0);
        }
    }

    dprintf (3, ("verifying seg %Ix's mark array was completely cleared", seg));
    bgc_verify_mark_array_cleared (seg);
}

// Walks SOH segments never seen by bgc mark and deletes the completely empty,
// writable ones.
void gc_heap::process_n_background_segments (heap_segment* seg,
                                             heap_segment* prev_seg,
                                             generation* gen)
{
    assert (gen != large_object_generation);

    while (seg)
    {
        dprintf (2, ("processing seg %Ix (not seen by bgc mark)", seg));
        heap_segment* next_seg = heap_segment_next (seg);

        if (heap_segment_read_only_p (seg))
        {
            prev_seg = seg;
        }
        else
        {
            if (heap_segment_allocated (seg) == heap_segment_mem (seg))
            {
                // This can happen - if we have a LOH segment where nothing survived
                // or a SOH segment allocated by a gen1 GC when BGC was going where
                // nothing survived last time we did a gen1 GC.
                generation_delete_heap_segment (gen, seg, prev_seg, next_seg);
            }
            else
            {
                prev_seg = seg;
            }
        }

        verify_soh_segment_list();
        seg = next_seg;
    }
}

// During a foreground GC that runs while bgc sweep is in progress: decides whether
// object o should be treated as live. Objects already swept past, or beyond the
// region bgc looked at, don't need the bgc mark bit; otherwise the bit decides.
inline
BOOL gc_heap::fgc_should_consider_object (uint8_t* o,
                                          heap_segment* seg,
                                          BOOL consider_bgc_mark_p,
                                          BOOL check_current_sweep_p,
                                          BOOL check_saved_sweep_p)
{
    // the logic for this function must be kept in sync with the analogous function
    // in ToolBox\SOS\Strike\gc.cpp

    // TRUE means we don't need to check the bgc mark bit
    // FALSE means we do.
    BOOL no_bgc_mark_p = FALSE;

    if (consider_bgc_mark_p)
    {
        if (check_current_sweep_p && (o < current_sweep_pos))
        {
            dprintf (3, ("no bgc mark - o: %Ix < cs: %Ix", o, current_sweep_pos));
            no_bgc_mark_p = TRUE;
        }

        if (!no_bgc_mark_p)
        {
            if(check_saved_sweep_p && (o >= saved_sweep_ephemeral_start))
            {
                dprintf (3, ("no bgc mark - o: %Ix >= ss: %Ix", o, saved_sweep_ephemeral_start));
                no_bgc_mark_p = TRUE;
            }

            if (!check_saved_sweep_p)
            {
                uint8_t* background_allocated = heap_segment_background_allocated (seg);
                // if this was the saved ephemeral segment, check_saved_sweep_p
                // would've been true.
                assert (heap_segment_background_allocated (seg) != saved_sweep_ephemeral_start);
                // background_allocated could be 0 for the new segments acquired during bgc
                // sweep and we still want no_bgc_mark_p to be true.
                if (o >= background_allocated)
                {
                    dprintf (3, ("no bgc mark - o: %Ix >= ba: %Ix", o, background_allocated));
                    no_bgc_mark_p = TRUE;
                }
            }
        }
    }
    else
    {
        no_bgc_mark_p = TRUE;
    }

    dprintf (3, ("bgc mark %Ix: %s (bm: %s)", o, (no_bgc_mark_p ? "no" : "yes"), (background_object_marked (o, FALSE) ? "yes" : "no")));
    return (no_bgc_mark_p ? TRUE : background_object_marked (o, FALSE));
}

// consider_bgc_mark_p tells you if you need to care about the bgc mark bit at all
// if it's TRUE, check_current_sweep_p tells you if you should consider the
// current sweep position or not.
// Computes, for one segment, the out-flags a foreground GC needs in order to call
// fgc_should_consider_object: whether the bgc mark bit matters at all, and whether
// the current sweep position / saved ephemeral start must be consulted.
void gc_heap::should_check_bgc_mark (heap_segment* seg,
                                     BOOL* consider_bgc_mark_p,
                                     BOOL* check_current_sweep_p,
                                     BOOL* check_saved_sweep_p)
{
    // the logic for this function must be kept in sync with the analogous function
    // in ToolBox\SOS\Strike\gc.cpp
    *consider_bgc_mark_p = FALSE;
    *check_current_sweep_p = FALSE;
    *check_saved_sweep_p = FALSE;

    if (current_c_gc_state == c_gc_state_planning)
    {
        // We are doing the current_sweep_pos comparison here because we have yet to
        // turn on the swept flag for the segment but in_range_for_segment will return
        // FALSE if the address is the same as reserved.
        if ((seg->flags & heap_segment_flags_swept) || (current_sweep_pos == heap_segment_reserved (seg)))
        {
            dprintf (3, ("seg %Ix is already swept by bgc", seg));
        }
        else
        {
            *consider_bgc_mark_p = TRUE;

            dprintf (3, ("seg %Ix hasn't been swept by bgc", seg));

            if (seg == saved_sweep_ephemeral_seg)
            {
                dprintf (3, ("seg %Ix is the saved ephemeral seg", seg));
                *check_saved_sweep_p = TRUE;
            }

            if (in_range_for_segment (current_sweep_pos, seg))
            {
                dprintf (3, ("current sweep pos is %Ix and within seg %Ix",
                             current_sweep_pos, seg));
                *check_current_sweep_p = TRUE;
            }
        }
    }
}

// Sweeps the ephemeral generations (gen0..max_generation-1) at the start of bgc
// sweep, while the EE is still stopped: threads gaps for gen1+, and builds gen0's
// free list privately before publishing it so gen0 allocation isn't disturbed.
void gc_heap::background_ephemeral_sweep()
{
    dprintf (3, ("bgc ephemeral sweep"));

    int align_const = get_alignment_constant (TRUE);

    saved_sweep_ephemeral_seg = ephemeral_heap_segment;
    saved_sweep_ephemeral_start = generation_allocation_start (generation_of (max_generation - 1));

    // Since we don't want to interfere with gen0 allocation while we are threading gen0 free list,
    // we thread onto a list first then publish it when we are done.
    allocator youngest_free_list;
    size_t youngest_free_list_space = 0;
    size_t youngest_free_obj_space = 0;

    youngest_free_list.clear();

    for (int i = 0; i <= (max_generation - 1); i++)
    {
        generation* gen_to_reset = generation_of (i);
        assert (generation_free_list_space (gen_to_reset) == 0);
        // Can only assert free_list_space is 0, not free_obj_space as the allocator could have added
        // something there.
    }

    // Sweep from the oldest ephemeral generation down to gen0.
    for (int i = (max_generation - 1); i >= 0; i--)
    {
        generation* current_gen = generation_of (i);
        uint8_t* o = generation_allocation_start (current_gen);
        //Skip the generation gap object
        o = o + Align(size (o), align_const);
        uint8_t* end = ((i > 0) ?
                        generation_allocation_start (generation_of (i - 1)) :
                        heap_segment_allocated (ephemeral_heap_segment));

        uint8_t* plug_end = o;
        uint8_t* plug_start = o;
        BOOL marked_p = FALSE;

        while (o < end)
        {
            marked_p = background_object_marked (o, TRUE);
            if (marked_p)
            {
                plug_start = o;
                size_t plug_size = plug_start - plug_end;

                if (i >= 1)
                {
                    thread_gap (plug_end, plug_size, current_gen);
                }
                else
                {
                    // gen0: small gaps become free objects only; large enough ones
                    // also go on the (private) free list.
                    if (plug_size > 0)
                    {
                        make_unused_array (plug_end, plug_size);
                        if (plug_size >= min_free_list)
                        {
                            youngest_free_list_space += plug_size;
                            youngest_free_list.thread_item (plug_end, plug_size);
                        }
                        else
                        {
                            youngest_free_obj_space += plug_size;
                        }
                    }
                }

                fix_brick_to_highest (plug_end, plug_start);
                fix_brick_to_highest (plug_start, plug_start);

                // Extend the plug over the run of consecutively marked objects.
                BOOL m = TRUE;
                while (m)
                {
                    o = o + Align (size (o), align_const);
                    if (o >= end)
                    {
                        break;
                    }

                    m = background_object_marked (o, TRUE);
                }
                plug_end = o;
                dprintf (3, ("bgs: plug [%Ix, %Ix[", (size_t)plug_start, (size_t)plug_end));
            }
            else
            {
                while ((o < end) && !background_object_marked (o, FALSE))
                {
                    o = o + Align (size (o), align_const);
                }
            }
        }

        if (plug_end != end)
        {
            if (i >= 1)
            {
                thread_gap (plug_end, end - plug_end, current_gen);
                fix_brick_to_highest (plug_end, end);
            }
            else
            {
                heap_segment_allocated (ephemeral_heap_segment) = plug_end;
                // the following line is temporary.
                heap_segment_saved_bg_allocated (ephemeral_heap_segment) = plug_end;
#ifdef VERIFY_HEAP
                if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC)
                {
                    make_unused_array (plug_end, (end - plug_end));
                }
#endif //VERIFY_HEAP
            }
        }

        dd_fragmentation (dynamic_data_of (i)) =
            generation_free_list_space (current_gen) + generation_free_obj_space (current_gen);
    }

    // Publish gen0's free list and accounting in one go.
    generation* youngest_gen = generation_of (0);
    generation_free_list_space (youngest_gen) = youngest_free_list_space;
    generation_free_obj_space (youngest_gen) = youngest_free_obj_space;
    dd_fragmentation (dynamic_data_of (0)) = youngest_free_list_space + youngest_free_obj_space;
    generation_allocator (youngest_gen)->copy_with_no_repair (&youngest_free_list);
}

// The concurrent sweep phase of background GC: sweeps gen2 (SOH segments, walked
// backwards from the ephemeral segment) and then the LOH, threading gaps between
// surviving plugs, trimming/deleting empty segments, periodically yielding to
// foreground GCs via allow_fgc(). Restarts the EE early and coordinates the other
// bgc threads through bgc_t_join.
void gc_heap::background_sweep()
{
    Thread* current_thread = GetThread();
    generation* gen = generation_of (max_generation);
    dynamic_data* dd = dynamic_data_of (max_generation);
    // For SOH segments we go backwards.
    heap_segment* start_seg = ephemeral_heap_segment;
    PREFIX_ASSUME(start_seg != NULL);
    heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
    heap_segment* seg = start_seg;
    uint8_t* o = heap_segment_mem (seg);

    heap_segment* prev_seg = heap_segment_next (seg);
    int align_const = get_alignment_constant (TRUE);
    if (seg == fseg)
    {
        assert (o == generation_allocation_start (generation_of (max_generation)));
        o = o + Align(size (o), align_const);
    }

    uint8_t* plug_end = o;
    uint8_t* plug_start = o;
    next_sweep_obj = o;
    current_sweep_pos = o;

    //uint8_t* end = background_next_end (seg, (gen == large_object_generation));
    uint8_t* end = heap_segment_background_allocated (seg);
    BOOL delete_p = FALSE;

    //concurrent_print_time_delta ("finished with mark and start with sweep");
    concurrent_print_time_delta ("Sw");
    dprintf (2, ("---- (GC%d)Background Sweep Phase ----", VolatileLoad(&settings.gc_index)));

    //block concurrent allocation for large objects
    dprintf (3, ("lh state: planning"));
    if (gc_lh_block_event.IsValid())
    {
        gc_lh_block_event.Reset();
    }

    for (int i = 0; i <= (max_generation + 1); i++)
    {
        generation* gen_to_reset = generation_of (i);
        generation_allocator (gen_to_reset)->clear();
        generation_free_list_space (gen_to_reset) = 0;
        generation_free_obj_space (gen_to_reset) = 0;
        generation_free_list_allocated (gen_to_reset) = 0;
        generation_end_seg_allocated (gen_to_reset) = 0;
        generation_condemned_allocated (gen_to_reset) = 0;
        //reset the allocation so foreground gc can allocate into older generation
        generation_allocation_pointer (gen_to_reset)= 0;
        generation_allocation_limit (gen_to_reset) = 0;
        generation_allocation_segment (gen_to_reset) = heap_segment_rw (generation_start_segment (gen_to_reset));
    }

    fire_bgc_event (BGC2ndNonConEnd);

    current_bgc_state = bgc_sweep_soh;
    verify_soh_segment_list();

#ifdef FEATURE_BASICFREEZE
    if ((generation_start_segment (gen) != ephemeral_heap_segment) &&
        ro_segments_in_range)
    {
        sweep_ro_segments (generation_start_segment (gen));
    }
#endif // FEATURE_BASICFREEZE

    //TODO BACKGROUND_GC: can we move this to where we switch to the LOH?
    if (current_c_gc_state != c_gc_state_planning)
    {
        current_c_gc_state = c_gc_state_planning;
    }

    concurrent_print_time_delta ("Swe");

    // Record the sweep bound for every LOH segment and clear its swept flag.
    heap_segment* loh_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation + 1)));
    PREFIX_ASSUME(loh_seg != NULL);
    while (loh_seg )
    {
        loh_seg->flags &= ~heap_segment_flags_swept;
        heap_segment_background_allocated (loh_seg) = heap_segment_allocated (loh_seg);
        loh_seg = heap_segment_next_rw (loh_seg);
    }

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_restart_ee);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads for resuming EE"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    if (heap_number == 0)
    {
        restart_EE ();
    }

    fire_bgc_event (BGC2ndConBegin);

    background_ephemeral_sweep();

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_after_ephemeral_sweep);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
#ifdef FEATURE_EVENT_TRACE
        bgc_heap_walk_for_etw_p = ETW::GCLog::ShouldTrackMovementForEtw();
#endif //FEATURE_EVENT_TRACE

        leave_spin_lock (&gc_lock);

#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads for BGC sweeping"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    disable_preemptive (current_thread, TRUE);

    dprintf (2, ("bgs: sweeping gen2 objects"));
    dprintf (2, ("bgs: seg: %Ix, [%Ix, %Ix[%Ix", (size_t)seg,
                 (size_t)heap_segment_mem (seg),
                 (size_t)heap_segment_allocated (seg),
                 (size_t)heap_segment_background_allocated (seg)));

    // Yield to a pending foreground GC every num_objs swept objects.
    int num_objs = 256;
    int current_num_objs = 0;
    heap_segment* next_seg = 0;

    while (1)
    {
        if (o >= end)
        {
            // Finished this segment; SOH is walked backwards, LOH forwards.
            if (gen == large_object_generation)
            {
                next_seg = heap_segment_next (seg);
            }
            else
            {
                next_seg = heap_segment_prev (fseg, seg);
            }

            delete_p = FALSE;

            if (!heap_segment_read_only_p (seg))
            {
                if (gen == large_object_generation)
                {
                    // we can treat all LOH segments as in the bgc domain
                    // regardless of whether we saw in bgc mark or not
                    // because we don't allow LOH allocations during bgc
                    // sweep anyway - the LOH segments can't change.
                    process_background_segment_end (seg, gen, plug_end,
                                                    start_seg, &delete_p);
                }
                else
                {
                    assert (heap_segment_background_allocated (seg) != 0);
                    process_background_segment_end (seg, gen, plug_end,
                                                    start_seg, &delete_p);

                    assert (next_seg || !delete_p);
                }
            }

            if (delete_p)
            {
                generation_delete_heap_segment (gen, seg, prev_seg, next_seg);
            }
            else
            {
                prev_seg = seg;
                dprintf (2, ("seg %Ix has been swept", seg));
                seg->flags |= heap_segment_flags_swept;
            }

            verify_soh_segment_list();

            seg = next_seg;

            dprintf (GTC_LOG, ("seg: %Ix, next_seg: %Ix, prev_seg: %Ix", seg, next_seg, prev_seg));

            if (seg == 0)
            {
                generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));

                PREFIX_ASSUME(generation_allocation_segment(gen) != NULL);

                if (gen != large_object_generation)
                {
                    // Done with SOH; switch over to sweeping the LOH.
                    dprintf (2, ("bgs: sweeping gen3 objects"));
                    current_bgc_state = bgc_sweep_loh;
                    gen = generation_of (max_generation+1);
                    start_seg = heap_segment_rw (generation_start_segment (gen));

                    PREFIX_ASSUME(start_seg != NULL);

                    seg = start_seg;
                    prev_seg = 0;
                    o = generation_allocation_start (gen);
                    assert (method_table (o) == g_pFreeObjectMethodTable);
                    align_const = get_alignment_constant (FALSE);
                    o = o + Align(size (o), align_const);
                    plug_end = o;
                    end = heap_segment_allocated (seg);
                    dprintf (2, ("sweeping gen3 objects"));
                    generation_free_obj_space (gen) = 0;
                    generation_allocator (gen)->clear();
                    generation_free_list_space (gen) = 0;

                    dprintf (2, ("bgs: seg: %Ix, [%Ix, %Ix[%Ix", (size_t)seg,
                                 (size_t)heap_segment_mem (seg),
                                 (size_t)heap_segment_allocated (seg),
                                 (size_t)heap_segment_background_allocated (seg)));
                }
                else
                    break;
            }
            else
            {
                o = heap_segment_mem (seg);
                if (seg == fseg)
                {
                    assert (gen != large_object_generation);
                    assert (o == generation_allocation_start (generation_of (max_generation)));
                    align_const = get_alignment_constant (TRUE);
                    o = o + Align(size (o), align_const);
                }

                plug_end = o;
                current_sweep_pos = o;
                next_sweep_obj = o;

                allow_fgc();
                end = background_next_end (seg, (gen == large_object_generation));
                dprintf (2, ("bgs: seg: %Ix, [%Ix, %Ix[%Ix", (size_t)seg,
                             (size_t)heap_segment_mem (seg),
                             (size_t)heap_segment_allocated (seg),
                             (size_t)heap_segment_background_allocated (seg)));
            }
        }

        if ((o < end) && background_object_marked (o, TRUE))
        {
            plug_start = o;
            if (gen == large_object_generation)
            {
                dprintf (2, ("loh fr: [%Ix-%Ix[(%Id)", plug_end, plug_start, plug_start-plug_end));
            }

            thread_gap (plug_end, plug_start-plug_end, gen);
            if (gen != large_object_generation)
            {
                add_gen_free (max_generation, plug_start-plug_end);
                fix_brick_to_highest (plug_end, plug_start);
                // we need to fix the brick for the next plug here 'cause an FGC can
                // happen and can't read a stale brick.
                fix_brick_to_highest (plug_start, plug_start);
            }

            BOOL m = TRUE;

            while (m)
            {
                next_sweep_obj = o + Align(size (o), align_const);
                current_num_objs++;
                if (current_num_objs >= num_objs)
                {
                    current_sweep_pos = next_sweep_obj;

                    allow_fgc();
                    current_num_objs = 0;
                }

                o = next_sweep_obj;
                if (o >= end)
                {
                    break;
                }

                m = background_object_marked (o, TRUE);
            }
            plug_end = o;
            if (gen != large_object_generation)
            {
                add_gen_plug (max_generation, plug_end-plug_start);
                dd_survived_size (dd) += (plug_end - plug_start);
            }
            dprintf (3, ("bgs: plug [%Ix, %Ix[", (size_t)plug_start, (size_t)plug_end));
        }
        else
        {
            // Skip the dead run, still yielding to foreground GCs periodically.
            while ((o < end) && !background_object_marked (o, FALSE))
            {
                next_sweep_obj = o + Align(size (o), align_const);;
                current_num_objs++;
                if (current_num_objs >= num_objs)
                {
                    current_sweep_pos = plug_end;
                    dprintf (1234, ("f: swept till %Ix", current_sweep_pos));
                    allow_fgc();
                    current_num_objs = 0;
                }

                o = next_sweep_obj;
            }
        }
    }

    size_t total_loh_size = generation_size (max_generation + 1);
    size_t total_soh_size = generation_sizes (generation_of (max_generation));

    dprintf (GTC_LOG, ("h%d: S: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));

    dprintf (GTC_LOG, ("end of bgc sweep: gen2 FL: %Id, FO: %Id",
        generation_free_list_space (generation_of (max_generation)),
        generation_free_obj_space (generation_of (max_generation))));
    dprintf (GTC_LOG, ("h%d: end of bgc sweep: gen3 FL: %Id, FO: %Id",
        heap_number,
        generation_free_list_space (generation_of (max_generation + 1)),
        generation_free_obj_space (generation_of (max_generation + 1))));

    fire_bgc_event (BGC2ndConEnd);
    concurrent_print_time_delta ("background sweep");

    heap_segment* reset_seg = heap_segment_rw (generation_start_segment (generation_of (max_generation)));
    PREFIX_ASSUME(reset_seg != NULL);

    while (reset_seg)
    {
        heap_segment_saved_bg_allocated (reset_seg) = heap_segment_background_allocated (reset_seg);
        heap_segment_background_allocated (reset_seg) = 0;
        reset_seg = heap_segment_next_rw (reset_seg);
    }

    // We calculate dynamic data here because if we wait till we signal the lh event,
    // the allocation thread can change the fragmentation and we may read an intermediate
    // value (which can be greater than the generation size). Plus by that time it won't
    // be accurate.
    compute_new_dynamic_data (max_generation);

    enable_preemptive (current_thread);

#ifdef MULTIPLE_HEAPS
    bgc_t_join.join(this, gc_join_set_state_free);
    if (bgc_t_join.joined())
#endif //MULTIPLE_HEAPS
    {
        // TODO: We are using this join just to set the state. Should
        // look into eliminating it - check to make sure things that use
        // this state can live with per heap state like should_check_bgc_mark.
        current_c_gc_state = c_gc_state_free;

#ifdef MULTIPLE_HEAPS
        dprintf(2, ("Starting BGC threads after background sweep phase"));
        bgc_t_join.restart();
#endif //MULTIPLE_HEAPS
    }

    disable_preemptive (current_thread, TRUE);

    // Unblock LOH allocation now that sweep is finished.
    if (gc_lh_block_event.IsValid())
    {
        gc_lh_block_event.Set();
    }

    //dprintf (GTC_LOG, ("---- (GC%d)End Background Sweep Phase ----", VolatileLoad(&settings.gc_index)));
    dprintf (GTC_LOG, ("---- (GC%d)ESw ----", VolatileLoad(&settings.gc_index)));
}
#endif //BACKGROUND_GC

// Sweeps the LOH for a blocking GC: threads gaps between surviving large-object
// plugs, trims segments, and queues empty non-start segments for deletion.
void gc_heap::sweep_large_objects ()
{
    //this min value is for the sake of the dynamic tuning.
    //so we know that we are not starting even if we have no
    //survivors.
    generation* gen = large_object_generation;
    heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));

    PREFIX_ASSUME(start_seg != NULL);

    heap_segment* seg = start_seg;
    heap_segment* prev_seg = 0;
    uint8_t* o = generation_allocation_start (gen);
    int align_const = get_alignment_constant (FALSE);

    //Skip the generation gap object
    o = o + Align(size (o), align_const);

    uint8_t* plug_end = o;
    uint8_t* plug_start = o;

    generation_allocator (gen)->clear();
    generation_free_list_space (gen) = 0;
    generation_free_obj_space (gen) = 0;


    dprintf (3, ("sweeping large objects"));
    dprintf (3, ("seg: %Ix, [%Ix, %Ix[, starting from %Ix",
                 (size_t)seg,
                 (size_t)heap_segment_mem (seg),
                 (size_t)heap_segment_allocated (seg),
                 o));

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            heap_segment* next_seg = heap_segment_next (seg);
            //delete the empty segment if not the only one
            if ((plug_end == heap_segment_mem (seg)) &&
                (seg != start_seg) && !heap_segment_read_only_p (seg))
            {
                //prepare for deletion
                dprintf (3, ("Preparing empty large segment %Ix", (size_t)seg));
                assert (prev_seg);
                heap_segment_next (prev_seg) = next_seg;
                heap_segment_next (seg) = freeable_large_heap_segment;
                freeable_large_heap_segment = seg;
            }
            else
            {
                if (!heap_segment_read_only_p (seg))
                {
                    dprintf (3, ("Trimming seg to %Ix[", (size_t)plug_end));
                    heap_segment_allocated (seg) = plug_end;
                    decommit_heap_segment_pages (seg, 0);
                }
                prev_seg = seg;
            }
            seg = next_seg;
            if (seg == 0)
                break;
            else
            {
                o = heap_segment_mem (seg);
                plug_end = o;
                dprintf (3, ("seg: %Ix, [%Ix, %Ix[", (size_t)seg,
                             (size_t)heap_segment_mem (seg),
                             (size_t)heap_segment_allocated (seg)));
            }
        }
        if (large_object_marked(o, TRUE))
        {
            plug_start = o;
            //everything between plug_end and plug_start is free
            thread_gap (plug_end, plug_start-plug_end, gen);

            // Extend the plug over the run of consecutively marked large objects,
            // clearing the mark bits as we go.
            BOOL m = TRUE;
            while (m)
            {
                o = o + AlignQword (size (o));
                if (o >= heap_segment_allocated (seg))
                {
                    break;
                }
                m = large_object_marked (o, TRUE);
            }
            plug_end = o;
            dprintf (3, ("plug [%Ix, %Ix[", (size_t)plug_start, (size_t)plug_end));
        }
        else
        {
            while (o < heap_segment_allocated (seg) && !large_object_marked(o, FALSE))
            {
                o = o + AlignQword (size (o));
            }
        }
    }

    generation_allocation_segment (gen) = heap_segment_rw (generation_start_segment (gen));

    PREFIX_ASSUME(generation_allocation_segment(gen) != NULL);
}

// Walks every large object and relocates the references it contains (the objects
// themselves do not move; only their outgoing pointers are updated).
void gc_heap::relocate_in_large_objects ()
{
    relocate_args args;
    args.low = gc_low;
    args.high = gc_high;
    args.last_plug = 0;

    generation* gen = large_object_generation;

    heap_segment* seg = heap_segment_rw (generation_start_segment (gen));

    PREFIX_ASSUME(seg != NULL);

    uint8_t* o = generation_allocation_start (gen);

    while (1)
    {
        if (o >= heap_segment_allocated (seg))
        {
            seg = heap_segment_next_rw (seg);
            if (seg == 0)
                break;
            else
            {
                o = heap_segment_mem (seg);
            }
        }
        while (o < heap_segment_allocated (seg))
        {
            check_class_object_demotion (o);
            if (contain_pointers (o))
            {
                dprintf(3, ("Relocating through large object %Ix", (size_t)o));
                go_through_object_nostart (method_table (o), o, size(o), pval,
                {
                    reloc_survivor_helper (pval);
                });
            }
            o = o + AlignQword (size (o));
        }
    }
}

// Card-marking scan of the LOH: visits large objects whose cards are set and
// applies fn to the cross-generation references found, clearing cards that turn
// out to hold none. (NOTE: function continues past the end of this chunk.)
void gc_heap::mark_through_cards_for_large_objects (card_fn fn,
                                                    BOOL relocating)
{
    uint8_t* low = gc_low;
    size_t end_card = 0;
    generation* oldest_gen = generation_of (max_generation+1);
    heap_segment* seg = heap_segment_rw (generation_start_segment (oldest_gen));

    PREFIX_ASSUME(seg != NULL);

    uint8_t* beg = generation_allocation_start (oldest_gen);
    uint8_t* end = heap_segment_allocated (seg);

    size_t cg_pointers_found = 0;

    size_t card_word_end = (card_of (align_on_card_word (end)) /
                            card_word_width);

    size_t n_eph = 0;
    size_t n_gen = 0;
    size_t n_card_set = 0;
    uint8_t* next_boundary = (relocating ?
                              generation_plan_allocation_start (generation_of (max_generation -1)) :
                              ephemeral_low);

    uint8_t* nhigh = (relocating ?
                      heap_segment_plan_allocated (ephemeral_heap_segment) :
                      ephemeral_high);

    BOOL foundp = FALSE;
    uint8_t* start_address = 0;
    uint8_t* limit = 0;
    size_t card = card_of (beg);
    uint8_t* o = beg;
#ifdef BACKGROUND_GC
    BOOL consider_bgc_mark_p = FALSE;
    BOOL check_current_sweep_p = FALSE;
    BOOL check_saved_sweep_p = FALSE;
    should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC

    size_t total_cards_cleared = 0;

    //dprintf(3,( "scanning large objects from %Ix to %Ix", (size_t)beg, (size_t)end));
    dprintf(3, ("CMl: %Ix->%Ix", (size_t)beg, (size_t)end));
    while (1)
    {
        if ((o < end) && (card_of(o) > card))
        {
            // Moved past the current card; if it yielded no cross-gen pointers,
            // clear the cards we covered.
            dprintf (3, ("Found %Id cg pointers", cg_pointers_found));
            if (cg_pointers_found == 0)
            {
                dprintf(3,(" Clearing cards [%Ix, %Ix[ ", (size_t)card_address(card), (size_t)o));
                clear_cards (card, card_of((uint8_t*)o));
                total_cards_cleared += (card_of((uint8_t*)o) - card);
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
            card = card_of ((uint8_t*)o);
        }
        if ((o < end) &&(card >= end_card))
        {
            // Find the next run of set cards in this segment.
            foundp = find_card (card_table, card, card_word_end, end_card);
            if (foundp)
            {
                n_card_set+= end_card - card;
                start_address = max (beg, card_address (card));
            }
            limit = min (end, card_address (end_card));
        }
        if ((!foundp) || (o >= end) || (card_address (card) >= end))
        {
            // Done with this segment; advance to the next one or stop.
            if ((foundp) && (cg_pointers_found == 0))
            {
                dprintf(3,(" Clearing cards [%Ix, %Ix[ ", (size_t)card_address(card),
                           (size_t)card_address(card+1)));
                clear_cards (card, card+1);
                total_cards_cleared += 1;
            }
            n_eph +=cg_pointers_found;
            cg_pointers_found = 0;
            if ((seg = heap_segment_next_rw (seg)) != 0)
            {
#ifdef BACKGROUND_GC
                should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p);
#endif //BACKGROUND_GC
                beg = heap_segment_mem (seg);
                end = compute_next_end (seg, low);
                card_word_end = card_of (align_on_card_word (end)) / card_word_width;
                card = card_of (beg);
                o = beg;
                end_card = 0;
                continue;
            }
            else
            {
                break;
            }
        }

        assert (card_set_p (card));
        {
            dprintf(3,("card %Ix: o: %Ix, l: %Ix[ ",
                       card, (size_t)o, (size_t)limit));

            assert (Align (size (o)) >= Align (min_obj_size));
            size_t s = size (o);
            uint8_t* next_o = o + AlignQword (s);
            Prefetch (next_o);

            while (o < limit)
            {
                s = size (o);
                assert (Align (s) >= Align (min_obj_size));
                next_o = o + AlignQword (s);
                Prefetch (next_o);

                dprintf (4, ("|%Ix|", (size_t)o));
                if (next_o < start_address)
                {
                    goto end_object;
                }

#ifdef BACKGROUND_GC
                if (!fgc_should_consider_object (o, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p))
                {
                    goto end_object;
                }
#endif //BACKGROUND_GC

#ifdef COLLECTIBLE_CLASS
                if (is_collectible(o))
                {
                    // Collectible types: the reference to the (movable) class
                    // object must be considered too when its card is set.
                    BOOL passed_end_card_p = FALSE;

                    if (card_of (o) > card)
                    {
                        passed_end_card_p = card_transition (o, end, card_word_end,
                            cg_pointers_found,
                            n_eph, n_card_set,
                            card, end_card,
                            foundp, start_address,
                            limit, total_cards_cleared);
                    }

                    if ((!passed_end_card_p || foundp) && (card_of (o) == card))
                    {
                        // card is valid and it covers the head of the object
                        if (fn == &gc_heap::relocate_address)
                        {
                            keep_card_live (o, n_gen, cg_pointers_found);
                        }
                        else
                        {
                            uint8_t* class_obj = get_class_object (o);
                            mark_through_cards_helper (&class_obj, n_gen,
                                                      cg_pointers_found, fn,
                                                      nhigh, next_boundary);
                        }
                    }

                    if (passed_end_card_p)
                    {
                        if (foundp && (card_address (card) < next_o))
                        {
                            goto go_through_refs;
                        }
                        else
                        {
                            goto end_object;
                        }
                    }
                }

go_through_refs:
#endif //COLLECTIBLE_CLASS

                if (contain_pointers (o))
                {
                    dprintf(3,("Going through %Ix", (size_t)o));

                    go_through_object (method_table(o), o, s, poo,
                                       start_address, use_start, (o + s),
                    {
                        if (card_of ((uint8_t*)poo)
> card) + { + BOOL passed_end_card_p = card_transition ((uint8_t*)poo, end, + card_word_end, + cg_pointers_found, + n_eph, n_card_set, + card, end_card, + foundp, start_address, + limit, total_cards_cleared); + + if (passed_end_card_p) + { + if (foundp && (card_address (card) < next_o)) + { + //new_start(); + { + if (ppstop <= (uint8_t**)start_address) + {break;} + else if (poo < (uint8_t**)start_address) + {poo = (uint8_t**)start_address;} + } + } + else + { + goto end_object; + } + } + } + + mark_through_cards_helper (poo, n_gen, + cg_pointers_found, fn, + nhigh, next_boundary); + } + ); + } + + end_object: + o = next_o; + } + + } + } + + // compute the efficiency ratio of the card table + if (!relocating) + { + generation_skip_ratio = min (((n_eph > 800) ? + (int)(((float)n_gen / (float)n_eph) * 100) : 100), + generation_skip_ratio); + + dprintf (3, ("Mloh: cross: %Id, useful: %Id, cards cleared: %Id, cards set: %Id, ratio: %d", + n_eph, n_gen, total_cards_cleared, n_card_set, generation_skip_ratio)); + } + else + { + dprintf (3, ("R: Mloh: cross: %Id, useful: %Id, cards set: %Id, ratio: %d", + n_eph, n_gen, n_card_set, generation_skip_ratio)); + } +} + +void gc_heap::descr_segment (heap_segment* seg ) +{ +#ifdef TRACE_GC + uint8_t* x = heap_segment_mem (seg); + while (x < heap_segment_allocated (seg)) + { + dprintf(2, ( "%Ix: %d ", (size_t)x, size (x))); + x = x + Align(size (x)); + } +#else // TRACE_GC + UNREFERENCED_PARAMETER(seg); +#endif // TRACE_GC +} + +void gc_heap::descr_card_table () +{ +#ifdef TRACE_GC + if (trace_gc && (print_level >= 4)) + { + ptrdiff_t min = -1; + dprintf(3,("Card Table set at: ")); + for (size_t i = card_of (lowest_address); i < card_of (highest_address); i++) + { + if (card_set_p (i)) + { + if (min == -1) + { + min = i; + } + } + else + { + if (! 
((min == -1))) + { + dprintf (3,("[%Ix %Ix[, ", + (size_t)card_address (min), (size_t)card_address (i))); + min = -1; + } + } + } + } +#endif //TRACE_GC +} + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +void gc_heap::descr_generations_to_profiler (gen_walk_fn fn, void *context) +{ +#ifdef MULTIPLE_HEAPS + int n_heaps = GCHeap::GetGCHeap()->GetNumberOfHeaps (); + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = GCHeap::GetHeap(i)->pGenGCHeap; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = NULL; +#ifdef _PREFAST_ + // prefix complains about us dereferencing hp in wks build even though we only access static members + // this way. not sure how to shut it up except for this ugly workaround: + PREFIX_ASSUME(hp != NULL); +#endif // _PREFAST_ +#endif //MULTIPLE_HEAPS + + int curr_gen_number0 = max_generation+1; + while (curr_gen_number0 >= 0) + { + generation* gen = hp->generation_of (curr_gen_number0); + heap_segment* seg = generation_start_segment (gen); + while (seg && (seg != hp->ephemeral_heap_segment)) + { + assert (curr_gen_number0 > 0); + + // report bounds from heap_segment_mem (seg) to + // heap_segment_allocated (seg); + // for generation # curr_gen_number0 + // for heap # heap_no + + fn(context, curr_gen_number0, heap_segment_mem (seg), + heap_segment_allocated (seg), + curr_gen_number0 == max_generation+1 ? 
heap_segment_reserved (seg) : heap_segment_allocated (seg)); + + seg = heap_segment_next (seg); + } + if (seg) + { + assert (seg == hp->ephemeral_heap_segment); + assert (curr_gen_number0 <= max_generation); + // + if (curr_gen_number0 == max_generation) + { + if (heap_segment_mem (seg) < generation_allocation_start (hp->generation_of (max_generation-1))) + { + // report bounds from heap_segment_mem (seg) to + // generation_allocation_start (generation_of (max_generation-1)) + // for heap # heap_number + + fn(context, curr_gen_number0, heap_segment_mem (seg), + generation_allocation_start (hp->generation_of (max_generation-1)), + generation_allocation_start (hp->generation_of (max_generation-1)) ); + } + } + else if (curr_gen_number0 != 0) + { + //report bounds from generation_allocation_start (generation_of (curr_gen_number0)) + // to generation_allocation_start (generation_of (curr_gen_number0-1)) + // for heap # heap_number + + fn(context, curr_gen_number0, generation_allocation_start (hp->generation_of (curr_gen_number0)), + generation_allocation_start (hp->generation_of (curr_gen_number0-1)), + generation_allocation_start (hp->generation_of (curr_gen_number0-1))); + } + else + { + //report bounds from generation_allocation_start (generation_of (curr_gen_number0)) + // to heap_segment_allocated (ephemeral_heap_segment); + // for heap # heap_number + + fn(context, curr_gen_number0, generation_allocation_start (hp->generation_of (curr_gen_number0)), + heap_segment_allocated (hp->ephemeral_heap_segment), + heap_segment_reserved (hp->ephemeral_heap_segment) ); + } + } + curr_gen_number0--; + } + } +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#ifdef TRACE_GC +// Note that when logging is on it can take a long time to go through the free items. 
+void gc_heap::print_free_list (int gen, heap_segment* seg) +{ + UNREFERENCED_PARAMETER(gen); + UNREFERENCED_PARAMETER(seg); +/* + if (settings.concurrent == FALSE) + { + uint8_t* seg_start = heap_segment_mem (seg); + uint8_t* seg_end = heap_segment_allocated (seg); + + dprintf (3, ("Free list in seg %Ix:", seg_start)); + + size_t total_free_item = 0; + + allocator* gen_allocator = generation_allocator (generation_of (gen)); + for (unsigned int b = 0; b < gen_allocator->number_of_buckets(); b++) + { + uint8_t* fo = gen_allocator->alloc_list_head_of (b); + while (fo) + { + if (fo >= seg_start && fo < seg_end) + { + total_free_item++; + + size_t free_item_len = size(fo); + + dprintf (3, ("[%Ix, %Ix[:%Id", + (size_t)fo, + (size_t)(fo + free_item_len), + free_item_len)); + } + + fo = free_list_slot (fo); + } + } + + dprintf (3, ("total %Id free items", total_free_item)); + } +*/ +} +#endif //TRACE_GC + +void gc_heap::descr_generations (BOOL begin_gc_p) +{ + UNREFERENCED_PARAMETER(begin_gc_p); +#ifdef STRESS_LOG + if (StressLog::StressLogOn(LF_GC, LL_INFO10)) + { + gc_heap* hp = 0; +#ifdef MULTIPLE_HEAPS + hp= this; +#endif //MULTIPLE_HEAPS + + STRESS_LOG1(LF_GC, LL_INFO10, "GC Heap %p\n", hp); + for (int n = max_generation; n >= 0; --n) + { + STRESS_LOG4(LF_GC, LL_INFO10, " Generation %d [%p, %p] cur = %p\n", + n, + generation_allocation_start(generation_of(n)), + generation_allocation_limit(generation_of(n)), + generation_allocation_pointer(generation_of(n))); + + heap_segment* seg = generation_start_segment(generation_of(n)); + while (seg) + { + STRESS_LOG4(LF_GC, LL_INFO10, " Segment mem %p alloc = %p used %p committed %p\n", + heap_segment_mem(seg), + heap_segment_allocated(seg), + heap_segment_used(seg), + heap_segment_committed(seg)); + seg = heap_segment_next(seg); + } + } + } +#endif // STRESS_LOG + +#ifdef TRACE_GC + dprintf (2, ("lowest_address: %Ix highest_address: %Ix", + (size_t) lowest_address, (size_t) highest_address)); +#ifdef BACKGROUND_GC + dprintf 
(2, ("bgc lowest_address: %Ix bgc highest_address: %Ix", + (size_t) background_saved_lowest_address, (size_t) background_saved_highest_address)); +#endif //BACKGROUND_GC + + if (heap_number == 0) + { + dprintf (1, ("total heap size: %Id, commit size: %Id", get_total_heap_size(), get_total_committed_size())); + } + + int curr_gen_number = max_generation+1; + while (curr_gen_number >= 0) + { + size_t total_gen_size = generation_size (curr_gen_number); +#ifdef SIMPLE_DPRINTF + dprintf (GTC_LOG, ("[%s][g%d]gen %d:, size: %Id, frag: %Id(L: %Id, O: %Id), f: %d%% %s %s %s", + (begin_gc_p ? "BEG" : "END"), + settings.condemned_generation, + curr_gen_number, + total_gen_size, + dd_fragmentation (dynamic_data_of (curr_gen_number)), + generation_free_list_space (generation_of (curr_gen_number)), + generation_free_obj_space (generation_of (curr_gen_number)), + (total_gen_size ? + (int)(((double)dd_fragmentation (dynamic_data_of (curr_gen_number)) / (double)total_gen_size) * 100) : + 0), + (begin_gc_p ? ("") : (settings.compaction ? "(compact)" : "(sweep)")), + (settings.heap_expansion ? "(EX)" : " "), + (settings.promotion ? 
"Promotion" : "NoPromotion"))); +#else + dprintf (2, ( "Generation %d: gap size: %d, generation size: %Id, fragmentation: %Id", + curr_gen_number, + size (generation_allocation_start (generation_of (curr_gen_number))), + total_gen_size, + dd_fragmentation (dynamic_data_of (curr_gen_number)))); +#endif //SIMPLE_DPRINTF + + generation* gen = generation_of (curr_gen_number); + heap_segment* seg = generation_start_segment (gen); + while (seg && (seg != ephemeral_heap_segment)) + { + dprintf (GTC_LOG, ("g%d: [%Ix %Ix[-%Ix[ (%Id) (%Id)", + curr_gen_number, + (size_t)heap_segment_mem (seg), + (size_t)heap_segment_allocated (seg), + (size_t)heap_segment_committed (seg), + (size_t)(heap_segment_allocated (seg) - heap_segment_mem (seg)), + (size_t)(heap_segment_committed (seg) - heap_segment_allocated (seg)))); + print_free_list (curr_gen_number, seg); + seg = heap_segment_next (seg); + } + if (seg && (seg != generation_start_segment (gen))) + { + dprintf (GTC_LOG, ("g%d: [%Ix %Ix[", + curr_gen_number, + (size_t)heap_segment_mem (seg), + (size_t)generation_allocation_start (generation_of (curr_gen_number-1)))); + print_free_list (curr_gen_number, seg); + + } + else if (seg) + { + dprintf (GTC_LOG, ("g%d: [%Ix %Ix[", + curr_gen_number, + (size_t)generation_allocation_start (generation_of (curr_gen_number)), + (size_t)(((curr_gen_number == 0)) ? 
+ (heap_segment_allocated + (generation_start_segment + (generation_of (curr_gen_number)))) : + (generation_allocation_start + (generation_of (curr_gen_number - 1)))) + )); + print_free_list (curr_gen_number, seg); + } + curr_gen_number--; + } + +#endif //TRACE_GC +} + +#undef TRACE_GC + +//#define TRACE_GC + +//----------------------------------------------------------------------------- +// +// VM Specific support +// +//----------------------------------------------------------------------------- + + +#ifdef TRACE_GC + + unsigned int PromotedObjectCount = 0; + unsigned int CreatedObjectCount = 0; + unsigned int AllocDuration = 0; + unsigned int AllocCount = 0; + unsigned int AllocBigCount = 0; + unsigned int AllocSmallCount = 0; + unsigned int AllocStart = 0; +#endif //TRACE_GC + +//Static member variables. +VOLATILE(BOOL) GCHeap::GcInProgress = FALSE; +//GCTODO +//CMCSafeLock* GCHeap::fGcLock; +CLREvent *GCHeap::WaitForGCEvent = NULL; +//GCTODO +#ifdef TRACE_GC +unsigned int GCHeap::GcDuration; +#endif //TRACE_GC +unsigned GCHeap::GcCondemnedGeneration = 0; +size_t GCHeap::totalSurvivedSize = 0; +#ifdef FEATURE_PREMORTEM_FINALIZATION +CFinalize* GCHeap::m_Finalize = 0; +BOOL GCHeap::GcCollectClasses = FALSE; +VOLATILE(int32_t) GCHeap::m_GCFLock = 0; + +#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way +#ifdef STRESS_HEAP +#ifdef BACKGROUND_GC +int GCHeap::gc_stress_fgcs_in_bgc = 0; +#endif // BACKGROUND_GC +#ifndef MULTIPLE_HEAPS +OBJECTHANDLE GCHeap::m_StressObjs[NUM_HEAP_STRESS_OBJS]; +int GCHeap::m_CurStressObj = 0; +#endif // !MULTIPLE_HEAPS +#endif // STRESS_HEAP +#endif // FEATURE_REDHAWK + +#endif //FEATURE_PREMORTEM_FINALIZATION +inline +static void spin_lock () +{ + enter_spin_lock_noinstru (&m_GCLock); +} + +inline +void EnterAllocLock() +{ +#if defined(_TARGET_X86_) + __asm { + inc dword ptr m_GCLock + jz gotit + call spin_lock + gotit: + } +#else //_TARGET_X86_ + spin_lock(); +#endif //_TARGET_X86_ +} + +inline +void 
LeaveAllocLock() +{ + // Trick this out + leave_spin_lock_noinstru (&m_GCLock); +} + +class AllocLockHolder +{ +public: + AllocLockHolder() + { + EnterAllocLock(); + } + + ~AllocLockHolder() + { + LeaveAllocLock(); + } +}; + +// An explanation of locking for finalization: +// +// Multiple threads allocate objects. During the allocation, they are serialized by +// the AllocLock above. But they release that lock before they register the object +// for finalization. That's because there is much contention for the alloc lock, but +// finalization is presumed to be a rare case. +// +// So registering an object for finalization must be protected by the FinalizeLock. +// +// There is another logical queue that involves finalization. When objects registered +// for finalization become unreachable, they are moved from the "registered" queue to +// the "unreachable" queue. Note that this only happens inside a GC, so no other +// threads can be manipulating either queue at that time. Once the GC is over and +// threads are resumed, the Finalizer thread will dequeue objects from the "unreachable" +// queue and call their finalizers. This dequeue operation is also protected with +// the finalize lock. +// +// At first, this seems unnecessary. Only one thread is ever enqueuing or dequeuing +// on the unreachable queue (either the GC thread during a GC or the finalizer thread +// when a GC is not in progress). The reason we share a lock with threads enqueuing +// on the "registered" queue is that the "registered" and "unreachable" queues are +// interrelated. +// +// They are actually two regions of a longer list, which can only grow at one end. +// So to enqueue an object to the "registered" list, you actually rotate an unreachable +// object at the boundary between the logical queues, out to the other end of the +// unreachable queue -- where all growing takes place. 
Then you move the boundary +// pointer so that the gap we created at the boundary is now on the "registered" +// side rather than the "unreachable" side. Now the object can be placed into the +// "registered" side at that point. This is much more efficient than doing moves +// of arbitrarily long regions, but it causes the two queues to require a shared lock. +// +// Notice that Enter/LeaveFinalizeLock is not a GC-aware spin lock. Instead, it relies +// on the fact that the lock will only be taken for a brief period and that it will +// never provoke or allow a GC while the lock is held. This is critical. If the +// FinalizeLock used enter_spin_lock (and thus sometimes enters preemptive mode to +// allow a GC), then the Alloc client would have to GC protect a finalizable object +// to protect against that eventuality. That is too slow! + + + +BOOL IsValidObject99(uint8_t *pObject) +{ +#ifdef VERIFY_HEAP + if (!((CObjectHeader*)pObject)->IsFree()) + ((CObjectHeader *) pObject)->Validate(); +#endif //VERIFY_HEAP + return(TRUE); +} + +#ifdef BACKGROUND_GC +BOOL gc_heap::bgc_mark_array_range (heap_segment* seg, + BOOL whole_seg_p, + uint8_t** range_beg, + uint8_t** range_end) +{ + uint8_t* seg_start = heap_segment_mem (seg); + uint8_t* seg_end = (whole_seg_p ? 
heap_segment_reserved (seg) : align_on_mark_word (heap_segment_allocated (seg))); + + if ((seg_start < background_saved_highest_address) && + (seg_end > background_saved_lowest_address)) + { + *range_beg = max (seg_start, background_saved_lowest_address); + *range_end = min (seg_end, background_saved_highest_address); + return TRUE; + } + else + { + return FALSE; + } +} + +void gc_heap::bgc_verify_mark_array_cleared (heap_segment* seg) +{ +#if defined (VERIFY_HEAP) && defined (MARK_ARRAY) + if (recursive_gc_sync::background_running_p() && g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + uint8_t* range_beg = 0; + uint8_t* range_end = 0; + + if (bgc_mark_array_range (seg, TRUE, &range_beg, &range_end)) + { + size_t markw = mark_word_of (range_beg); + size_t markw_end = mark_word_of (range_end); + while (markw < markw_end) + { + if (mark_array [markw]) + { + dprintf (3, ("The mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + markw, mark_array [markw], mark_word_address (markw))); + FATAL_GC_ERROR(); + } + markw++; + } + uint8_t* p = mark_word_address (markw_end); + while (p < range_end) + { + assert (!(mark_array_marked (p))); + p++; + } + } + } +#endif //VERIFY_HEAP && MARK_ARRAY +} + +void gc_heap::verify_mark_bits_cleared (uint8_t* obj, size_t s) +{ +#if defined (VERIFY_HEAP) && defined (MARK_ARRAY) + size_t start_mark_bit = mark_bit_of (obj) + 1; + size_t end_mark_bit = mark_bit_of (obj + s); + unsigned int startbit = mark_bit_bit (start_mark_bit); + unsigned int endbit = mark_bit_bit (end_mark_bit); + size_t startwrd = mark_bit_word (start_mark_bit); + size_t endwrd = mark_bit_word (end_mark_bit); + unsigned int result = 0; + + unsigned int firstwrd = ~(lowbits (~0, startbit)); + unsigned int lastwrd = ~(highbits (~0, endbit)); + + if (startwrd == endwrd) + { + unsigned int wrd = firstwrd & lastwrd; + result = mark_array[startwrd] & wrd; + if (result) + { + FATAL_GC_ERROR(); + } + return; + } + + // verify the first mark word is cleared. 
+ if (startbit) + { + result = mark_array[startwrd] & firstwrd; + if (result) + { + FATAL_GC_ERROR(); + } + startwrd++; + } + + for (size_t wrdtmp = startwrd; wrdtmp < endwrd; wrdtmp++) + { + result = mark_array[wrdtmp]; + if (result) + { + FATAL_GC_ERROR(); + } + } + + // set the last mark word. + if (endbit) + { + result = mark_array[endwrd] & lastwrd; + if (result) + { + FATAL_GC_ERROR(); + } + } +#endif //VERIFY_HEAP && MARK_ARRAY +} + +void gc_heap::clear_all_mark_array() +{ +#ifdef MARK_ARRAY + //size_t num_dwords_written = 0; + //size_t begin_time = GetHighPrecisionTimeStamp(); + + generation* gen = generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + while (1) + { + if (seg == 0) + { + if (gen != large_object_generation) + { + gen = generation_of (max_generation+1); + seg = heap_segment_rw (generation_start_segment (gen)); + } + else + { + break; + } + } + + uint8_t* range_beg = 0; + uint8_t* range_end = 0; + + if (bgc_mark_array_range (seg, (seg == ephemeral_heap_segment), &range_beg, &range_end)) + { + size_t markw = mark_word_of (range_beg); + size_t markw_end = mark_word_of (range_end); + size_t size_total = (markw_end - markw) * sizeof (uint32_t); + //num_dwords_written = markw_end - markw; + size_t size = 0; + size_t size_left = 0; + + assert (((size_t)&mark_array[markw] & (sizeof(PTR_PTR)-1)) == 0); + + if ((size_total & (sizeof(PTR_PTR) - 1)) != 0) + { + size = (size_total & ~(sizeof(PTR_PTR) - 1)); + size_left = size_total - size; + assert ((size_left & (sizeof (uint32_t) - 1)) == 0); + } + else + { + size = size_total; + } + + memclr ((uint8_t*)&mark_array[markw], size); + + if (size_left != 0) + { + uint32_t* markw_to_clear = &mark_array[markw + size / sizeof (uint32_t)]; + for (size_t i = 0; i < (size_left / sizeof (uint32_t)); i++) + { + *markw_to_clear = 0; + markw_to_clear++; + } + } + } + + seg = heap_segment_next_rw (seg); + } + + //size_t end_time = GetHighPrecisionTimeStamp() - 
begin_time; + + //printf ("took %Id ms to clear %Id bytes\n", end_time, num_dwords_written*sizeof(uint32_t)); + +#endif //MARK_ARRAY +} + +#endif //BACKGROUND_GC + +void gc_heap::verify_mark_array_cleared (heap_segment* seg) +{ +#if defined (VERIFY_HEAP) && defined (MARK_ARRAY) + assert (card_table == g_card_table); + size_t markw = mark_word_of (heap_segment_mem (seg)); + size_t markw_end = mark_word_of (heap_segment_reserved (seg)); + + while (markw < markw_end) + { + if (mark_array [markw]) + { + dprintf (3, ("The mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + markw, mark_array [markw], mark_word_address (markw))); + FATAL_GC_ERROR(); + } + markw++; + } +#endif //VERIFY_HEAP && MARK_ARRAY +} + +void gc_heap::verify_mark_array_cleared () +{ +#if defined (VERIFY_HEAP) && defined (MARK_ARRAY) + if (recursive_gc_sync::background_running_p() && g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + generation* gen = generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + while (1) + { + if (seg == 0) + { + if (gen != large_object_generation) + { + gen = generation_of (max_generation+1); + seg = heap_segment_rw (generation_start_segment (gen)); + } + else + { + break; + } + } + + bgc_verify_mark_array_cleared (seg); + seg = heap_segment_next_rw (seg); + } + } +#endif //VERIFY_HEAP && MARK_ARRAY +} + +void gc_heap::verify_seg_end_mark_array_cleared() +{ +#if defined (VERIFY_HEAP) && defined (MARK_ARRAY) + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + generation* gen = generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + while (1) + { + if (seg == 0) + { + if (gen != large_object_generation) + { + gen = generation_of (max_generation+1); + seg = heap_segment_rw (generation_start_segment (gen)); + } + else + { + break; + } + } + + // We already cleared all mark array bits for ephemeral generations + // at the beginning of 
bgc sweep + uint8_t* from = ((seg == ephemeral_heap_segment) ? + generation_allocation_start (generation_of (max_generation - 1)) : + heap_segment_allocated (seg)); + size_t markw = mark_word_of (align_on_mark_word (from)); + size_t markw_end = mark_word_of (heap_segment_reserved (seg)); + + while (from < mark_word_address (markw)) + { + if (is_mark_bit_set (from)) + { + dprintf (3, ("mark bit for %Ix was not cleared", from)); + FATAL_GC_ERROR(); + } + + from += mark_bit_pitch; + } + + while (markw < markw_end) + { + if (mark_array [markw]) + { + dprintf (3, ("The mark bits at 0x%Ix:0x%Ix(addr: 0x%Ix) were not cleared", + markw, mark_array [markw], mark_word_address (markw))); + FATAL_GC_ERROR(); + } + markw++; + } + seg = heap_segment_next_rw (seg); + } + } +#endif //VERIFY_HEAP && MARK_ARRAY +} + +// This function is called to make sure we don't mess up the segment list +// in SOH. It's called by: +// 1) begin and end of ephemeral GCs +// 2) during bgc sweep when we switch segments. +void gc_heap::verify_soh_segment_list() +{ +#ifdef VERIFY_HEAP + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_GC) + { + generation* gen = generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + heap_segment* last_seg = 0; + while (seg) + { + last_seg = seg; + seg = heap_segment_next_rw (seg); + } + if (last_seg != ephemeral_heap_segment) + { + FATAL_GC_ERROR(); + } + } +#endif //VERIFY_HEAP +} + +// This function can be called at any foreground GCs or blocking GCs. For background GCs, +// it can be called at the end of the final marking; and at any point during background +// sweep. +// NOTE - to be able to call this function during background sweep, we need to temporarily +// NOT clear the mark array bits as we go. 
+void gc_heap::verify_partial () +{ +#ifdef BACKGROUND_GC + //printf ("GC#%d: Verifying loh during sweep\n", settings.gc_index); + //generation* gen = large_object_generation; + generation* gen = generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + int align_const = get_alignment_constant (gen != large_object_generation); + + uint8_t* o = 0; + uint8_t* end = 0; + size_t s = 0; + + // Different ways to fail. + BOOL mark_missed_p = FALSE; + BOOL bad_ref_p = FALSE; + BOOL free_ref_p = FALSE; + + while (1) + { + if (seg == 0) + { + if (gen != large_object_generation) + { + //switch to LOH + gen = large_object_generation; + align_const = get_alignment_constant (gen != large_object_generation); + seg = heap_segment_rw (generation_start_segment (gen)); + continue; + } + else + { + break; + } + } + + o = heap_segment_mem (seg); + end = heap_segment_allocated (seg); + //printf ("validating [%Ix-[%Ix\n", o, end); + while (o < end) + { + s = size (o); + + BOOL marked_p = background_object_marked (o, FALSE); + + if (marked_p) + { + go_through_object_cl (method_table (o), o, s, oo, + { + if (*oo) + { + //dprintf (3, ("VOM: verifying member %Ix in obj %Ix", (size_t)*oo, o)); + MethodTable *pMT = method_table (*oo); + + if (pMT == g_pFreeObjectMethodTable) + { + free_ref_p = TRUE; + FATAL_GC_ERROR(); + } + + if (!pMT->SanityCheck()) + { + bad_ref_p = TRUE; + dprintf (3, ("Bad member of %Ix %Ix", + (size_t)oo, (size_t)*oo)); + FATAL_GC_ERROR(); + } + + if (current_bgc_state == bgc_final_marking) + { + if (marked_p && !background_object_marked (*oo, FALSE)) + { + mark_missed_p = TRUE; + FATAL_GC_ERROR(); + } + } + } + } + ); + } + + o = o + Align(s, align_const); + } + seg = heap_segment_next_rw (seg); + } + + //printf ("didn't find any large object large enough...\n"); + //printf ("finished verifying loh\n"); +#endif //BACKGROUND_GC +} + +#ifdef VERIFY_HEAP + +void +gc_heap::verify_free_lists () +{ + for (int gen_num = 0; 
gen_num <= max_generation+1; gen_num++) + { + dprintf (3, ("Verifying free list for gen:%d", gen_num)); + allocator* gen_alloc = generation_allocator (generation_of (gen_num)); + size_t sz = gen_alloc->first_bucket_size(); + bool verify_undo_slot = (gen_num != 0) && (gen_num != max_generation+1) && !gen_alloc->discard_if_no_fit_p(); + + for (unsigned int a_l_number = 0; a_l_number < gen_alloc->number_of_buckets(); a_l_number++) + { + uint8_t* free_list = gen_alloc->alloc_list_head_of (a_l_number); + uint8_t* prev = 0; + while (free_list) + { + if (!((CObjectHeader*)free_list)->IsFree()) + { + dprintf (3, ("Verifiying Heap: curr free list item %Ix isn't a free object)", + (size_t)free_list)); + FATAL_GC_ERROR(); + } + if (((a_l_number < (gen_alloc->number_of_buckets()-1))&& (unused_array_size (free_list) >= sz)) + || ((a_l_number != 0) && (unused_array_size (free_list) < sz/2))) + { + dprintf (3, ("Verifiying Heap: curr free list item %Ix isn't in the right bucket", + (size_t)free_list)); + FATAL_GC_ERROR(); + } + if (verify_undo_slot && (free_list_undo (free_list) != UNDO_EMPTY)) + { + dprintf (3, ("Verifiying Heap: curr free list item %Ix has non empty undo slot", + (size_t)free_list)); + FATAL_GC_ERROR(); + } + if ((gen_num != max_generation+1)&&(object_gennum (free_list)!= gen_num)) + { + dprintf (3, ("Verifiying Heap: curr free list item %Ix is in the wrong generation free list", + (size_t)free_list)); + FATAL_GC_ERROR(); + } + + prev = free_list; + free_list = free_list_slot (free_list); + } + //verify the sanity of the tail + uint8_t* tail = gen_alloc->alloc_list_tail_of (a_l_number); + if (!((tail == 0) || (tail == prev))) + { + dprintf (3, ("Verifying Heap: tail of free list is not correct")); + FATAL_GC_ERROR(); + } + if (tail == 0) + { + uint8_t* head = gen_alloc->alloc_list_head_of (a_l_number); + if ((head != 0) && (free_list_slot (head) != 0)) + { + dprintf (3, ("Verifying Heap: tail of free list is not correct")); + FATAL_GC_ERROR(); + } + } + + sz 
*=2; + } + } +} + +void +gc_heap::verify_heap (BOOL begin_gc_p) +{ + int heap_verify_level = g_pConfig->GetHeapVerifyLevel(); + size_t last_valid_brick = 0; + BOOL bCurrentBrickInvalid = FALSE; + BOOL large_brick_p = TRUE; + size_t curr_brick = 0; + size_t prev_brick = (size_t)-1; + int curr_gen_num = max_generation+1; + heap_segment* seg = heap_segment_in_range (generation_start_segment (generation_of (curr_gen_num ) )); + + PREFIX_ASSUME(seg != NULL); + + uint8_t* curr_object = heap_segment_mem (seg); + uint8_t* prev_object = 0; + uint8_t* begin_youngest = generation_allocation_start(generation_of(0)); + uint8_t* end_youngest = heap_segment_allocated (ephemeral_heap_segment); + uint8_t* next_boundary = generation_allocation_start (generation_of (max_generation - 1)); + int align_const = get_alignment_constant (FALSE); + size_t total_objects_verified = 0; + size_t total_objects_verified_deep = 0; + +#ifdef BACKGROUND_GC + BOOL consider_bgc_mark_p = FALSE; + BOOL check_current_sweep_p = FALSE; + BOOL check_saved_sweep_p = FALSE; + should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p); +#endif //BACKGROUND_GC + +#ifdef MULTIPLE_HEAPS + t_join* current_join = &gc_t_join; +#ifdef BACKGROUND_GC + if (settings.concurrent && (bgc_thread_id.IsCurrentThread())) + { + // We always call verify_heap on entry of GC on the SVR GC threads. + current_join = &bgc_t_join; + } +#endif //BACKGROUND_GC +#endif //MULTIPLE_HEAPS + + UNREFERENCED_PARAMETER(begin_gc_p); +#ifdef BACKGROUND_GC + dprintf (2,("[%s]GC#%d(%s): Verifying heap - begin", + (begin_gc_p ? "BEG" : "END"), + VolatileLoad(&settings.gc_index), + (settings.concurrent ? "BGC" : (recursive_gc_sync::background_running_p() ? "FGC" : "NGC")))); +#else + dprintf (2,("[%s]GC#%d: Verifying heap - begin", + (begin_gc_p ? 
"BEG" : "END"), VolatileLoad(&settings.gc_index))); +#endif //BACKGROUND_GC + +#ifndef MULTIPLE_HEAPS + if ((g_ephemeral_low != generation_allocation_start (generation_of (max_generation - 1))) || + (g_ephemeral_high != heap_segment_reserved (ephemeral_heap_segment))) + { + FATAL_GC_ERROR(); + } +#endif //MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + //don't touch the memory because the program is allocating from it. + if (!settings.concurrent) +#endif //BACKGROUND_GC + { + if (!(heap_verify_level & EEConfig::HEAPVERIFY_NO_MEM_FILL)) + { + //uninit the unused portions of segments. + generation* gen1 = large_object_generation; + heap_segment* seg1 = heap_segment_rw (generation_start_segment (gen1)); + PREFIX_ASSUME(seg1 != NULL); + + while (1) + { + if (seg1) + { + uint8_t* clear_start = heap_segment_allocated (seg1) - plug_skew; + if (heap_segment_used (seg1) > clear_start) + { + dprintf (3, ("setting end of seg %Ix: [%Ix-[%Ix to 0xaa", + heap_segment_mem (seg1), + clear_start , + heap_segment_used (seg1))); + memset (heap_segment_allocated (seg1) - plug_skew, 0xaa, + (heap_segment_used (seg1) - clear_start)); + } + seg1 = heap_segment_next_rw (seg1); + } + else + { + if (gen1 == large_object_generation) + { + gen1 = generation_of (max_generation); + seg1 = heap_segment_rw (generation_start_segment (gen1)); + PREFIX_ASSUME(seg1 != NULL); + } + else + { + break; + } + } + } + } + } + +#ifdef MULTIPLE_HEAPS + current_join->join(this, gc_join_verify_copy_table); + if (current_join->joined()) + { + // in concurrent GC, new segment could be allocated when GC is working so the card brick table might not be updated at this point + for (int i = 0; i < n_heaps; i++) + { + //copy the card and brick tables + if (g_card_table != g_heaps[i]->card_table) + { + g_heaps[i]->copy_brick_card_table(); + } + } + + current_join->restart(); + } +#else + if (g_card_table != card_table) + copy_brick_card_table(); +#endif //MULTIPLE_HEAPS + + //verify that the generation structures makes sense 
+ { + generation* gen = generation_of (max_generation); + + assert (generation_allocation_start (gen) == + heap_segment_mem (heap_segment_rw (generation_start_segment (gen)))); + int gen_num = max_generation-1; + generation* prev_gen = gen; + while (gen_num >= 0) + { + gen = generation_of (gen_num); + assert (generation_allocation_segment (gen) == ephemeral_heap_segment); + assert (generation_allocation_start (gen) >= heap_segment_mem (ephemeral_heap_segment)); + assert (generation_allocation_start (gen) < heap_segment_allocated (ephemeral_heap_segment)); + + if (generation_start_segment (prev_gen ) == + generation_start_segment (gen)) + { + assert (generation_allocation_start (prev_gen) < + generation_allocation_start (gen)); + } + prev_gen = gen; + gen_num--; + } + } + + while (1) + { + // Handle segment transitions + if (curr_object >= heap_segment_allocated (seg)) + { + if (curr_object > heap_segment_allocated(seg)) + { + dprintf (3, ("Verifiying Heap: curr_object: %Ix > heap_segment_allocated (seg: %Ix)", + (size_t)curr_object, (size_t)seg)); + FATAL_GC_ERROR(); + } + seg = heap_segment_next_in_range (seg); + if (seg) + { +#ifdef BACKGROUND_GC + should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p); +#endif //BACKGROUND_GC + curr_object = heap_segment_mem(seg); + prev_object = 0; + continue; + } + else + { + if (curr_gen_num == (max_generation+1)) + { + curr_gen_num--; + seg = heap_segment_in_range (generation_start_segment (generation_of (curr_gen_num))); + + PREFIX_ASSUME(seg != NULL); + +#ifdef BACKGROUND_GC + should_check_bgc_mark (seg, &consider_bgc_mark_p, &check_current_sweep_p, &check_saved_sweep_p); +#endif //BACKGROUND_GC + curr_object = heap_segment_mem (seg); + prev_object = 0; + large_brick_p = FALSE; + align_const = get_alignment_constant (TRUE); + } + else + break; // Done Verifying Heap -- no more segments + } + } + + // Are we at the end of the youngest_generation? 
+ if (seg == ephemeral_heap_segment) + { + if (curr_object >= end_youngest) + { + // prev_object length is too long if we hit this int3 + if (curr_object > end_youngest) + { + dprintf (3, ("Verifiying Heap: curr_object: %Ix > end_youngest: %Ix", + (size_t)curr_object, (size_t)end_youngest)); + FATAL_GC_ERROR(); + } + break; + } + + if ((curr_object >= next_boundary) && (curr_gen_num > 0)) + { + curr_gen_num--; + if (curr_gen_num > 0) + { + next_boundary = generation_allocation_start (generation_of (curr_gen_num - 1)); + } + } + } + + //if (is_mark_set (curr_object)) + //{ + // printf ("curr_object: %Ix is marked!",(size_t)curr_object); + // FATAL_GC_ERROR(); + //} + + size_t s = size (curr_object); + dprintf (3, ("o: %Ix, s: %d", (size_t)curr_object, s)); + if (s == 0) + { + dprintf (3, ("Verifying Heap: size of current object %Ix == 0", curr_object)); + FATAL_GC_ERROR(); + } + + // If object is not in the youngest generation, then lets + // verify that the brick table is correct.... + if (((seg != ephemeral_heap_segment) || + (brick_of(curr_object) < brick_of(begin_youngest)))) + { + curr_brick = brick_of(curr_object); + + // Brick Table Verification... + // + // On brick transition + // if brick is negative + // verify that brick indirects to previous valid brick + // else + // set current brick invalid flag to be flipped if we + // encounter an object at the correct place + // + if (curr_brick != prev_brick) + { + // If the last brick we were examining had positive + // entry but we never found the matching object, then + // we have a problem + // If prev_brick was the last one of the segment + // it's ok for it to be invalid because it is never looked at + if (bCurrentBrickInvalid && + (curr_brick != brick_of (heap_segment_mem (seg))) && + !heap_segment_read_only_p (seg)) + { + dprintf (3, ("curr brick %Ix invalid", curr_brick)); + FATAL_GC_ERROR(); + } + + if (large_brick_p) + { + //large objects verify the table only if they are in + //range. 
+ if ((heap_segment_reserved (seg) <= highest_address) && + (heap_segment_mem (seg) >= lowest_address) && + brick_table [curr_brick] != 0) + { + dprintf (3, ("curr_brick %Ix for large object %Ix not set to -32768", + curr_brick, (size_t)curr_object)); + FATAL_GC_ERROR(); + } + else + { + bCurrentBrickInvalid = FALSE; + } + } + else + { + // If the current brick contains a negative value make sure + // that the indirection terminates at the last valid brick + if (brick_table [curr_brick] <= 0) + { + if (brick_table [curr_brick] == 0) + { + dprintf(3, ("curr_brick %Ix for object %Ix set to 0", + curr_brick, (size_t)curr_object)); + FATAL_GC_ERROR(); + } + ptrdiff_t i = curr_brick; + while ((i >= ((ptrdiff_t) brick_of (heap_segment_mem (seg)))) && + (brick_table[i] < 0)) + { + i = i + brick_table[i]; + } + if (i < ((ptrdiff_t)(brick_of (heap_segment_mem (seg))) - 1)) + { + dprintf (3, ("ptrdiff i: %Ix < brick_of (heap_segment_mem (seg)):%Ix - 1. curr_brick: %Ix", + i, brick_of (heap_segment_mem (seg)), + curr_brick)); + FATAL_GC_ERROR(); + } + // if (i != last_valid_brick) + // FATAL_GC_ERROR(); + bCurrentBrickInvalid = FALSE; + } + else if (!heap_segment_read_only_p (seg)) + { + bCurrentBrickInvalid = TRUE; + } + } + } + + if (bCurrentBrickInvalid) + { + if (curr_object == (brick_address(curr_brick) + brick_table[curr_brick] - 1)) + { + bCurrentBrickInvalid = FALSE; + last_valid_brick = curr_brick; + } + } + } + + if (*((uint8_t**)curr_object) != (uint8_t *) g_pFreeObjectMethodTable) + { +#ifdef FEATURE_LOH_COMPACTION + if ((curr_gen_num == (max_generation+1)) && (prev_object != 0)) + { + assert (method_table (prev_object) == g_pFreeObjectMethodTable); + } +#endif //FEATURE_LOH_COMPACTION + + total_objects_verified++; + + BOOL can_verify_deep = TRUE; +#ifdef BACKGROUND_GC + can_verify_deep = fgc_should_consider_object (curr_object, seg, consider_bgc_mark_p, check_current_sweep_p, check_saved_sweep_p); +#endif //BACKGROUND_GC + + BOOL deep_verify_obj = 
can_verify_deep; + if ((heap_verify_level & EEConfig::HEAPVERIFY_DEEP_ON_COMPACT) && !settings.compaction) + deep_verify_obj = FALSE; + + ((CObjectHeader*)curr_object)->ValidateHeap((Object*)curr_object, deep_verify_obj); + + if (can_verify_deep) + { + if (curr_gen_num > 0) + { + BOOL need_card_p = FALSE; + if (contain_pointers_or_collectible (curr_object)) + { + dprintf (4, ("curr_object: %Ix", (size_t)curr_object)); + size_t crd = card_of (curr_object); + BOOL found_card_p = card_set_p (crd); + +#ifdef COLLECTIBLE_CLASS + if (is_collectible(curr_object)) + { + uint8_t* class_obj = get_class_object (curr_object); + if ((class_obj < ephemeral_high) && (class_obj >= next_boundary)) + { + if (!found_card_p) + { + dprintf (3, ("Card not set, curr_object = [%Ix:%Ix pointing to class object %Ix", + card_of (curr_object), (size_t)curr_object, class_obj)); + + FATAL_GC_ERROR(); + } + } + } +#endif //COLLECTIBLE_CLASS + + if (contain_pointers(curr_object)) + { + go_through_object_nostart + (method_table(curr_object), curr_object, s, oo, + { + if ((crd != card_of ((uint8_t*)oo)) && !found_card_p) + { + crd = card_of ((uint8_t*)oo); + found_card_p = card_set_p (crd); + need_card_p = FALSE; + } + if ((*oo < ephemeral_high) && (*oo >= next_boundary)) + { + need_card_p = TRUE; + } + + if (need_card_p && !found_card_p) + { + + dprintf (3, ("Card not set, curr_object = [%Ix:%Ix, %Ix:%Ix[", + card_of (curr_object), (size_t)curr_object, + card_of (curr_object+Align(s, align_const)), (size_t)curr_object+Align(s, align_const))); + FATAL_GC_ERROR(); + } + } + ); + } + if (need_card_p && !found_card_p) + { + dprintf (3, ("Card not set, curr_object = [%Ix:%Ix, %Ix:%Ix[", + card_of (curr_object), (size_t)curr_object, + card_of (curr_object+Align(s, align_const)), (size_t)curr_object+Align(s, align_const))); + FATAL_GC_ERROR(); + } + } + } + total_objects_verified_deep++; + } + } + + prev_object = curr_object; + prev_brick = curr_brick; + curr_object = curr_object + Align(s, align_const); 
+ if (curr_object < prev_object) + { + dprintf (3, ("overflow because of a bad object size: %Ix size %Ix", prev_object, s)); + FATAL_GC_ERROR(); + } + } + +#ifdef BACKGROUND_GC + dprintf (2, ("(%s)(%s)(%s) total_objects_verified is %Id, total_objects_verified_deep is %Id", + (settings.concurrent ? "BGC" : (recursive_gc_sync::background_running_p () ? "FGC" : "NGC")), + (begin_gc_p ? "BEG" : "END"), + ((current_c_gc_state == c_gc_state_planning) ? "in plan" : "not in plan"), + total_objects_verified, total_objects_verified_deep)); + if (current_c_gc_state != c_gc_state_planning) + { + assert (total_objects_verified == total_objects_verified_deep); + } +#endif //BACKGROUND_GC + + verify_free_lists(); + +#ifdef FEATURE_PREMORTEM_FINALIZATION + finalize_queue->CheckFinalizerObjects(); +#endif // FEATURE_PREMORTEM_FINALIZATION + + { + // to be consistent with handle table APIs pass a ScanContext* + // to provide the heap number. the SC isn't complete though so + // limit its scope to handle table verification. + ScanContext sc; + sc.thread_number = heap_number; + GCScan::VerifyHandleTable(max_generation, max_generation, &sc); + } + +#ifdef MULTIPLE_HEAPS + current_join->join(this, gc_join_verify_objects_done); + if (current_join->joined()) +#endif //MULTIPLE_HEAPS + { + SyncBlockCache::GetSyncBlockCache()->VerifySyncTableEntry(); +#ifdef MULTIPLE_HEAPS + current_join->restart(); +#endif //MULTIPLE_HEAPS + } + +#ifdef BACKGROUND_GC + if (!settings.concurrent) + { + if (current_c_gc_state == c_gc_state_planning) + { + // temporarily commenting this out 'cause an FGC + // could be triggered before we sweep ephemeral. + //verify_seg_end_mark_array_cleared(); + } + } + + if (settings.concurrent) + { + verify_mark_array_cleared(); + } + dprintf (2,("GC%d(%s): Verifying heap - end", + VolatileLoad(&settings.gc_index), + (settings.concurrent ? "BGC" : (recursive_gc_sync::background_running_p() ? 
"FGC" : "NGC")))); +#else + dprintf (2,("GC#d: Verifying heap - end", VolatileLoad(&settings.gc_index))); +#endif //BACKGROUND_GC +} + +void GCHeap::ValidateObjectMember (Object* obj) +{ + size_t s = size (obj); + uint8_t* o = (uint8_t*)obj; + + go_through_object_cl (method_table (obj), o, s, oo, + { + uint8_t* child_o = *oo; + if (child_o) + { + dprintf (3, ("VOM: m: %Ix obj %Ix", (size_t)child_o, o)); + MethodTable *pMT = method_table (child_o); + if (!pMT->SanityCheck()) { + dprintf (3, ("Bad member of %Ix %Ix", + (size_t)oo, (size_t)child_o)); + FATAL_GC_ERROR(); + } + } + } ); + +} +#endif //VERIFY_HEAP + +void DestructObject (CObjectHeader* hdr) +{ + UNREFERENCED_PARAMETER(hdr); // compiler bug? -- this *is*, indeed, referenced + hdr->~CObjectHeader(); +} + +HRESULT GCHeap::Shutdown () +{ + deleteGCShadow(); + + GCScan::GcRuntimeStructuresValid (FALSE); + + // Cannot assert this, since we use SuspendEE as the mechanism to quiesce all + // threads except the one performing the shutdown. + // ASSERT( !GcInProgress ); + + // Guard against any more GC occurring and against any threads blocking + // for GC to complete when the GC heap is gone. This fixes a race condition + // where a thread in GC is destroyed as part of process destruction and + // the remaining threads block for GC complete. 
+ + //GCTODO + //EnterAllocLock(); + //Enter(); + //EnterFinalizeLock(); + //SetGCDone(); + + // during shutdown lot of threads are suspended + // on this even, we don't want to wake them up just yet + //CloseHandle (WaitForGCEvent); + + //find out if the global card table hasn't been used yet + uint32_t* ct = &g_card_table[card_word (gcard_of (g_lowest_address))]; + if (card_table_refcount (ct) == 0) + { + destroy_card_table (ct); + g_card_table = 0; +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + } + + //destroy all segments on the standby list + while(gc_heap::segment_standby_list != 0) + { + heap_segment* next_seg = heap_segment_next (gc_heap::segment_standby_list); +#ifdef MULTIPLE_HEAPS + (gc_heap::g_heaps[0])->delete_heap_segment (gc_heap::segment_standby_list, FALSE); +#else //MULTIPLE_HEAPS + pGenGCHeap->delete_heap_segment (gc_heap::segment_standby_list, FALSE); +#endif //MULTIPLE_HEAPS + gc_heap::segment_standby_list = next_seg; + } + + +#ifdef MULTIPLE_HEAPS + + for (int i = 0; i < gc_heap::n_heaps; i ++) + { + delete gc_heap::g_heaps[i]->vm_heap; + //destroy pure GC stuff + gc_heap::destroy_gc_heap (gc_heap::g_heaps[i]); + } +#else + gc_heap::destroy_gc_heap (pGenGCHeap); + +#endif //MULTIPLE_HEAPS + gc_heap::shutdown_gc(); + + return S_OK; +} + +// Wait until a garbage collection is complete +// returns NOERROR if wait was OK, other error code if failure. +// WARNING: This will not undo the must complete state. If you are +// in a must complete when you call this, you'd better know what you're +// doing. 
+ +#ifdef FEATURE_PREMORTEM_FINALIZATION +static +HRESULT AllocateCFinalize(CFinalize **pCFinalize) +{ + *pCFinalize = new (nothrow) CFinalize(); + if (*pCFinalize == NULL || !(*pCFinalize)->Initialize()) + return E_OUTOFMEMORY; + + return S_OK; +} +#endif // FEATURE_PREMORTEM_FINALIZATION + +// init the instance heap +HRESULT GCHeap::Init(size_t hn) +{ + HRESULT hres = S_OK; + + //Initialize all of the instance members. + +#ifdef MULTIPLE_HEAPS + m_GCLock = -1; +#endif //MULTIPLE_HEAPS + + // Rest of the initialization + +#ifdef MULTIPLE_HEAPS + if ((pGenGCHeap = gc_heap::make_gc_heap(this, (int)hn)) == 0) + hres = E_OUTOFMEMORY; +#else + UNREFERENCED_PARAMETER(hn); + if (!gc_heap::make_gc_heap()) + hres = E_OUTOFMEMORY; +#endif //MULTIPLE_HEAPS + + // Failed. + return hres; +} + +//System wide initialization +HRESULT GCHeap::Initialize () +{ + + HRESULT hr = S_OK; + + if (!GCToOSInterface::Initialize()) + { + return E_FAIL; + } + +//Initialize the static members. +#ifdef TRACE_GC + GcDuration = 0; + CreatedObjectCount = 0; +#endif //TRACE_GC + + size_t seg_size = get_valid_segment_size(); + size_t large_seg_size = get_valid_segment_size(TRUE); + gc_heap::min_segment_size = min (seg_size, large_seg_size); + +#ifdef MULTIPLE_HEAPS + if (g_pConfig->GetGCNoAffinitize()) + gc_heap::gc_thread_no_affinitize_p = true; + + uint32_t nhp_from_config = g_pConfig->GetGCHeapCount(); + // GetGCProcessCpuCount only returns up to 64 procs. + uint32_t nhp_from_process = CPUGroupInfo::CanEnableGCCPUGroups() ? + CPUGroupInfo::GetNumActiveProcessors(): + GCToOSInterface::GetCurrentProcessCpuCount(); + + uint32_t nhp = ((nhp_from_config == 0) ? 
nhp_from_process : + (min (nhp_from_config, nhp_from_process))); + + hr = gc_heap::initialize_gc (seg_size, large_seg_size /*LHEAP_ALLOC*/, nhp); +#else + hr = gc_heap::initialize_gc (seg_size, large_seg_size /*LHEAP_ALLOC*/); +#endif //MULTIPLE_HEAPS + + if (hr != S_OK) + return hr; + + gc_heap::total_physical_mem = GCToOSInterface::GetPhysicalMemoryLimit(); + + gc_heap::mem_one_percent = gc_heap::total_physical_mem / 100; +#ifndef MULTIPLE_HEAPS + gc_heap::mem_one_percent /= g_SystemInfo.dwNumberOfProcessors; +#endif //!MULTIPLE_HEAPS + + // We should only use this if we are in the "many process" mode which really is only applicable + // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory. + // For now I am using an estimate to calculate these numbers but this should really be obtained + // programmatically going forward. + // I am assuming 47 processes using WKS GC and 3 using SVR GC. + // I am assuming 3 in part due to the "very high memory load" is 97%. 
+ int available_mem_th = 10; + if (gc_heap::total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024)) + { + int adjusted_available_mem_th = 3 + (int)((float)47 / (float)(g_SystemInfo.dwNumberOfProcessors)); + available_mem_th = min (available_mem_th, adjusted_available_mem_th); + } + + gc_heap::high_memory_load_th = 100 - available_mem_th; + +#if defined(BIT64) + gc_heap::youngest_gen_desired_th = gc_heap::mem_one_percent; +#endif // BIT64 + + WaitForGCEvent = new (nothrow) CLREvent; + + if (!WaitForGCEvent) + { + return E_OUTOFMEMORY; + } + + if (!WaitForGCEvent->CreateManualEventNoThrow(TRUE)) + { + return E_FAIL; + } + + StompWriteBarrierResize(true, false); + +#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way +#if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS) + if (GCStress<cfg_any>::IsEnabled()) { + for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++) + m_StressObjs[i] = CreateGlobalHandle(0); + m_CurStressObj = 0; + } +#endif //STRESS_HEAP && !MULTIPLE_HEAPS +#endif // FEATURE_REDHAWK + + initGCShadow(); // If we are debugging write barriers, initialize heap shadow + +#ifdef MULTIPLE_HEAPS + + for (unsigned i = 0; i < nhp; i++) + { + GCHeap* Hp = new (nothrow) GCHeap(); + if (!Hp) + return E_OUTOFMEMORY; + + if ((hr = Hp->Init (i))!= S_OK) + { + return hr; + } + } + // initialize numa node to heap map + heap_select::init_numa_node_to_heap_map(nhp); +#else + hr = Init (0); +#endif //MULTIPLE_HEAPS + + if (hr == S_OK) + { + GCScan::GcRuntimeStructuresValid (TRUE); + +#ifdef GC_PROFILING + if (CORProfilerTrackGC()) + UpdateGenerationBounds(); +#endif // GC_PROFILING + } + + return hr; +}; + +//// +// GC callback functions +BOOL GCHeap::IsPromoted(Object* object) +{ +#ifdef _DEBUG + ((CObjectHeader*)object)->Validate(); +#endif //_DEBUG + + uint8_t* o = (uint8_t*)object; + + if (gc_heap::settings.condemned_generation == max_generation) + { +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; 
+#endif //MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + if (gc_heap::settings.concurrent) + { + BOOL is_marked = (!((o < hp->background_saved_highest_address) && (o >= hp->background_saved_lowest_address))|| + hp->background_marked (o)); + return is_marked; + } + else +#endif //BACKGROUND_GC + { + return (!((o < hp->highest_address) && (o >= hp->lowest_address)) + || hp->is_mark_set (o)); + } + } + else + { + gc_heap* hp = gc_heap::heap_of (o); + return (!((o < hp->gc_high) && (o >= hp->gc_low)) + || hp->is_mark_set (o)); + } +} + +size_t GCHeap::GetPromotedBytes(int heap_index) +{ +#ifdef BACKGROUND_GC + if (gc_heap::settings.concurrent) + { + return gc_heap::bpromoted_bytes (heap_index); + } + else +#endif //BACKGROUND_GC + { + return gc_heap::promoted_bytes (heap_index); + } +} + +unsigned int GCHeap::WhichGeneration (Object* object) +{ + gc_heap* hp = gc_heap::heap_of ((uint8_t*)object); + unsigned int g = hp->object_gennum ((uint8_t*)object); + dprintf (3, ("%Ix is in gen %d", (size_t)object, g)); + return g; +} + +BOOL GCHeap::IsEphemeral (Object* object) +{ + uint8_t* o = (uint8_t*)object; + gc_heap* hp = gc_heap::heap_of (o); + return hp->ephemeral_pointer_p (o); +} + +#ifdef VERIFY_HEAP +// Return NULL if can't find next object. When EE is not suspended, +// the result is not accurate: if the input arg is in gen0, the function could +// return zeroed out memory as next object +Object * GCHeap::NextObj (Object * object) +{ + uint8_t* o = (uint8_t*)object; + +#ifndef FEATURE_BASICFREEZE + if (!((o < g_highest_address) && (o >= g_lowest_address))) + { + return NULL; + } +#endif //!FEATURE_BASICFREEZE + + heap_segment * hs = gc_heap::find_segment (o, FALSE); + if (!hs) + { + return NULL; + } + + BOOL large_object_p = heap_segment_loh_p (hs); + if (large_object_p) + return NULL; //could be racing with another core allocating. 
+#ifdef MULTIPLE_HEAPS + gc_heap* hp = heap_segment_heap (hs); +#else //MULTIPLE_HEAPS + gc_heap* hp = 0; +#endif //MULTIPLE_HEAPS + unsigned int g = hp->object_gennum ((uint8_t*)object); + if ((g == 0) && hp->settings.demotion) + return NULL;//could be racing with another core allocating. + int align_const = get_alignment_constant (!large_object_p); + uint8_t* nextobj = o + Align (size (o), align_const); + if (nextobj <= o) // either overflow or 0 sized object. + { + return NULL; + } + + if ((nextobj < heap_segment_mem(hs)) || + (nextobj >= heap_segment_allocated(hs) && hs != hp->ephemeral_heap_segment) || + (nextobj >= hp->alloc_allocated)) + { + return NULL; + } + + return (Object *)nextobj; +} + +#ifdef FEATURE_BASICFREEZE +BOOL GCHeap::IsInFrozenSegment (Object * object) +{ + uint8_t* o = (uint8_t*)object; + heap_segment * hs = gc_heap::find_segment (o, FALSE); + //We create a frozen object for each frozen segment before the segment is inserted + //to segment list; during ngen, we could also create frozen objects in segments which + //don't belong to current GC heap. + //So we return true if hs is NULL. It might create a hole about detecting invalidate + //object. But given all other checks present, the hole should be very small + return !hs || heap_segment_read_only_p (hs); +} +#endif //FEATURE_BASICFREEZE + +#endif //VERIFY_HEAP + +// returns TRUE if the pointer is in one of the GC heaps. +BOOL GCHeap::IsHeapPointer (void* vpObject, BOOL small_heap_only) +{ + STATIC_CONTRACT_SO_TOLERANT; + + // removed STATIC_CONTRACT_CAN_TAKE_LOCK here because find_segment + // no longer calls CLREvent::Wait which eventually takes a lock. 
+ + uint8_t* object = (uint8_t*) vpObject; +#ifndef FEATURE_BASICFREEZE + if (!((object < g_highest_address) && (object >= g_lowest_address))) + return FALSE; +#endif //!FEATURE_BASICFREEZE + + heap_segment * hs = gc_heap::find_segment (object, small_heap_only); + return !!hs; +} + +#ifdef STRESS_PINNING +static n_promote = 0; +#endif //STRESS_PINNING +// promote an object +void GCHeap::Promote(Object** ppObject, ScanContext* sc, uint32_t flags) +{ + THREAD_NUMBER_FROM_CONTEXT; +#ifndef MULTIPLE_HEAPS + const int thread = 0; +#endif //!MULTIPLE_HEAPS + + uint8_t* o = (uint8_t*)*ppObject; + + if (o == 0) + return; + +#ifdef DEBUG_DestroyedHandleValue + // we can race with destroy handle during concurrent scan + if (o == (uint8_t*)DEBUG_DestroyedHandleValue) + return; +#endif //DEBUG_DestroyedHandleValue + + HEAP_FROM_THREAD; + + gc_heap* hp = gc_heap::heap_of (o); + + dprintf (3, ("Promote %Ix", (size_t)o)); + +#ifdef INTERIOR_POINTERS + if (flags & GC_CALL_INTERIOR) + { + if ((o < hp->gc_low) || (o >= hp->gc_high)) + { + return; + } + if ( (o = hp->find_object (o, hp->gc_low)) == 0) + { + return; + } + + } +#endif //INTERIOR_POINTERS + +#ifdef FEATURE_CONSERVATIVE_GC + // For conservative GC, a value on stack may point to middle of a free object. + // In this case, we don't need to promote the pointer. 
+ if (g_pConfig->GetGCConservative() + && ((CObjectHeader*)o)->IsFree()) + { + return; + } +#endif + +#ifdef _DEBUG + ((CObjectHeader*)o)->ValidatePromote(sc, flags); +#else + UNREFERENCED_PARAMETER(sc); +#endif //_DEBUG + + if (flags & GC_CALL_PINNED) + hp->pin_object (o, (uint8_t**) ppObject, hp->gc_low, hp->gc_high); + +#ifdef STRESS_PINNING + if ((++n_promote % 20) == 1) + hp->pin_object (o, (uint8_t**) ppObject, hp->gc_low, hp->gc_high); +#endif //STRESS_PINNING + +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + size_t promoted_size_begin = hp->promoted_bytes (thread); +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + + if ((o >= hp->gc_low) && (o < hp->gc_high)) + { + hpt->mark_object_simple (&o THREAD_NUMBER_ARG); + } + +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + size_t promoted_size_end = hp->promoted_bytes (thread); + if (g_fEnableARM) + { + if (sc->pCurrentDomain) + { + sc->pCurrentDomain->RecordSurvivedBytes ((promoted_size_end - promoted_size_begin), thread); + } + } +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + + STRESS_LOG_ROOT_PROMOTE(ppObject, o, o ? header(o)->GetMethodTable() : NULL); +} + +void GCHeap::Relocate (Object** ppObject, ScanContext* sc, + uint32_t flags) +{ + UNREFERENCED_PARAMETER(sc); + + uint8_t* object = (uint8_t*)(Object*)(*ppObject); + + THREAD_NUMBER_FROM_CONTEXT; + + //dprintf (3, ("Relocate location %Ix\n", (size_t)ppObject)); + dprintf (3, ("R: %Ix", (size_t)ppObject)); + + if (object == 0) + return; + + gc_heap* hp = gc_heap::heap_of (object); + +#ifdef _DEBUG + if (!(flags & GC_CALL_INTERIOR)) + { + // We cannot validate this object if it's in the condemned gen because it could + // be one of the objects that were overwritten by an artificial gap due to a pinned plug. 
+ if (!((object >= hp->gc_low) && (object < hp->gc_high))) + { + ((CObjectHeader*)object)->Validate(FALSE); + } + } +#endif //_DEBUG + + dprintf (3, ("Relocate %Ix\n", (size_t)object)); + + uint8_t* pheader; + + if ((flags & GC_CALL_INTERIOR) && gc_heap::settings.loh_compaction) + { + if (!((object >= hp->gc_low) && (object < hp->gc_high))) + { + return; + } + + if (gc_heap::loh_object_p (object)) + { + pheader = hp->find_object (object, 0); + if (pheader == 0) + { + return; + } + + ptrdiff_t ref_offset = object - pheader; + hp->relocate_address(&pheader THREAD_NUMBER_ARG); + *ppObject = (Object*)(pheader + ref_offset); + return; + } + } + + { + pheader = object; + hp->relocate_address(&pheader THREAD_NUMBER_ARG); + *ppObject = (Object*)pheader; + } + + STRESS_LOG_ROOT_RELOCATE(ppObject, object, pheader, ((!(flags & GC_CALL_INTERIOR)) ? ((Object*)object)->GetGCSafeMethodTable() : 0)); +} + +/*static*/ BOOL GCHeap::IsObjectInFixedHeap(Object *pObj) +{ + // For now we simply look at the size of the object to determine if it in the + // fixed heap or not. If the bit indicating this gets set at some point + // we should key off that instead. + return size( pObj ) >= LARGE_OBJECT_SIZE; +} + +#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way +#ifdef STRESS_HEAP + +void StressHeapDummy (); + +static int32_t GCStressStartCount = -1; +static int32_t GCStressCurCount = 0; +static int32_t GCStressStartAtJit = -1; + +// the maximum number of foreground GCs we'll induce during one BGC +// (this number does not include "naturally" occuring GCs). +static int32_t GCStressMaxFGCsPerBGC = -1; + +// CLRRandom implementation can produce FPU exceptions if +// the test/application run by CLR is enabling any FPU exceptions. +// We want to avoid any unexpected exception coming from stress +// infrastructure, so CLRRandom is not an option. +// The code below is a replicate of CRT rand() implementation. 
// Using CRT rand() is not an option because we will interfere with the user application
// that may also use it.
// Linear-congruential PRNG (CRT rand() constants); returns a value in
// [0, iMaxValue). Seeded lazily from time(NULL) on first call.
int StressRNG(int iMaxValue)
{
    static BOOL bisRandInit = FALSE;
    static int lHoldrand = 1L;

    if (!bisRandInit)
    {
        lHoldrand = (int)time(NULL);
        bisRandInit = TRUE;
    }
    int randValue = (((lHoldrand = lHoldrand * 214013L + 2531011L) >> 16) & 0x7fff);
    return randValue % iMaxValue;
}

// free up object so that things will move and then do a GC
//return TRUE if GC actually happens, otherwise FALSE
BOOL GCHeap::StressHeap(alloc_context * acontext)
{
    // if GC stress was dynamically disabled during this run we return FALSE
    if (!GCStressPolicy::IsEnabled())
        return FALSE;

#ifdef _DEBUG
    if (g_pConfig->FastGCStressLevel() && !GetThread()->StressHeapIsEnabled()) {
        return FALSE;
    }

#endif //_DEBUG

    if ((g_pConfig->GetGCStressLevel() & EEConfig::GCSTRESS_UNIQUE)
#ifdef _DEBUG
        || g_pConfig->FastGCStressLevel() > 1
#endif //_DEBUG
        ) {
        // GCSTRESS_UNIQUE: only stress once per unique call stack.
        if (!Thread::UniqueStack(&acontext)) {
            return FALSE;
        }
    }

#ifdef BACKGROUND_GC
    // don't trigger a GC from the GC threads but still trigger GCs from user threads.
    if (IsGCSpecialThread())
    {
        return FALSE;
    }
#endif //BACKGROUND_GC

    // Lazily read the stress configuration knobs (-1 means "not read yet").
    if (GCStressStartAtJit == -1 || GCStressStartCount == -1)
    {
        GCStressStartCount = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_GCStressStart);
        GCStressStartAtJit = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_GCStressStartAtJit);
    }

    if (GCStressMaxFGCsPerBGC == -1)
    {
        GCStressMaxFGCsPerBGC = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_GCStressMaxFGCsPerBGC);
        if (g_pConfig->IsGCStressMix() && GCStressMaxFGCsPerBGC == -1)
            GCStressMaxFGCsPerBGC = 6;
    }

#ifdef _DEBUG
    if (g_JitCount < GCStressStartAtJit)
        return FALSE;
#endif //_DEBUG

    // Allow programmer to skip the first N Stress GCs so that you can
    // get to the interesting ones faster.
    Interlocked::Increment(&GCStressCurCount);
    if (GCStressCurCount < GCStressStartCount)
        return FALSE;

    // throttle the number of stress-induced GCs by a factor given by GCStressStep
    if ((GCStressCurCount % g_pConfig->GetGCStressStep()) != 0)
    {
        return FALSE;
    }

#ifdef BACKGROUND_GC
    if (IsConcurrentGCEnabled() && IsConcurrentGCInProgress())
    {
        // allow a maximum number of stress induced FGCs during one BGC
        if (gc_stress_fgcs_in_bgc >= GCStressMaxFGCsPerBGC)
            return FALSE;
        ++gc_stress_fgcs_in_bgc;
    }
#endif // BACKGROUND_GC

    if (g_pStringClass == 0)
    {
        // If the String class has not been loaded, dont do any stressing. This should
        // be kept to a minimum to get as complete coverage as possible.
        _ASSERTE(g_fEEInit);
        return FALSE;
    }

#ifndef MULTIPLE_HEAPS
    static int32_t OneAtATime = -1;

    if (acontext == 0)
        acontext = generation_alloc_context (pGenGCHeap->generation_of(0));

    // Only bother with this if the stress level is big enough and if nobody else is
    // doing it right now.  Note that some callers are inside the AllocLock and are
    // guaranteed synchronized.  But others are using AllocationContexts and have no
    // particular synchronization.
    //
    // For this latter case, we want a very high-speed way of limiting this to one
    // at a time.  A secondary advantage is that we release part of our StressObjs
    // buffer sparingly but just as effectively.

    if (Interlocked::Increment(&OneAtATime) == 0 &&
        !TrackAllocations()) // Messing with object sizes can confuse the profiler (see ICorProfilerInfo::GetObjectSize)
    {
        StringObject* str;

        // If the current string is used up
        if (ObjectFromHandle(m_StressObjs[m_CurStressObj]) == 0)
        {
            // Populate handles with strings
            int i = m_CurStressObj;
            while(ObjectFromHandle(m_StressObjs[i]) == 0)
            {
                _ASSERTE(m_StressObjs[i] != 0);
                unsigned strLen = (LARGE_OBJECT_SIZE - 32) / sizeof(WCHAR);
                unsigned strSize = PtrAlign(StringObject::GetSize(strLen));

                // update the cached type handle before allocating
                SetTypeHandleOnThreadForAlloc(TypeHandle(g_pStringClass));
                str = (StringObject*) pGenGCHeap->allocate (strSize, acontext);
                if (str)
                {
                    str->SetMethodTable (g_pStringClass);
                    str->SetStringLength (strLen);

#if CHECK_APP_DOMAIN_LEAKS
                    if (g_pConfig->AppDomainLeaks() && str->SetAppDomainNoThrow())
                    {
#endif
                        StoreObjectInHandle(m_StressObjs[i], ObjectToOBJECTREF(str));
#if CHECK_APP_DOMAIN_LEAKS
                    }
#endif
                }
                i = (i + 1) % NUM_HEAP_STRESS_OBJS;
                if (i == m_CurStressObj) break;
            }

            // advance the current handle to the next string
            m_CurStressObj = (m_CurStressObj + 1) % NUM_HEAP_STRESS_OBJS;
        }

        // Get the current string
        str = (StringObject*) OBJECTREFToObject(ObjectFromHandle(m_StressObjs[m_CurStressObj]));
        if (str)
        {
            // Chop off the end of the string and form a new object out of it.
            // This will 'free' an object at the begining of the heap, which will
            // force data movement.  Note that we can only do this so many times.
            // before we have to move on to the next string.
            unsigned sizeOfNewObj = (unsigned)Align(min_obj_size * 31);
            if (str->GetStringLength() > sizeOfNewObj / sizeof(WCHAR))
            {
                unsigned sizeToNextObj = (unsigned)Align(size(str));
                uint8_t* freeObj = ((uint8_t*) str) + sizeToNextObj - sizeOfNewObj;
                pGenGCHeap->make_unused_array (freeObj, sizeOfNewObj);
                str->SetStringLength(str->GetStringLength() - (sizeOfNewObj / sizeof(WCHAR)));
            }
            else
            {
                // Let the string itself become garbage.
                // will be realloced next time around
                StoreObjectInHandle(m_StressObjs[m_CurStressObj], 0);
            }
        }
    }
    Interlocked::Decrement(&OneAtATime);
#endif // !MULTIPLE_HEAPS
    if (IsConcurrentGCEnabled())
    {
        int rgen = StressRNG(10);

        // gen0:gen1:gen2 distribution: 40:40:20
        if (rgen >= 8)
            rgen = 2;
        else if (rgen >= 4)
            rgen = 1;
        else
            rgen = 0;

        GarbageCollectTry (rgen, FALSE, collection_gcstress);
    }
    else
    {
        GarbageCollect(max_generation, FALSE, collection_gcstress);
    }

    return TRUE;
}

#endif // STRESS_HEAP
#endif // FEATURE_REDHAWK


#ifdef FEATURE_PREMORTEM_FINALIZATION
#define REGISTER_FOR_FINALIZATION(_object, _size) \
    hp->finalize_queue->RegisterForFinalization (0, (_object), (_size))
#else // FEATURE_PREMORTEM_FINALIZATION
#define REGISTER_FOR_FINALIZATION(_object, _size) true
#endif // FEATURE_PREMORTEM_FINALIZATION

#ifdef FEATURE_REDHAWK
#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do {  \
    if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size)))   \
    {                                                                                       \
        STRESS_LOG_OOM_STACK(_size);                                                        \
        return NULL;                                                                        \
    }                                                                                       \
} while (false)
#else // FEATURE_REDHAWK
#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do {  \
    if ((_object) == NULL)                                                                  \
    {                                                                                       \
        STRESS_LOG_OOM_STACK(_size);                                                        \
        ThrowOutOfMemory();                                                                 \
    }                                                                                       \
    if (_register)                                                                          \
    {                                                                                       \
        REGISTER_FOR_FINALIZATION(_object, _size);                                          \
    }                                                                                       \
} while (false)
#endif // FEATURE_REDHAWK

//
// Small Object Allocator
//
//
+Object * +GCHeap::Alloc( size_t size, uint32_t flags REQD_ALIGN_DCL) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. + NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + +#if defined(_DEBUG) && !defined(FEATURE_REDHAWK) + if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) + { + char *a = new char; + delete a; + } +#endif //_DEBUG && !FEATURE_REDHAWK + + TRIGGERSGC(); + + assert (!GCHeap::UseAllocationContexts()); + + Object* newAlloc = NULL; + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocStart = GetCycleCount32(); + unsigned finish; +#elif defined(ENABLE_INSTRUMENTATION) + unsigned AllocStart = GetInstLogTime(); + unsigned finish; +#endif //COUNT_CYCLES +#endif //TRACE_GC + +#ifdef MULTIPLE_HEAPS + //take the first heap.... + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; +#ifdef _PREFAST_ + // prefix complains about us dereferencing hp in wks build even though we only access static members + // this way. 
not sure how to shut it up except for this ugly workaround: + PREFIX_ASSUME(hp != NULL); +#endif //_PREFAST_ +#endif //MULTIPLE_HEAPS + + { + AllocLockHolder lh; + +#ifndef FEATURE_REDHAWK + GCStress<gc_on_alloc>::MaybeTrigger(generation_alloc_context(hp->generation_of(0))); +#endif // FEATURE_REDHAWK + + alloc_context* acontext = 0; + + if (size < LARGE_OBJECT_SIZE) + { + acontext = generation_alloc_context (hp->generation_of (0)); + +#ifdef TRACE_GC + AllocSmallCount++; +#endif //TRACE_GC + newAlloc = (Object*) hp->allocate (size + ComputeMaxStructAlignPad(requiredAlignment), acontext); +#ifdef FEATURE_STRUCTALIGN + newAlloc = (Object*) hp->pad_for_alignment ((uint8_t*) newAlloc, requiredAlignment, size, acontext); +#endif // FEATURE_STRUCTALIGN + // ASSERT (newAlloc); + } + else + { + acontext = generation_alloc_context (hp->generation_of (max_generation+1)); + + newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), acontext->alloc_bytes_loh); +#ifdef FEATURE_STRUCTALIGN + newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size); +#endif // FEATURE_STRUCTALIGN + } + } + + CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE); + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + finish = GetCycleCount32(); +#elif defined(ENABLE_INSTRUMENTATION) + finish = GetInstLogTime(); +#endif //COUNT_CYCLES + AllocDuration += finish - AllocStart; + AllocCount++; +#endif //TRACE_GC + return newAlloc; +} + +#ifdef FEATURE_64BIT_ALIGNMENT +// Allocate small object with an alignment requirement of 8-bytes. Non allocation context version. +Object * +GCHeap::AllocAlign8( size_t size, uint32_t flags) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. 
+ NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + + assert (!GCHeap::UseAllocationContexts()); + + Object* newAlloc = NULL; + + { + AllocLockHolder lh; + +#ifdef MULTIPLE_HEAPS + //take the first heap.... + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + newAlloc = AllocAlign8Common(hp, generation_alloc_context (hp->generation_of (0)), size, flags); + } + + return newAlloc; +} + +// Allocate small object with an alignment requirement of 8-bytes. Allocation context version. +Object* +GCHeap::AllocAlign8(alloc_context* acontext, size_t size, uint32_t flags ) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. + NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + +#ifdef MULTIPLE_HEAPS + if (acontext->alloc_heap == 0) + { + AssignHeap (acontext); + assert (acontext->alloc_heap); + } + + gc_heap* hp = acontext->alloc_heap->pGenGCHeap; +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + return AllocAlign8Common(hp, acontext, size, flags); +} + +// Common code used by both variants of AllocAlign8 above. +Object* +GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint32_t flags) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. 
+ NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + + gc_heap* hp = (gc_heap*)_hp; + +#if defined(_DEBUG) && !defined(FEATURE_REDHAWK) + if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) + { + char *a = new char; + delete a; + } +#endif //_DEBUG && !FEATURE_REDHAWK + + TRIGGERSGC(); + + Object* newAlloc = NULL; + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocStart = GetCycleCount32(); + unsigned finish; +#elif defined(ENABLE_INSTRUMENTATION) + unsigned AllocStart = GetInstLogTime(); + unsigned finish; +#endif //COUNT_CYCLES +#endif //TRACE_GC + +#ifndef FEATURE_REDHAWK + GCStress<gc_on_alloc>::MaybeTrigger(acontext); +#endif // FEATURE_REDHAWK + + if (size < LARGE_OBJECT_SIZE) + { +#ifdef TRACE_GC + AllocSmallCount++; +#endif //TRACE_GC + + // Depending on where in the object the payload requiring 8-byte alignment resides we might have to + // align the object header on an 8-byte boundary or midway between two such boundaries. The unaligned + // case is indicated to the GC via the GC_ALLOC_ALIGN8_BIAS flag. + size_t desiredAlignment = (flags & GC_ALLOC_ALIGN8_BIAS) ? 4 : 0; + + // Retrieve the address of the next allocation from the context (note that we're inside the alloc + // lock at this point). + uint8_t* result = acontext->alloc_ptr; + + // Will an allocation at this point yield the correct alignment and fit into the remainder of the + // context? + if ((((size_t)result & 7) == desiredAlignment) && ((result + size) <= acontext->alloc_limit)) + { + // Yes, we can just go ahead and make the allocation. + newAlloc = (Object*) hp->allocate (size, acontext); + ASSERT(((size_t)newAlloc & 7) == desiredAlignment); + } + else + { + // No, either the next available address is not aligned in the way we require it or there's + // not enough space to allocate an object of the required size. In both cases we allocate a + // padding object (marked as a free object). 
This object's size is such that it will reverse + // the alignment of the next header (asserted below). + // + // We allocate both together then decide based on the result whether we'll format the space as + // free object + real object or real object + free object. + ASSERT((Align(min_obj_size) & 7) == 4); + CObjectHeader *freeobj = (CObjectHeader*) hp->allocate (Align(size) + Align(min_obj_size), acontext); + if (freeobj) + { + if (((size_t)freeobj & 7) == desiredAlignment) + { + // New allocation has desired alignment, return this one and place the free object at the + // end of the allocated space. + newAlloc = (Object*)freeobj; + freeobj = (CObjectHeader*)((uint8_t*)freeobj + Align(size)); + } + else + { + // New allocation is still mis-aligned, format the initial space as a free object and the + // rest of the space should be correctly aligned for the real object. + newAlloc = (Object*)((uint8_t*)freeobj + Align(min_obj_size)); + ASSERT(((size_t)newAlloc & 7) == desiredAlignment); + } + freeobj->SetFree(min_obj_size); + } + } + } + else + { + // The LOH always guarantees at least 8-byte alignment, regardless of platform. Moreover it doesn't + // support mis-aligned object headers so we can't support biased headers as above. Luckily for us + // we've managed to arrange things so the only case where we see a bias is for boxed value types and + // these can never get large enough to be allocated on the LOH. 
+ ASSERT(65536 < LARGE_OBJECT_SIZE); + ASSERT((flags & GC_ALLOC_ALIGN8_BIAS) == 0); + + alloc_context* acontext = generation_alloc_context (hp->generation_of (max_generation+1)); + + newAlloc = (Object*) hp->allocate_large_object (size, acontext->alloc_bytes_loh); + ASSERT(((size_t)newAlloc & 7) == 0); + } + + CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE); + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + finish = GetCycleCount32(); +#elif defined(ENABLE_INSTRUMENTATION) + finish = GetInstLogTime(); +#endif //COUNT_CYCLES + AllocDuration += finish - AllocStart; + AllocCount++; +#endif //TRACE_GC + return newAlloc; +} +#endif // FEATURE_64BIT_ALIGNMENT + +Object * +GCHeap::AllocLHeap( size_t size, uint32_t flags REQD_ALIGN_DCL) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. + NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + +#if defined(_DEBUG) && !defined(FEATURE_REDHAWK) + if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) + { + char *a = new char; + delete a; + } +#endif //_DEBUG && !FEATURE_REDHAWK + + TRIGGERSGC(); + + Object* newAlloc = NULL; + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocStart = GetCycleCount32(); + unsigned finish; +#elif defined(ENABLE_INSTRUMENTATION) + unsigned AllocStart = GetInstLogTime(); + unsigned finish; +#endif //COUNT_CYCLES +#endif //TRACE_GC + +#ifdef MULTIPLE_HEAPS + //take the first heap.... + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; +#ifdef _PREFAST_ + // prefix complains about us dereferencing hp in wks build even though we only access static members + // this way. 
not sure how to shut it up except for this ugly workaround: + PREFIX_ASSUME(hp != NULL); +#endif //_PREFAST_ +#endif //MULTIPLE_HEAPS + +#ifndef FEATURE_REDHAWK + GCStress<gc_on_alloc>::MaybeTrigger(generation_alloc_context(hp->generation_of(0))); +#endif // FEATURE_REDHAWK + + alloc_context* acontext = generation_alloc_context (hp->generation_of (max_generation+1)); + + newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), acontext->alloc_bytes_loh); +#ifdef FEATURE_STRUCTALIGN + newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size); +#endif // FEATURE_STRUCTALIGN + CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE); + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + finish = GetCycleCount32(); +#elif defined(ENABLE_INSTRUMENTATION) + finish = GetInstLogTime(); +#endif //COUNT_CYCLES + AllocDuration += finish - AllocStart; + AllocCount++; +#endif //TRACE_GC + return newAlloc; +} + +Object* +GCHeap::Alloc(alloc_context* acontext, size_t size, uint32_t flags REQD_ALIGN_DCL) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk NULL is returned on failure. 
+ NOTHROW; +#else + THROWS; +#endif + GC_TRIGGERS; + } CONTRACTL_END; + +#if defined(_DEBUG) && !defined(FEATURE_REDHAWK) + if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP)) + { + char *a = new char; + delete a; + } +#endif //_DEBUG && !FEATURE_REDHAWK + + TRIGGERSGC(); + + Object* newAlloc = NULL; + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocStart = GetCycleCount32(); + unsigned finish; +#elif defined(ENABLE_INSTRUMENTATION) + unsigned AllocStart = GetInstLogTime(); + unsigned finish; +#endif //COUNT_CYCLES +#endif //TRACE_GC + +#ifdef MULTIPLE_HEAPS + if (acontext->alloc_heap == 0) + { + AssignHeap (acontext); + assert (acontext->alloc_heap); + } +#endif //MULTIPLE_HEAPS + +#ifndef FEATURE_REDHAWK + GCStress<gc_on_alloc>::MaybeTrigger(acontext); +#endif // FEATURE_REDHAWK + +#ifdef MULTIPLE_HEAPS + gc_heap* hp = acontext->alloc_heap->pGenGCHeap; +#else + gc_heap* hp = pGenGCHeap; +#ifdef _PREFAST_ + // prefix complains about us dereferencing hp in wks build even though we only access static members + // this way. 
not sure how to shut it up except for this ugly workaround: + PREFIX_ASSUME(hp != NULL); +#endif //_PREFAST_ +#endif //MULTIPLE_HEAPS + + if (size < LARGE_OBJECT_SIZE) + { + +#ifdef TRACE_GC + AllocSmallCount++; +#endif //TRACE_GC + newAlloc = (Object*) hp->allocate (size + ComputeMaxStructAlignPad(requiredAlignment), acontext); +#ifdef FEATURE_STRUCTALIGN + newAlloc = (Object*) hp->pad_for_alignment ((uint8_t*) newAlloc, requiredAlignment, size, acontext); +#endif // FEATURE_STRUCTALIGN +// ASSERT (newAlloc); + } + else + { + newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), acontext->alloc_bytes_loh); +#ifdef FEATURE_STRUCTALIGN + newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size); +#endif // FEATURE_STRUCTALIGN + } + + CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE); + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + finish = GetCycleCount32(); +#elif defined(ENABLE_INSTRUMENTATION) + finish = GetInstLogTime(); +#endif //COUNT_CYCLES + AllocDuration += finish - AllocStart; + AllocCount++; +#endif //TRACE_GC + return newAlloc; +} + +void +GCHeap::FixAllocContext (alloc_context* acontext, BOOL lockp, void* arg, void *heap) +{ +#ifdef MULTIPLE_HEAPS + + if (arg != 0) + acontext->alloc_count = 0; + + uint8_t * alloc_ptr = acontext->alloc_ptr; + + if (!alloc_ptr) + return; + + // The acontext->alloc_heap can be out of sync with the ptrs because + // of heap re-assignment in allocate + gc_heap* hp = gc_heap::heap_of (alloc_ptr); +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + if (heap == NULL || heap == hp) + { + if (lockp) + { + enter_spin_lock (&hp->more_space_lock); + } + hp->fix_allocation_context (acontext, ((arg != 0)? 
TRUE : FALSE), + get_alignment_constant(TRUE)); + if (lockp) + { + leave_spin_lock (&hp->more_space_lock); + } + } +} + +Object* +GCHeap::GetContainingObject (void *pInteriorPtr) +{ + uint8_t *o = (uint8_t*)pInteriorPtr; + + gc_heap* hp = gc_heap::heap_of (o); + if (o >= hp->lowest_address && o < hp->highest_address) + { + o = hp->find_object (o, hp->gc_low); + } + else + { + o = NULL; + } + + return (Object *)o; +} + +BOOL should_collect_optimized (dynamic_data* dd, BOOL low_memory_p) +{ + if (dd_new_allocation (dd) < 0) + { + return TRUE; + } + + if (((float)(dd_new_allocation (dd)) / (float)dd_desired_allocation (dd)) < (low_memory_p ? 0.7 : 0.3)) + { + return TRUE; + } + + return FALSE; +} + +//---------------------------------------------------------------------------- +// #GarbageCollector +// +// API to ensure that a complete new garbage collection takes place +// +HRESULT +GCHeap::GarbageCollect (int generation, BOOL low_memory_p, int mode) +{ +#if defined(BIT64) + if (low_memory_p) + { + size_t total_allocated = 0; + size_t total_desired = 0; +#ifdef MULTIPLE_HEAPS + int hn = 0; + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + total_desired += dd_desired_allocation (hp->dynamic_data_of (0)); + total_allocated += dd_desired_allocation (hp->dynamic_data_of (0))- + dd_new_allocation (hp->dynamic_data_of (0)); + } +#else + gc_heap* hp = pGenGCHeap; + total_desired = dd_desired_allocation (hp->dynamic_data_of (0)); + total_allocated = dd_desired_allocation (hp->dynamic_data_of (0))- + dd_new_allocation (hp->dynamic_data_of (0)); +#endif //MULTIPLE_HEAPS + + if ((total_desired > gc_heap::mem_one_percent) && (total_allocated < gc_heap::mem_one_percent)) + { + dprintf (2, ("Async low mem but we've only allocated %d (< 10%% of physical mem) out of %d, returning", + total_allocated, total_desired)); + + return S_OK; + } + } +#endif // BIT64 + +#ifdef MULTIPLE_HEAPS + gc_heap* hpt = gc_heap::g_heaps[0]; +#else + gc_heap* hpt = 
0; +#endif //MULTIPLE_HEAPS + + generation = (generation < 0) ? max_generation : min (generation, max_generation); + dynamic_data* dd = hpt->dynamic_data_of (generation); + +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + if ((mode == collection_optimized) || (mode & collection_non_blocking)) + { + return S_OK; + } + if (mode & collection_blocking) + { + pGenGCHeap->background_gc_wait(); + if (mode & collection_optimized) + { + return S_OK; + } + } + } +#endif //BACKGROUND_GC + + if (mode & collection_optimized) + { + if (pGenGCHeap->gc_started) + { + return S_OK; + } + else + { + BOOL should_collect = FALSE; + BOOL should_check_loh = (generation == max_generation); +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + dynamic_data* dd1 = gc_heap::g_heaps [i]->dynamic_data_of (generation); + dynamic_data* dd2 = (should_check_loh ? + (gc_heap::g_heaps [i]->dynamic_data_of (max_generation + 1)) : + 0); + + if (should_collect_optimized (dd1, low_memory_p)) + { + should_collect = TRUE; + break; + } + if (dd2 && should_collect_optimized (dd2, low_memory_p)) + { + should_collect = TRUE; + break; + } + } +#else + should_collect = should_collect_optimized (dd, low_memory_p); + if (!should_collect && should_check_loh) + { + should_collect = + should_collect_optimized (hpt->dynamic_data_of (max_generation + 1), low_memory_p); + } +#endif //MULTIPLE_HEAPS + if (!should_collect) + { + return S_OK; + } + } + } + + size_t CollectionCountAtEntry = dd_collection_count (dd); + size_t BlockingCollectionCountAtEntry = gc_heap::full_gc_counts[gc_type_blocking]; + size_t CurrentCollectionCount = 0; + +retry: + + CurrentCollectionCount = GarbageCollectTry(generation, low_memory_p, mode); + + if ((mode & collection_blocking) && + (generation == max_generation) && + (gc_heap::full_gc_counts[gc_type_blocking] == BlockingCollectionCountAtEntry)) + { +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + 
pGenGCHeap->background_gc_wait(); + } +#endif //BACKGROUND_GC + + goto retry; + } + + if (CollectionCountAtEntry == CurrentCollectionCount) + { + goto retry; + } + + return S_OK; +} + +size_t +GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode) +{ + int gen = (generation < 0) ? + max_generation : min (generation, max_generation); + + gc_reason reason = reason_empty; + + if (low_memory_p) + { + if (mode & collection_blocking) + reason = reason_lowmemory_blocking; + else + reason = reason_lowmemory; + } + else + reason = reason_induced; + + if (reason == reason_induced) + { + if (mode & collection_compacting) + { + reason = reason_induced_compacting; + } + else if (mode & collection_non_blocking) + { + reason = reason_induced_noforce; + } +#ifdef STRESS_HEAP + else if (mode & collection_gcstress) + { + reason = reason_gcstress; + } +#endif + } + + return GarbageCollectGeneration (gen, reason); +} + +void gc_heap::do_pre_gc() +{ + STRESS_LOG_GC_STACK; + +#ifdef STRESS_LOG + STRESS_LOG_GC_START(VolatileLoad(&settings.gc_index), + (uint32_t)settings.condemned_generation, + (uint32_t)settings.reason); +#endif // STRESS_LOG + +#ifdef MULTIPLE_HEAPS + gc_heap* hp = g_heaps[0]; +#else + gc_heap* hp = 0; +#endif //MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + settings.b_state = hp->current_bgc_state; +#endif //BACKGROUND_GC + +#ifdef BACKGROUND_GC + dprintf (1, ("*GC* %d(gen0:%d)(%d)(%s)(%d)", + VolatileLoad(&settings.gc_index), + dd_collection_count (hp->dynamic_data_of (0)), + settings.condemned_generation, + (settings.concurrent ? "BGC" : (recursive_gc_sync::background_running_p() ? "FGC" : "NGC")), + settings.b_state)); +#else + dprintf (1, ("*GC* %d(gen0:%d)(%d)", + VolatileLoad(&settings.gc_index), + dd_collection_count(hp->dynamic_data_of(0)), + settings.condemned_generation)); +#endif //BACKGROUND_GC + + // TODO: this can happen...it's because of the way we are calling + // do_pre_gc, will fix later. 
+ //if (last_gc_index > VolatileLoad(&settings.gc_index)) + //{ + // FATAL_GC_ERROR(); + //} + + last_gc_index = VolatileLoad(&settings.gc_index); + GCHeap::UpdatePreGCCounters(); + + if (settings.concurrent) + { +#ifdef BACKGROUND_GC + full_gc_counts[gc_type_background]++; +#if defined(STRESS_HEAP) && !defined(FEATURE_REDHAWK) + GCHeap::gc_stress_fgcs_in_bgc = 0; +#endif // STRESS_HEAP && !FEATURE_REDHAWK +#endif // BACKGROUND_GC + } + else + { + if (settings.condemned_generation == max_generation) + { + full_gc_counts[gc_type_blocking]++; + } + else + { +#ifdef BACKGROUND_GC + if (settings.background_p) + { + ephemeral_fgc_counts[settings.condemned_generation]++; + } +#endif //BACKGROUND_GC + } + } + +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + if (g_fEnableARM) + { + SystemDomain::ResetADSurvivedBytes(); + } +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING +} + +#ifdef GC_CONFIG_DRIVEN +void gc_heap::record_interesting_info_per_heap() +{ + // datapoints are always from the last blocking GC so don't record again + // for BGCs. 
+ if (!(settings.concurrent))
+ {
+ for (int i = 0; i < max_idp_count; i++)
+ {
+ interesting_data_per_heap[i] += interesting_data_per_gc[i];
+ }
+ }
+
+ int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact);
+ if (compact_reason >= 0)
+ (compact_reasons_per_heap[compact_reason])++;
+ int expand_mechanism = get_gc_data_per_heap()->get_mechanism (gc_heap_expand);
+ if (expand_mechanism >= 0)
+ (expand_mechanisms_per_heap[expand_mechanism])++;
+
+ for (int i = 0; i < max_gc_mechanism_bits_count; i++)
+ {
+ if (get_gc_data_per_heap()->is_mechanism_bit_set ((gc_mechanism_bit_per_heap)i))
+ (interesting_mechanism_bits_per_heap[i])++;
+ }
+
+ // h# | GC | gen | C | EX | NF | BF | ML | DM || PreS | PostS | Merge | Conv | Pre | Post | PrPo | PreP | PostP |
+ cprintf (("%2d | %6d | %1d | %1s | %2s | %2s | %2s | %2s | %2s || %5Id | %5Id | %5Id | %5Id | %5Id | %5Id | %5Id | %5Id | %5Id |",
+ heap_number,
+ (size_t)settings.gc_index,
+ settings.condemned_generation,
+ // TEMP - I am just doing this for wks GC 'cause I wanna see the pattern of doing C/S GCs.
+ (settings.compaction ? (((compact_reason >= 0) && gc_heap_compact_reason_mandatory_p[compact_reason]) ? "M" : "W") : ""), // compaction
+ ((expand_mechanism >= 0)? "X" : ""), // EX
+ ((expand_mechanism == expand_reuse_normal) ? "X" : ""), // NF
+ ((expand_mechanism == expand_reuse_bestfit) ? "X" : ""), // BF
+ (get_gc_data_per_heap()->is_mechanism_bit_set (gc_mark_list_bit) ? "X" : ""), // ML
+ (get_gc_data_per_heap()->is_mechanism_bit_set (gc_demotion_bit) ?
"X" : ""), // DM + interesting_data_per_gc[idp_pre_short], + interesting_data_per_gc[idp_post_short], + interesting_data_per_gc[idp_merged_pin], + interesting_data_per_gc[idp_converted_pin], + interesting_data_per_gc[idp_pre_pin], + interesting_data_per_gc[idp_post_pin], + interesting_data_per_gc[idp_pre_and_post_pin], + interesting_data_per_gc[idp_pre_short_padded], + interesting_data_per_gc[idp_post_short_padded])); +} + +void gc_heap::record_global_mechanisms() +{ + for (int i = 0; i < max_global_mechanisms_count; i++) + { + if (gc_data_global.get_mechanism_p ((gc_global_mechanism_p)i)) + { + ::record_global_mechanism (i); + } + } +} + +BOOL gc_heap::should_do_sweeping_gc (BOOL compact_p) +{ + if (!compact_ratio) + return (!compact_p); + + size_t compact_count = compact_or_sweep_gcs[0]; + size_t sweep_count = compact_or_sweep_gcs[1]; + + size_t total_count = compact_count + sweep_count; + BOOL should_compact = compact_p; + if (total_count > 3) + { + if (compact_p) + { + int temp_ratio = (int)((compact_count + 1) * 100 / (total_count + 1)); + if (temp_ratio > compact_ratio) + { + // cprintf (("compact would be: %d, total_count: %d, ratio would be %d%% > target\n", + // (compact_count + 1), (total_count + 1), temp_ratio)); + should_compact = FALSE; + } + } + else + { + int temp_ratio = (int)((sweep_count + 1) * 100 / (total_count + 1)); + if (temp_ratio > (100 - compact_ratio)) + { + // cprintf (("sweep would be: %d, total_count: %d, ratio would be %d%% > target\n", + // (sweep_count + 1), (total_count + 1), temp_ratio)); + should_compact = TRUE; + } + } + } + + return !should_compact; +} +#endif //GC_CONFIG_DRIVEN + +void gc_heap::do_post_gc() +{ + if (!settings.concurrent) + { + GCProfileWalkHeap(); + initGCShadow(); + } + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocStart = GetCycleCount32(); +#else + AllocStart = clock(); +#endif //COUNT_CYCLES +#endif //TRACE_GC + +#ifdef MULTIPLE_HEAPS + gc_heap* hp = g_heaps[0]; +#else + gc_heap* hp = 0; +#endif 
//MULTIPLE_HEAPS + + GCToEEInterface::GcDone(settings.condemned_generation); + +#ifdef GC_PROFILING + if (!settings.concurrent) + { + UpdateGenerationBounds(); + GarbageCollectionFinishedCallback(); + } +#endif // GC_PROFILING + + //dprintf (1, (" ****end of Garbage Collection**** %d(gen0:%d)(%d)", + dprintf (1, ("*EGC* %d(gen0:%d)(%d)(%s)", + VolatileLoad(&settings.gc_index), + dd_collection_count(hp->dynamic_data_of(0)), + settings.condemned_generation, + (settings.concurrent ? "BGC" : "GC"))); + + GCHeap::UpdatePostGCCounters(); +#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING + //if (g_fEnableARM) + //{ + // SystemDomain::GetADSurvivedBytes(); + //} +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + +#ifdef STRESS_LOG + STRESS_LOG_GC_END(VolatileLoad(&settings.gc_index), + (uint32_t)settings.condemned_generation, + (uint32_t)settings.reason); +#endif // STRESS_LOG + +#ifdef GC_CONFIG_DRIVEN + if (!settings.concurrent) + { + if (settings.compaction) + (compact_or_sweep_gcs[0])++; + else + (compact_or_sweep_gcs[1])++; + } + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + g_heaps[i]->record_interesting_info_per_heap(); +#else + record_interesting_info_per_heap(); +#endif //MULTIPLE_HEAPS + record_global_mechanisms(); +#endif //GC_CONFIG_DRIVEN +} + +unsigned GCHeap::GetGcCount() +{ + return (unsigned int)VolatileLoad(&pGenGCHeap->settings.gc_index); +} + +size_t +GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) +{ + dprintf (2, ("triggered a GC!")); + +#ifdef MULTIPLE_HEAPS + gc_heap* hpt = gc_heap::g_heaps[0]; +#else + gc_heap* hpt = 0; +#endif //MULTIPLE_HEAPS + Thread* current_thread = GetThread(); + BOOL cooperative_mode = TRUE; + dynamic_data* dd = hpt->dynamic_data_of (gen); + size_t localCount = dd_collection_count (dd); + + enter_spin_lock (&gc_heap::gc_lock); + dprintf (SPINLOCK_LOG, ("GC Egc")); + ASSERT_HOLDING_SPIN_LOCK(&gc_heap::gc_lock); + + //don't trigger another GC if one was already in progress + //while waiting for 
the lock + { + size_t col_count = dd_collection_count (dd); + + if (localCount != col_count) + { +#ifdef SYNCHRONIZATION_STATS + gc_lock_contended++; +#endif //SYNCHRONIZATION_STATS + dprintf (SPINLOCK_LOG, ("no need GC Lgc")); + leave_spin_lock (&gc_heap::gc_lock); + + // We don't need to release msl here 'cause this means a GC + // has happened and would have release all msl's. + return col_count; + } + } + +#ifdef COUNT_CYCLES + int gc_start = GetCycleCount32(); +#endif //COUNT_CYCLES + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + AllocDuration += GetCycleCount32() - AllocStart; +#else + AllocDuration += clock() - AllocStart; +#endif //COUNT_CYCLES +#endif //TRACE_GC + + gc_heap::g_low_memory_status = (reason == reason_lowmemory) || + (reason == reason_lowmemory_blocking) || + g_bLowMemoryFromHost; + + if (g_bLowMemoryFromHost) + reason = reason_lowmemory_host; + + gc_trigger_reason = reason; + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap::g_heaps[i]->reset_gc_done(); + } +#else + gc_heap::reset_gc_done(); +#endif //MULTIPLE_HEAPS + + gc_heap::gc_started = TRUE; + + { + init_sync_log_stats(); + +#ifndef MULTIPLE_HEAPS + cooperative_mode = gc_heap::enable_preemptive (current_thread); + + dprintf (2, ("Suspending EE")); + BEGIN_TIMING(suspend_ee_during_log); + GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_FOR_GC); + END_TIMING(suspend_ee_during_log); + gc_heap::proceed_with_gc_p = gc_heap::should_proceed_with_gc(); + gc_heap::disable_preemptive (current_thread, cooperative_mode); + if (gc_heap::proceed_with_gc_p) + pGenGCHeap->settings.init_mechanisms(); + else + gc_heap::update_collection_counts_for_no_gc(); + +#endif //!MULTIPLE_HEAPS + } + +// MAP_EVENT_MONITORS(EE_MONITOR_GARBAGE_COLLECTIONS, NotifyEvent(EE_EVENT_TYPE_GC_STARTED, 0)); + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + unsigned start; + unsigned finish; + start = GetCycleCount32(); +#else + clock_t start; + clock_t finish; + start = clock(); +#endif //COUNT_CYCLES + 
PromotedObjectCount = 0; +#endif //TRACE_GC + + unsigned int condemned_generation_number = gen; + + // We want to get a stack from the user thread that triggered the GC + // instead of on the GC thread which is the case for Server GC. + // But we are doing it for Workstation GC as well to be uniform. + FireEtwGCTriggered((int) reason, GetClrInstanceId()); + +#ifdef MULTIPLE_HEAPS + GcCondemnedGeneration = condemned_generation_number; + + cooperative_mode = gc_heap::enable_preemptive (current_thread); + + BEGIN_TIMING(gc_during_log); + gc_heap::ee_suspend_event.Set(); + gc_heap::wait_for_gc_done(); + END_TIMING(gc_during_log); + + gc_heap::disable_preemptive (current_thread, cooperative_mode); + + condemned_generation_number = GcCondemnedGeneration; +#else + if (gc_heap::proceed_with_gc_p) + { + BEGIN_TIMING(gc_during_log); + pGenGCHeap->garbage_collect (condemned_generation_number); + END_TIMING(gc_during_log); + } +#endif //MULTIPLE_HEAPS + +#ifdef TRACE_GC +#ifdef COUNT_CYCLES + finish = GetCycleCount32(); +#else + finish = clock(); +#endif //COUNT_CYCLES + GcDuration += finish - start; + dprintf (3, + ("<GC# %d> Condemned: %d, Duration: %d, total: %d Alloc Avg: %d, Small Objects:%d Large Objects:%d", + VolatileLoad(&pGenGCHeap->settings.gc_index), condemned_generation_number, + finish - start, GcDuration, + AllocCount ? (AllocDuration / AllocCount) : 0, + AllocSmallCount, AllocBigCount)); + AllocCount = 0; + AllocDuration = 0; +#endif // TRACE_GC + +#ifdef BACKGROUND_GC + // We are deciding whether we should fire the alloc wait end event here + // because in begin_foreground we could be calling end_foreground + // if we need to retry. 
+ if (gc_heap::alloc_wait_event_p)
+ {
+ hpt->fire_alloc_wait_event_end (awr_fgc_wait_for_bgc);
+ gc_heap::alloc_wait_event_p = FALSE;
+ }
+#endif //BACKGROUND_GC
+
+#ifndef MULTIPLE_HEAPS
+#ifdef BACKGROUND_GC
+ if (!gc_heap::dont_restart_ee_p)
+ {
+#endif //BACKGROUND_GC
+ BEGIN_TIMING(restart_ee_during_log);
+ GCToEEInterface::RestartEE(TRUE);
+ END_TIMING(restart_ee_during_log);
+#ifdef BACKGROUND_GC
+ }
+#endif //BACKGROUND_GC
+#endif //!MULTIPLE_HEAPS
+
+#ifdef COUNT_CYCLES
+ printf ("GC: %d Time: %d\n", GcCondemnedGeneration,
+ GetCycleCount32() - gc_start);
+#endif //COUNT_CYCLES
+
+#ifndef MULTIPLE_HEAPS
+ process_sync_log_stats();
+ gc_heap::gc_started = FALSE;
+ gc_heap::set_gc_done();
+ dprintf (SPINLOCK_LOG, ("GC Lgc"));
+ leave_spin_lock (&gc_heap::gc_lock);
+#endif //!MULTIPLE_HEAPS
+
+#ifdef FEATURE_PREMORTEM_FINALIZATION
+ if ((!pGenGCHeap->settings.concurrent && pGenGCHeap->settings.found_finalizers) ||
+ FinalizerThread::HaveExtraWorkForFinalizer())
+ {
+ FinalizerThread::EnableFinalization();
+ }
+#endif // FEATURE_PREMORTEM_FINALIZATION
+
+ return dd_collection_count (dd);
+}
+
+size_t GCHeap::GetTotalBytesInUse ()
+{
+#ifdef MULTIPLE_HEAPS
+ //enumerate all the heaps and get their size.
+ size_t tot_size = 0; + for (int i = 0; i < gc_heap::n_heaps; i++) + { + GCHeap* Hp = gc_heap::g_heaps [i]->vm_heap; + tot_size += Hp->ApproxTotalBytesInUse (FALSE); + } + return tot_size; +#else + return ApproxTotalBytesInUse (); +#endif //MULTIPLE_HEAPS +} + +int GCHeap::CollectionCount (int generation, int get_bgc_fgc_count) +{ + if (get_bgc_fgc_count != 0) + { +#ifdef BACKGROUND_GC + if (generation == max_generation) + { + return (int)(gc_heap::full_gc_counts[gc_type_background]); + } + else + { + return (int)(gc_heap::ephemeral_fgc_counts[generation]); + } +#else + return 0; +#endif //BACKGROUND_GC + } + +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps [0]; +#else //MULTIPLE_HEAPS + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + if (generation > max_generation) + return 0; + else + return (int)dd_collection_count (hp->dynamic_data_of (generation)); +} + +size_t GCHeap::ApproxTotalBytesInUse(BOOL small_heap_only) +{ + size_t totsize = 0; + //GCTODO + //ASSERT(InMustComplete()); + enter_spin_lock (&pGenGCHeap->gc_lock); + + heap_segment* eph_seg = generation_allocation_segment (pGenGCHeap->generation_of (0)); + // Get small block heap size info + totsize = (pGenGCHeap->alloc_allocated - heap_segment_mem (eph_seg)); + heap_segment* seg1 = generation_start_segment (pGenGCHeap->generation_of (max_generation)); + while (seg1 != eph_seg) + { + totsize += heap_segment_allocated (seg1) - + heap_segment_mem (seg1); + seg1 = heap_segment_next (seg1); + } + + //discount the fragmentation + for (int i = 0; i <= max_generation; i++) + { + generation* gen = pGenGCHeap->generation_of (i); + totsize -= (generation_free_list_space (gen) + generation_free_obj_space (gen)); + } + + if (!small_heap_only) + { + heap_segment* seg2 = generation_start_segment (pGenGCHeap->generation_of (max_generation+1)); + + while (seg2 != 0) + { + totsize += heap_segment_allocated (seg2) - + heap_segment_mem (seg2); + seg2 = heap_segment_next (seg2); + } + + //discount the 
fragmentation + generation* loh_gen = pGenGCHeap->generation_of (max_generation+1); + size_t frag = generation_free_list_space (loh_gen) + generation_free_obj_space (loh_gen); + totsize -= frag; + } + leave_spin_lock (&pGenGCHeap->gc_lock); + return totsize; +} + +#ifdef MULTIPLE_HEAPS +void GCHeap::AssignHeap (alloc_context* acontext) +{ + // Assign heap based on processor + acontext->alloc_heap = GetHeap(heap_select::select_heap(acontext, 0)); + acontext->home_heap = acontext->alloc_heap; +} +GCHeap* GCHeap::GetHeap (int n) +{ + assert (n < gc_heap::n_heaps); + return gc_heap::g_heaps [n]->vm_heap; +} +#endif //MULTIPLE_HEAPS + +bool GCHeap::IsThreadUsingAllocationContextHeap(alloc_context* acontext, int thread_number) +{ +#ifdef MULTIPLE_HEAPS + return ((acontext->home_heap == GetHeap(thread_number)) || + ((acontext->home_heap == 0) && (thread_number == 0))); +#else + UNREFERENCED_PARAMETER(acontext); + UNREFERENCED_PARAMETER(thread_number); + return true; +#endif //MULTIPLE_HEAPS +} + +// Returns the number of processors required to trigger the use of thread based allocation contexts +int GCHeap::GetNumberOfHeaps () +{ +#ifdef MULTIPLE_HEAPS + return gc_heap::n_heaps; +#else + return 1; +#endif //MULTIPLE_HEAPS +} + +/* + in this way we spend extra time cycling through all the heaps while create the handle + it ought to be changed by keeping alloc_context.home_heap as number (equals heap_number) +*/ +int GCHeap::GetHomeHeapNumber () +{ +#ifdef MULTIPLE_HEAPS + Thread *pThread = GetThread(); + for (int i = 0; i < gc_heap::n_heaps; i++) + { + if (pThread) + { + GCHeap *hp = GCToEEInterface::GetAllocContext(pThread)->home_heap; + if (hp == gc_heap::g_heaps[i]->vm_heap) return i; + } + } + return 0; +#else + return 0; +#endif //MULTIPLE_HEAPS +} + +unsigned int GCHeap::GetCondemnedGeneration() +{ + return gc_heap::settings.condemned_generation; +} + +int GCHeap::GetGcLatencyMode() +{ + return (int)(pGenGCHeap->settings.pause_mode); +} + +int 
GCHeap::SetGcLatencyMode (int newLatencyMode) +{ + if (gc_heap::settings.pause_mode == pause_no_gc) + return (int)set_pause_mode_no_gc; + + gc_pause_mode new_mode = (gc_pause_mode)newLatencyMode; + + if (new_mode == pause_low_latency) + { +#ifndef MULTIPLE_HEAPS + pGenGCHeap->settings.pause_mode = new_mode; +#endif //!MULTIPLE_HEAPS + } + else if (new_mode == pause_sustained_low_latency) + { +#ifdef BACKGROUND_GC + if (gc_heap::gc_can_use_concurrent) + { + pGenGCHeap->settings.pause_mode = new_mode; + } +#endif //BACKGROUND_GC + } + else + { + pGenGCHeap->settings.pause_mode = new_mode; + } + +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + // If we get here, it means we are doing an FGC. If the pause + // mode was altered we will need to save it in the BGC settings. + if (gc_heap::saved_bgc_settings.pause_mode != new_mode) + { + gc_heap::saved_bgc_settings.pause_mode = new_mode; + } + } +#endif //BACKGROUND_GC + + return (int)set_pause_mode_success; +} + +int GCHeap::GetLOHCompactionMode() +{ + return pGenGCHeap->loh_compaction_mode; +} + +void GCHeap::SetLOHCompactionMode (int newLOHCompactionyMode) +{ +#ifdef FEATURE_LOH_COMPACTION + pGenGCHeap->loh_compaction_mode = (gc_loh_compaction_mode)newLOHCompactionyMode; +#endif //FEATURE_LOH_COMPACTION +} + +BOOL GCHeap::RegisterForFullGCNotification(uint32_t gen2Percentage, + uint32_t lohPercentage) +{ +#ifdef MULTIPLE_HEAPS + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + hp->fgn_last_alloc = dd_new_allocation (hp->dynamic_data_of (0)); + } +#else //MULTIPLE_HEAPS + pGenGCHeap->fgn_last_alloc = dd_new_allocation (pGenGCHeap->dynamic_data_of (0)); +#endif //MULTIPLE_HEAPS + + pGenGCHeap->full_gc_approach_event.Reset(); + pGenGCHeap->full_gc_end_event.Reset(); + pGenGCHeap->full_gc_approach_event_set = false; + + pGenGCHeap->fgn_maxgen_percent = gen2Percentage; + pGenGCHeap->fgn_loh_percent = lohPercentage; + + return TRUE; +} + +BOOL 
GCHeap::CancelFullGCNotification() +{ + pGenGCHeap->fgn_maxgen_percent = 0; + pGenGCHeap->fgn_loh_percent = 0; + + pGenGCHeap->full_gc_approach_event.Set(); + pGenGCHeap->full_gc_end_event.Set(); + + return TRUE; +} + +int GCHeap::WaitForFullGCApproach(int millisecondsTimeout) +{ + dprintf (2, ("WFGA: Begin wait")); + int result = gc_heap::full_gc_wait (&(pGenGCHeap->full_gc_approach_event), millisecondsTimeout); + dprintf (2, ("WFGA: End wait")); + return result; +} + +int GCHeap::WaitForFullGCComplete(int millisecondsTimeout) +{ + dprintf (2, ("WFGE: Begin wait")); + int result = gc_heap::full_gc_wait (&(pGenGCHeap->full_gc_end_event), millisecondsTimeout); + dprintf (2, ("WFGE: End wait")); + return result; +} + +int GCHeap::StartNoGCRegion(uint64_t totalSize, BOOL lohSizeKnown, uint64_t lohSize, BOOL disallowFullBlockingGC) +{ + AllocLockHolder lh; + + dprintf (1, ("begin no gc called")); + start_no_gc_region_status status = gc_heap::prepare_for_no_gc_region (totalSize, lohSizeKnown, lohSize, disallowFullBlockingGC); + if (status == start_no_gc_success) + { + GarbageCollect (max_generation); + status = gc_heap::get_start_no_gc_region_status(); + } + + if (status != start_no_gc_success) + gc_heap::handle_failure_for_no_gc(); + + return (int)status; +} + +int GCHeap::EndNoGCRegion() +{ + AllocLockHolder lh; + return (int)gc_heap::end_no_gc_region(); +} + +void GCHeap::PublishObject (uint8_t* Obj) +{ +#ifdef BACKGROUND_GC + gc_heap* hp = gc_heap::heap_of (Obj); + hp->bgc_alloc_lock->loh_alloc_done (Obj); +#endif //BACKGROUND_GC +} + +// The spec for this one isn't clear. This function +// returns the size that can be allocated without +// triggering a GC of any kind. 
size_t GCHeap::ApproxFreeBytes()
{
    //GCTODO
    //ASSERT(InMustComplete());
    // Take the GC spin lock so the gen0 allocation limit/pointer pair is
    // read consistently.
    enter_spin_lock (&pGenGCHeap->gc_lock);

    generation* gen = pGenGCHeap->generation_of (0);
    size_t res = generation_allocation_limit (gen) - generation_allocation_pointer (gen);

    leave_spin_lock (&pGenGCHeap->gc_lock);

    return res;
}

// Fills 'counters' with the current size, promoted size and collection count
// for generation 'gen'. Returns E_FAIL for an out-of-range generation.
HRESULT GCHeap::GetGcCounters(int gen, gc_counters* counters)
{
    if ((gen < 0) || (gen > max_generation))
        return E_FAIL;
#ifdef MULTIPLE_HEAPS
    counters->current_size = 0;
    counters->promoted_size = 0;
    counters->collection_count = 0;

    //enumerate all the heaps and sum their counters.
    for (int i = 0; i < gc_heap::n_heaps; i++)
    {
        dynamic_data* dd = gc_heap::g_heaps [i]->dynamic_data_of (gen);

        counters->current_size += dd_current_size (dd);
        counters->promoted_size += dd_promoted_size (dd);
        // NOTE(review): only heap 0's collection count is used — presumably
        // all heaps collect in lockstep so the counts are equal; confirm.
        if (i == 0)
        counters->collection_count += dd_collection_count (dd);
    }
#else
    dynamic_data* dd = pGenGCHeap->dynamic_data_of (gen);
    counters->current_size = dd_current_size (dd);
    counters->promoted_size = dd_promoted_size (dd);
    counters->collection_count = dd_collection_count (dd);
#endif //MULTIPLE_HEAPS
    return S_OK;
}

// Get the segment size to use, making sure it conforms.
size_t GCHeap::GetValidSegmentSize(BOOL large_seg)
{
    return get_valid_segment_size (large_seg);
}

// Get the max gen0 heap size, making sure it conforms.
// Starts from the configured value; when that is absent or invalid, falls
// back to a heuristic derived from the on-die cache size and CPU count.
size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
{
    size_t gen0size = g_pConfig->GetGCgen0size();

    if ((gen0size == 0) || !GCHeap::IsValidGen0MaxSize(gen0size))
    {
#ifdef SERVER_GC
        // performance data seems to indicate halving the size results
        // in optimal perf. Ask for adjusted gen0 size.
        gen0size = max(GCToOSInterface::GetLargestOnDieCacheSize(FALSE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
#if (defined(_TARGET_AMD64_))
        // if gen0 size is too large given the available memory, reduce it.
        // Get true cache size, as we don't want to reduce below this.
+ size_t trueSize = max(GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024)); + dprintf (2, ("cache: %Id-%Id, cpu: %Id", + GCToOSInterface::GetLargestOnDieCacheSize(FALSE), + GCToOSInterface::GetLargestOnDieCacheSize(TRUE), + GCToOSInterface::GetLogicalCpuCount())); + + // if the total min GC across heaps will exceed 1/6th of available memory, + // then reduce the min GC size until it either fits or has been reduced to cache size. + while ((gen0size * gc_heap::n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6) + { + gen0size = gen0size / 2; + if (gen0size <= trueSize) + { + gen0size = trueSize; + break; + } + } +#endif //_TARGET_AMD64_ + +#else //SERVER_GC + gen0size = max((4*GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/5),(256*1024)); +#endif //SERVER_GC + } + + // Generation 0 must never be more than 1/2 the segment size. + if (gen0size >= (seg_size / 2)) + gen0size = seg_size / 2; + + return (gen0size); +} + +void GCHeap::SetReservedVMLimit (size_t vmlimit) +{ + gc_heap::reserved_memory_limit = vmlimit; +} + + +//versions of same method on each heap + +#ifdef FEATURE_PREMORTEM_FINALIZATION + +Object* GCHeap::GetNextFinalizableObject() +{ + +#ifdef MULTIPLE_HEAPS + + //return the first non critical one in the first queue. + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + Object* O = hp->finalize_queue->GetNextFinalizableObject(TRUE); + if (O) + return O; + } + //return the first non crtitical/critical one in the first queue. 
+ for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + Object* O = hp->finalize_queue->GetNextFinalizableObject(FALSE); + if (O) + return O; + } + return 0; + + +#else //MULTIPLE_HEAPS + return pGenGCHeap->finalize_queue->GetNextFinalizableObject(); +#endif //MULTIPLE_HEAPS + +} + +size_t GCHeap::GetNumberFinalizableObjects() +{ +#ifdef MULTIPLE_HEAPS + size_t cnt = 0; + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + cnt += hp->finalize_queue->GetNumberFinalizableObjects(); + } + return cnt; + + +#else //MULTIPLE_HEAPS + return pGenGCHeap->finalize_queue->GetNumberFinalizableObjects(); +#endif //MULTIPLE_HEAPS +} + +size_t GCHeap::GetFinalizablePromotedCount() +{ +#ifdef MULTIPLE_HEAPS + size_t cnt = 0; + + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + cnt += hp->finalize_queue->GetPromotedCount(); + } + return cnt; + +#else //MULTIPLE_HEAPS + return pGenGCHeap->finalize_queue->GetPromotedCount(); +#endif //MULTIPLE_HEAPS +} + +BOOL GCHeap::FinalizeAppDomain(AppDomain *pDomain, BOOL fRunFinalizers) +{ +#ifdef MULTIPLE_HEAPS + BOOL foundp = FALSE; + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + if (hp->finalize_queue->FinalizeAppDomain (pDomain, fRunFinalizers)) + foundp = TRUE; + } + return foundp; + +#else //MULTIPLE_HEAPS + return pGenGCHeap->finalize_queue->FinalizeAppDomain (pDomain, fRunFinalizers); +#endif //MULTIPLE_HEAPS +} + +BOOL GCHeap::ShouldRestartFinalizerWatchDog() +{ + // This condition was historically used as part of the condition to detect finalizer thread timeouts + return gc_heap::gc_lock.lock != -1; +} + +void GCHeap::SetFinalizeQueueForShutdown(BOOL fHasLock) +{ +#ifdef MULTIPLE_HEAPS + for (int hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + hp->finalize_queue->SetSegForShutDown(fHasLock); + } + +#else //MULTIPLE_HEAPS + 
pGenGCHeap->finalize_queue->SetSegForShutDown(fHasLock); +#endif //MULTIPLE_HEAPS +} + +//--------------------------------------------------------------------------- +// Finalized class tracking +//--------------------------------------------------------------------------- + +bool GCHeap::RegisterForFinalization (int gen, Object* obj) +{ + if (gen == -1) + gen = 0; + if (((((CObjectHeader*)obj)->GetHeader()->GetBits()) & BIT_SBLK_FINALIZER_RUN)) + { + //just reset the bit + ((CObjectHeader*)obj)->GetHeader()->ClrBit(BIT_SBLK_FINALIZER_RUN); + return true; + } + else + { + gc_heap* hp = gc_heap::heap_of ((uint8_t*)obj); + return hp->finalize_queue->RegisterForFinalization (gen, obj); + } +} + +void GCHeap::SetFinalizationRun (Object* obj) +{ + ((CObjectHeader*)obj)->GetHeader()->SetBit(BIT_SBLK_FINALIZER_RUN); +} + +#endif // FEATURE_PREMORTEM_FINALIZATION + +//---------------------------------------------------------------------------- +// +// Write Barrier Support for bulk copy ("Clone") operations +// +// StartPoint is the target bulk copy start point +// len is the length of the bulk copy (in bytes) +// +// +// Performance Note: +// +// This is implemented somewhat "conservatively", that is we +// assume that all the contents of the bulk copy are object +// references. If they are not, and the value lies in the +// ephemeral range, we will set false positives in the card table. +// +// We could use the pointer maps and do this more accurately if necessary + +#if defined(_MSC_VER) && defined(_TARGET_X86_) +#pragma optimize("y", on) // Small critical routines, don't put in EBP frame +#endif //_MSC_VER && _TARGET_X86_ + +void +GCHeap::SetCardsAfterBulkCopy( Object **StartPoint, size_t len ) +{ + Object **rover; + Object **end; + + // Target should aligned + assert(Aligned ((size_t)StartPoint)); + + + // Don't optimize the Generation 0 case if we are checking for write barrier voilations + // since we need to update the shadow heap even in the generation 0 case. 
+#if defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC) + if (g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) + for(unsigned i=0; i < len / sizeof(Object*); i++) + updateGCShadow(&StartPoint[i], StartPoint[i]); +#endif //WRITE_BARRIER_CHECK && !SERVER_GC + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (SoftwareWriteWatch::IsEnabledForGCHeap()) + { + SoftwareWriteWatch::SetDirtyRegion(StartPoint, len); + } +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + + // If destination is in Gen 0 don't bother + if ( +#ifdef BACKGROUND_GC + (!gc_heap::settings.concurrent) && +#endif //BACKGROUND_GC + (GCHeap::GetGCHeap()->WhichGeneration( (Object*) StartPoint ) == 0)) + return; + + rover = StartPoint; + end = StartPoint + (len/sizeof(Object*)); + while (rover < end) + { + if ( (((uint8_t*)*rover) >= g_ephemeral_low) && (((uint8_t*)*rover) < g_ephemeral_high) ) + { + // Set Bit For Card and advance to next card + size_t card = gcard_of ((uint8_t*)rover); + + Interlocked::Or (&g_card_table[card/card_word_width], (1U << (card % card_word_width))); + // Skip to next card for the object + rover = (Object**)align_on_card ((uint8_t*)(rover+1)); + } + else + { + rover++; + } + } +} + +#if defined(_MSC_VER) && defined(_TARGET_X86_) +#pragma optimize("", on) // Go back to command line default optimizations +#endif //_MSC_VER && _TARGET_X86_ + + +#ifdef FEATURE_PREMORTEM_FINALIZATION + +//-------------------------------------------------------------------- +// +// Support for finalization +// +//-------------------------------------------------------------------- + +inline +unsigned int gen_segment (int gen) +{ + assert (((signed)NUMBERGENERATIONS - gen - 1)>=0); + return (NUMBERGENERATIONS - gen - 1); +} + +bool CFinalize::Initialize() +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + } CONTRACTL_END; + + m_Array = new (nothrow)(Object*[100]); + + if (!m_Array) + { + ASSERT (m_Array); + STRESS_LOG_OOM_STACK(sizeof(Object*[100])); + if 
(g_pConfig->IsGCBreakOnOOMEnabled()) + { + GCToOSInterface::DebugBreak(); + } + return false; + } + m_EndArray = &m_Array[100]; + + for (int i =0; i < FreeList; i++) + { + SegQueueLimit (i) = m_Array; + } + m_PromotedCount = 0; + lock = -1; +#ifdef _DEBUG + lockowner_threadid.Clear(); +#endif // _DEBUG + + return true; +} + +CFinalize::~CFinalize() +{ + delete m_Array; +} + +size_t CFinalize::GetPromotedCount () +{ + return m_PromotedCount; +} + +inline +void CFinalize::EnterFinalizeLock() +{ + _ASSERTE(dbgOnly_IsSpecialEEThread() || + GetThread() == 0 || + GCToEEInterface::IsPreemptiveGCDisabled(GetThread())); + +retry: + if (Interlocked::Exchange (&lock, 0) >= 0) + { + unsigned int i = 0; + while (lock >= 0) + { + YieldProcessor(); // indicate to the processor that we are spining + if (++i & 7) + GCToOSInterface::YieldThread (0); + else + GCToOSInterface::Sleep (5); + } + goto retry; + } + +#ifdef _DEBUG + lockowner_threadid.SetToCurrentThread(); +#endif // _DEBUG +} + +inline +void CFinalize::LeaveFinalizeLock() +{ + _ASSERTE(dbgOnly_IsSpecialEEThread() || + GetThread() == 0 || + GCToEEInterface::IsPreemptiveGCDisabled(GetThread())); + +#ifdef _DEBUG + lockowner_threadid.Clear(); +#endif // _DEBUG + lock = -1; +} + +bool +CFinalize::RegisterForFinalization (int gen, Object* obj, size_t size) +{ + CONTRACTL { +#ifdef FEATURE_REDHAWK + // Under Redhawk false is returned on failure. + NOTHROW; +#else + THROWS; +#endif + GC_NOTRIGGER; + } CONTRACTL_END; + + EnterFinalizeLock(); + // Adjust gen + unsigned int dest = 0; + + if (g_fFinalizerRunOnShutDown) + { + //no method table available yet, + //put it in the finalizer queue and sort out when + //dequeueing + dest = FinalizerListSeg; + } + + else + dest = gen_segment (gen); + + // Adjust boundary for segments so that GC will keep objects alive. 
+ Object*** s_i = &SegQueue (FreeList); + if ((*s_i) == m_EndArray) + { + if (!GrowArray()) + { + LeaveFinalizeLock(); + if (method_table(obj) == NULL) + { + // If the object is uninitialized, a valid size should have been passed. + assert (size >= Align (min_obj_size)); + dprintf (3, ("Making unused array [%Ix, %Ix[", (size_t)obj, (size_t)(obj+size))); + ((CObjectHeader*)obj)->SetFree(size); + } + STRESS_LOG_OOM_STACK(0); + if (g_pConfig->IsGCBreakOnOOMEnabled()) + { + GCToOSInterface::DebugBreak(); + } +#ifdef FEATURE_REDHAWK + return false; +#else + ThrowOutOfMemory(); +#endif + } + } + Object*** end_si = &SegQueueLimit (dest); + do + { + //is the segment empty? + if (!(*s_i == *(s_i-1))) + { + //no, swap the end elements. + *(*s_i) = *(*(s_i-1)); + } + //increment the fill pointer + (*s_i)++; + //go to the next segment. + s_i--; + } while (s_i > end_si); + + // We have reached the destination segment + // store the object + **s_i = obj; + // increment the fill pointer + (*s_i)++; + + LeaveFinalizeLock(); + + return true; +} + +Object* +CFinalize::GetNextFinalizableObject (BOOL only_non_critical) +{ + Object* obj = 0; + //serialize + EnterFinalizeLock(); + +retry: + if (!IsSegEmpty(FinalizerListSeg)) + { + if (g_fFinalizerRunOnShutDown) + { + obj = *(SegQueueLimit (FinalizerListSeg)-1); + if (method_table(obj)->HasCriticalFinalizer()) + { + MoveItem ((SegQueueLimit (FinalizerListSeg)-1), + FinalizerListSeg, CriticalFinalizerListSeg); + goto retry; + } + else + --SegQueueLimit (FinalizerListSeg); + } + else + obj = *(--SegQueueLimit (FinalizerListSeg)); + + } + else if (!only_non_critical && !IsSegEmpty(CriticalFinalizerListSeg)) + { + //the FinalizerList is empty, we can adjust both + // limit instead of moving the object to the free list + obj = *(--SegQueueLimit (CriticalFinalizerListSeg)); + --SegQueueLimit (FinalizerListSeg); + } + if (obj) + { + dprintf (3, ("running finalizer for %Ix (mt: %Ix)", obj, method_table (obj))); + } + LeaveFinalizeLock(); + 
return obj;
}

// Shutdown preparation: moves every object still registered in a generation
// segment onto the finalizer lists — critical finalizers to
// CriticalFinalizerListSeg, the rest to FinalizerListSeg.
void
CFinalize::SetSegForShutDown(BOOL fHasLock)
{
    int i;

    if (!fHasLock)
        EnterFinalizeLock();
    for (i = 0; i <= max_generation; i++)
    {
        unsigned int seg = gen_segment (i);
        Object** startIndex = SegQueueLimit (seg)-1;
        Object** stopIndex = SegQueue (seg);
        for (Object** po = startIndex; po >= stopIndex; po--)
        {
            Object* obj = *po;
            if (method_table(obj)->HasCriticalFinalizer())
            {
                MoveItem (po, seg, CriticalFinalizerListSeg);
            }
            else
            {
                MoveItem (po, seg, FinalizerListSeg);
            }
        }
    }
    if (!fHasLock)
        LeaveFinalizeLock();
}

// Drops every entry on the (non-critical) finalizer list by moving it to the
// free list; the critical finalizer list is left untouched.
void
CFinalize::DiscardNonCriticalObjects()
{
    //empty the finalization queue
    Object** startIndex = SegQueueLimit (FinalizerListSeg)-1;
    Object** stopIndex = SegQueue (FinalizerListSeg);
    for (Object** po = startIndex; po >= stopIndex; po--)
    {
        MoveItem (po, FinalizerListSeg, FreeList);
    }
}

// Number of objects waiting on the finalizer list. During shutdown
// (g_fFinalizerRunOnShutDown) the count is taken from the start of the
// whole array instead of the segment start.
size_t
CFinalize::GetNumberFinalizableObjects()
{
    return SegQueueLimit (FinalizerListSeg) -
        (g_fFinalizerRunOnShutDown ? m_Array : SegQueue(FinalizerListSeg));
}

// Scans one queue segment for objects belonging to the unloading appdomain
// 'pDomain' and either discards them or queues them for finalization
// (depending on fRunFinalizers and rude-unload state; see below). Returns
// TRUE if anything was queued for finalization.
BOOL
CFinalize::FinalizeSegForAppDomain (AppDomain *pDomain,
                                    BOOL fRunFinalizers,
                                    unsigned int Seg)
{
    BOOL finalizedFound = FALSE;
    Object** endIndex = SegQueue (Seg);
    for (Object** i = SegQueueLimit (Seg)-1; i >= endIndex ;i--)
    {
        CObjectHeader* obj = (CObjectHeader*)*i;

        // Objects are put into the finalization queue before they are complete (ie their methodtable
        // may be null) so we must check that the object we found has a method table before checking
        // if it has the index we are looking for. If the methodtable is null, it can't be from the
        // unloading domain, so skip it.
        if (method_table(obj) == NULL)
            continue;

        // eagerly finalize all objects except those that may be agile.
+ if (obj->GetAppDomainIndex() != pDomain->GetIndex()) + continue; + +#ifndef FEATURE_REDHAWK + if (method_table(obj)->IsAgileAndFinalizable()) + { + // If an object is both agile & finalizable, we leave it in the + // finalization queue during unload. This is OK, since it's agile. + // Right now only threads can be this way, so if that ever changes, change + // the assert to just continue if not a thread. + _ASSERTE(method_table(obj) == g_pThreadClass); + + if (method_table(obj) == g_pThreadClass) + { + // However, an unstarted thread should be finalized. It could be holding a delegate + // in the domain we want to unload. Once the thread has been started, its + // delegate is cleared so only unstarted threads are a problem. + Thread *pThread = ((THREADBASEREF)ObjectToOBJECTREF(obj))->GetInternal(); + if (! pThread || ! pThread->IsUnstarted()) + { + // This appdomain is going to be gone soon so let us assign + // it the appdomain that's guaranteed to exist + // The object is agile and the delegate should be null so we can do it + obj->GetHeader()->ResetAppDomainIndexNoFailure(SystemDomain::System()->DefaultDomain()->GetIndex()); + continue; + } + } + else + { + obj->GetHeader()->ResetAppDomainIndexNoFailure(SystemDomain::System()->DefaultDomain()->GetIndex()); + continue; + } + } +#endif //!FEATURE_REDHAWK + + if (!fRunFinalizers || (obj->GetHeader()->GetBits()) & BIT_SBLK_FINALIZER_RUN) + { + //remove the object because we don't want to + //run the finalizer + MoveItem (i, Seg, FreeList); + //Reset the bit so it will be put back on the queue + //if resurrected and re-registered. 
obj->GetHeader()->ClrBit (BIT_SBLK_FINALIZER_RUN);
        }
        else
        {
            if (method_table(obj)->HasCriticalFinalizer())
            {
                finalizedFound = TRUE;
                MoveItem (i, Seg, CriticalFinalizerListSeg);
            }
            else
            {
                // On a rude unload non-critical finalizers are not run at
                // all — just free the slot.
                if (pDomain->IsRudeUnload())
                {
                    MoveItem (i, Seg, FreeList);
                }
                else
                {
                    finalizedFound = TRUE;
                    MoveItem (i, Seg, FinalizerListSeg);
                }
            }
        }
    }

    return finalizedFound;
}

// Walks every generation's queue segment and processes the entries that
// belong to the unloading appdomain 'pDomain'. Returns TRUE if any object
// was queued for finalization.
BOOL
CFinalize::FinalizeAppDomain (AppDomain *pDomain, BOOL fRunFinalizers)
{
    BOOL finalizedFound = FALSE;

    unsigned int startSeg = gen_segment (max_generation);

    EnterFinalizeLock();

    for (unsigned int Seg = startSeg; Seg <= gen_segment (0); Seg++)
    {
        if (FinalizeSegForAppDomain (pDomain, fRunFinalizers, Seg))
        {
            finalizedFound = TRUE;
        }
    }

    LeaveFinalizeLock();

    return finalizedFound;
}

// Moves the entry at 'fromIndex' from segment 'fromSeg' to segment 'toSeg'
// by swapping it with the element at each intervening segment boundary and
// shifting the fill pointers, so all segments stay contiguous.
void
CFinalize::MoveItem (Object** fromIndex,
                     unsigned int fromSeg,
                     unsigned int toSeg)
{

    int step;
    ASSERT (fromSeg != toSeg);
    if (fromSeg > toSeg)
        step = -1;
    else
        step = +1;
    // Place the element at the boundary closest to dest
    Object** srcIndex = fromIndex;
    for (unsigned int i = fromSeg; i != toSeg; i+= step)
    {
        // step == +1: fill pointer of segment i, slot just below it;
        // step == -1: fill pointer of segment i-1, slot exactly at it.
        Object**& destFill = m_FillPointers[i+(step - 1 )/2];
        Object** destIndex = destFill - (step + 1)/2;
        if (srcIndex != destIndex)
        {
            Object* tmp = *srcIndex;
            *srcIndex = *destIndex;
            *destIndex = tmp;
        }
        destFill -= step;
        srcIndex = destIndex;
    }
}

// Reports every entry on the critical and non-critical finalizer lists to
// the promotion callback 'fn' so the GC keeps the f-reachable objects alive.
// 'hn' is the scanning heap's number; 'pSC' may be null, in which case a
// local ScanContext is used.
void
CFinalize::GcScanRoots (promote_func* fn, int hn, ScanContext *pSC)
{
    ScanContext sc;
    if (pSC == 0)
        pSC = &sc;

    pSC->thread_number = hn;

    //scan the finalization queue
    Object** startIndex = SegQueue (CriticalFinalizerListSeg);
    Object** stopIndex = SegQueueLimit (FinalizerListSeg);

    for (Object** po = startIndex; po < stopIndex; po++)
    {
        Object* o = *po;
        //dprintf (3, ("scan freacheable %Ix", (size_t)o));
        dprintf (3, ("scan f %Ix", (size_t)o));
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
        if (g_fEnableARM)
        {
pSC->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(o->GetAppDomainIndex()); + } +#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING + + (*fn)(po, pSC, 0); + } +} + +#ifdef GC_PROFILING +void CFinalize::WalkFReachableObjects (gc_heap* hp) +{ + BEGIN_PIN_PROFILER(CORProfilerPresent()); + Object** startIndex = SegQueue (CriticalFinalizerListSeg); + Object** stopCriticalIndex = SegQueueLimit (CriticalFinalizerListSeg); + Object** stopIndex = SegQueueLimit (FinalizerListSeg); + for (Object** po = startIndex; po < stopIndex; po++) + { + //report *po + g_profControlBlock.pProfInterface->FinalizeableObjectQueued(po < stopCriticalIndex, (ObjectID)*po); + } + END_PIN_PROFILER(); +} +#endif //GC_PROFILING + +BOOL +CFinalize::ScanForFinalization (promote_func* pfn, int gen, BOOL mark_only_p, + gc_heap* hp) +{ + ScanContext sc; + sc.promotion = TRUE; +#ifdef MULTIPLE_HEAPS + sc.thread_number = hp->heap_number; +#else + UNREFERENCED_PARAMETER(hp); +#endif //MULTIPLE_HEAPS + + BOOL finalizedFound = FALSE; + + //start with gen and explore all the younger generations. + unsigned int startSeg = gen_segment (gen); + { + m_PromotedCount = 0; + for (unsigned int Seg = startSeg; Seg <= gen_segment(0); Seg++) + { + Object** endIndex = SegQueue (Seg); + for (Object** i = SegQueueLimit (Seg)-1; i >= endIndex ;i--) + { + CObjectHeader* obj = (CObjectHeader*)*i; + dprintf (3, ("scanning: %Ix", (size_t)obj)); + if (!GCHeap::GetGCHeap()->IsPromoted (obj)) + { + dprintf (3, ("freacheable: %Ix", (size_t)obj)); + + assert (method_table(obj)->HasFinalizer()); + +#ifndef FEATURE_REDHAWK + if (method_table(obj) == pWeakReferenceMT || method_table(obj)->GetCanonicalMethodTable() == pWeakReferenceOfTCanonMT) + { + //destruct the handle right there. 
+ FinalizeWeakReference (obj); + MoveItem (i, Seg, FreeList); + } + else +#endif //!FEATURE_REDHAWK + if ((obj->GetHeader()->GetBits()) & BIT_SBLK_FINALIZER_RUN) + { + //remove the object because we don't want to + //run the finalizer + + MoveItem (i, Seg, FreeList); + + //Reset the bit so it will be put back on the queue + //if resurrected and re-registered. + obj->GetHeader()->ClrBit (BIT_SBLK_FINALIZER_RUN); + + } + else + { + m_PromotedCount++; + + if (method_table(obj)->HasCriticalFinalizer()) + { + MoveItem (i, Seg, CriticalFinalizerListSeg); + } + else + { + MoveItem (i, Seg, FinalizerListSeg); + } + } + } +#ifdef BACKGROUND_GC + else + { + if ((gen == max_generation) && (recursive_gc_sync::background_running_p())) + { + // TODO - fix the following line. + //assert (gc_heap::background_object_marked ((uint8_t*)obj, FALSE)); + dprintf (3, ("%Ix is marked", (size_t)obj)); + } + } +#endif //BACKGROUND_GC + } + } + } + finalizedFound = !IsSegEmpty(FinalizerListSeg) || + !IsSegEmpty(CriticalFinalizerListSeg); + + if (finalizedFound) + { + //Promote the f-reachable objects + GcScanRoots (pfn, +#ifdef MULTIPLE_HEAPS + hp->heap_number +#else + 0 +#endif //MULTIPLE_HEAPS + , 0); + + hp->settings.found_finalizers = TRUE; + +#ifdef BACKGROUND_GC + if (hp->settings.concurrent) + { + hp->settings.found_finalizers = !(IsSegEmpty(FinalizerListSeg) && IsSegEmpty(CriticalFinalizerListSeg)); + } +#endif //BACKGROUND_GC + if (hp->settings.concurrent && hp->settings.found_finalizers) + { + if (!mark_only_p) + FinalizerThread::EnableFinalization(); + } + } + + return finalizedFound; +} + +//Relocates all of the objects in the finalization array +void +CFinalize::RelocateFinalizationData (int gen, gc_heap* hp) +{ + ScanContext sc; + sc.promotion = FALSE; +#ifdef MULTIPLE_HEAPS + sc.thread_number = hp->heap_number; +#else + UNREFERENCED_PARAMETER(hp); +#endif //MULTIPLE_HEAPS + + unsigned int Seg = gen_segment (gen); + + Object** startIndex = SegQueue (Seg); + for (Object** po = 
startIndex; po < SegQueue (FreeList);po++)
    {
        GCHeap::Relocate (po, &sc);
    }
}

// After a GC, updates the per-generation queue segments to reflect
// promotion (and demotion) of the registered objects.
void
CFinalize::UpdatePromotedGenerations (int gen, BOOL gen_0_empty_p)
{
    // update the generation fill pointers.
    // if gen_0_empty is FALSE, test each object to find out if
    // it was promoted or not
    if (gen_0_empty_p)
    {
        // NOTE(review): this slides each segment boundary down one
        // generation — presumably valid because every surviving object was
        // promoted exactly one generation; confirm against the caller.
        for (int i = min (gen+1, max_generation); i > 0; i--)
        {
            m_FillPointers [gen_segment(i)] = m_FillPointers [gen_segment(i-1)];
        }
    }
    else
    {
        //Look for demoted or promoted plugs

        for (int i = gen; i >= 0; i--)
        {
            unsigned int Seg = gen_segment (i);
            Object** startIndex = SegQueue (Seg);

            for (Object** po = startIndex;
                 po < SegQueueLimit (gen_segment(i)); po++)
            {
                int new_gen = GCHeap::GetGCHeap()->WhichGeneration (*po);
                if (new_gen != i)
                {
                    if (new_gen > i)
                    {
                        //promotion
                        MoveItem (po, gen_segment (i), gen_segment (new_gen));
                    }
                    else
                    {
                        //demotion
                        MoveItem (po, gen_segment (i), gen_segment (new_gen));
                        //back down in order to see all objects.
                        po--;
                    }
                }

            }
        }
    }
}

// Grows the finalization array to 12/10 of its current size, copying the
// existing entries and rebasing the fill pointers. Returns FALSE on
// allocation failure; the caller decides whether to throw.
BOOL
CFinalize::GrowArray()
{
    size_t oldArraySize = (m_EndArray - m_Array);
    size_t newArraySize = (size_t)(((float)oldArraySize / 10) * 12);

    Object** newArray = new (nothrow) Object*[newArraySize];
    if (!newArray)
    {
        // It's not safe to throw here, because of the FinalizeLock. Tell our caller
        // to throw for us.
+// ASSERT (newArray); + return FALSE; + } + memcpy (newArray, m_Array, oldArraySize*sizeof(Object*)); + + //adjust the fill pointers + for (int i = 0; i < FreeList; i++) + { + m_FillPointers [i] += (newArray - m_Array); + } + delete m_Array; + m_Array = newArray; + m_EndArray = &m_Array [newArraySize]; + + return TRUE; +} + +#ifdef VERIFY_HEAP +void CFinalize::CheckFinalizerObjects() +{ + for (int i = 0; i <= max_generation; i++) + { + Object **startIndex = SegQueue (gen_segment (i)); + Object **stopIndex = SegQueueLimit (gen_segment (i)); + + for (Object **po = startIndex; po < stopIndex; po++) + { + if ((int)GCHeap::GetGCHeap()->WhichGeneration (*po) < i) + FATAL_GC_ERROR (); + ((CObjectHeader*)*po)->Validate(); + } + } +} +#endif //VERIFY_HEAP + +#endif // FEATURE_PREMORTEM_FINALIZATION + + +//------------------------------------------------------------------------------ +// +// End of VM specific support +// +//------------------------------------------------------------------------------ + +void gc_heap::walk_heap (walk_fn fn, void* context, int gen_number, BOOL walk_large_object_heap_p) +{ + generation* gen = gc_heap::generation_of (gen_number); + heap_segment* seg = generation_start_segment (gen); + uint8_t* x = ((gen_number == max_generation) ? 
heap_segment_mem (seg) : + generation_allocation_start (gen)); + + uint8_t* end = heap_segment_allocated (seg); + BOOL small_object_segments = TRUE; + int align_const = get_alignment_constant (small_object_segments); + + while (1) + + { + if (x >= end) + { + if ((seg = heap_segment_next (seg)) != 0) + { + x = heap_segment_mem (seg); + end = heap_segment_allocated (seg); + continue; + } + else + { + if (small_object_segments && walk_large_object_heap_p) + + { + small_object_segments = FALSE; + align_const = get_alignment_constant (small_object_segments); + seg = generation_start_segment (large_object_generation); + x = heap_segment_mem (seg); + end = heap_segment_allocated (seg); + continue; + } + else + { + break; + } + } + } + + size_t s = size (x); + CObjectHeader* o = (CObjectHeader*)x; + + if (!o->IsFree()) + + { + _ASSERTE(((size_t)o & 0x3) == 0); // Last two bits should never be set at this point + + if (!fn (o->GetObjectBase(), context)) + return; + } + x = x + Align (s, align_const); + } +} + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +void GCHeap::WalkObject (Object* obj, walk_fn fn, void* context) +{ + uint8_t* o = (uint8_t*)obj; + if (o) + { + go_through_object_cl (method_table (o), o, size(o), oo, + { + if (*oo) + { + Object *oh = (Object*)*oo; + if (!fn (oh, context)) + return; + } + } + ); + } +} +#endif //defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +// Go through and touch (read) each page straddled by a memory block. +void TouchPages(void * pStart, size_t cb) +{ + const uint32_t pagesize = OS_PAGE_SIZE; + _ASSERTE(0 == (pagesize & (pagesize-1))); // Must be a power of 2. 
+ if (cb) + { + VOLATILE(char)* pEnd = (VOLATILE(char)*)(cb + (char*)pStart); + VOLATILE(char)* p = (VOLATILE(char)*)(((char*)pStart) - (((size_t)pStart) & (pagesize-1))); + while (p < pEnd) + { + char a; + a = VolatileLoad(p); + //printf("Touching page %lxh\n", (uint32_t)p); + p += pagesize; + } + } +} + +#if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC) + // This code is designed to catch the failure to update the write barrier + // The way it works is to copy the whole heap right after every GC. The write + // barrier code has been modified so that it updates the shadow as well as the + // real GC heap. Before doing the next GC, we walk the heap, looking for pointers + // that were updated in the real heap, but not the shadow. A mismatch indicates + // an error. The offending code can be found by breaking after the correct GC, + // and then placing a data breakpoint on the Heap location that was updated without + // going through the write barrier. + + // Called at process shutdown +void deleteGCShadow() +{ + if (g_GCShadow != 0) + GCToOSInterface::VirtualRelease (g_GCShadow, g_GCShadowEnd - g_GCShadow); + g_GCShadow = 0; + g_GCShadowEnd = 0; +} + + // Called at startup and right after a GC, get a snapshot of the GC Heap +void initGCShadow() +{ + if (!(g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK)) + return; + + size_t len = g_highest_address - g_lowest_address; + if (len > (size_t)(g_GCShadowEnd - g_GCShadow)) + { + deleteGCShadow(); + g_GCShadowEnd = g_GCShadow = (uint8_t *)GCToOSInterface::VirtualReserve(0, len, 0, VirtualReserveFlags::None); + if (g_GCShadow == NULL || !GCToOSInterface::VirtualCommit(g_GCShadow, len)) + { + _ASSERTE(!"Not enough memory to run HeapVerify level 2"); + // If after the assert we decide to allow the program to continue + // running we need to be in a state that will not trigger any + // additional AVs while we fail to allocate a shadow segment, i.e. 
+ // ensure calls to updateGCShadow() checkGCWriteBarrier() don't AV + deleteGCShadow(); + return; + } + + g_GCShadowEnd += len; + } + + // save the value of g_lowest_address at this time. If this value changes before + // the next call to checkGCWriteBarrier() it means we extended the heap (with a + // large object segment most probably), and the whole shadow segment is inconsistent. + g_shadow_lowest_address = g_lowest_address; + + //****** Copy the whole GC heap ****** + // + // NOTE: This is the one situation where the combination of heap_segment_rw(gen_start_segment()) + // can produce a NULL result. This is because the initialization has not completed. + // + generation* gen = gc_heap::generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + ptrdiff_t delta = g_GCShadow - g_lowest_address; + BOOL small_object_segments = TRUE; + while(1) + { + if (!seg) + { + if (small_object_segments) + { + small_object_segments = FALSE; + seg = heap_segment_rw (generation_start_segment (gc_heap::generation_of (max_generation+1))); + continue; + } + else + break; + } + // Copy the segment + uint8_t* start = heap_segment_mem(seg); + uint8_t* end = heap_segment_allocated (seg); + memcpy(start + delta, start, end - start); + seg = heap_segment_next_rw (seg); + } +} + +#define INVALIDGCVALUE (void*)((size_t)0xcccccccd) + + // test to see if 'ptr' was only updated via the write barrier. +inline void testGCShadow(Object** ptr) +{ + Object** shadow = (Object**) &g_GCShadow[((uint8_t*) ptr - g_lowest_address)]; + if (*ptr != 0 && (uint8_t*) shadow < g_GCShadowEnd && *ptr != *shadow) + { + + // If you get this assertion, someone updated a GC poitner in the heap without + // using the write barrier. To find out who, check the value of + // dd_collection_count (dynamic_data_of (0)). Also + // note the value of 'ptr'. Rerun the App that the previous GC just occurred. 
+ // Then put a data breakpoint for the value of 'ptr' Then check every write + // to pointer between the two GCs. The last one is not using the write barrier. + + // If the memory of interest does not exist at system startup, + // you need to set the data breakpoint right after the memory gets committed + // Set a breakpoint at the end of grow_heap_segment, and put the value of 'ptr' + // in the memory window. run until the memory gets mapped. Then you can set + // your breakpoint + + // Note a recent change, we've identified race conditions when updating the gc shadow. + // Throughout the runtime, code will update an address in the gc heap, then erect the + // write barrier, which calls updateGCShadow. With an app that pounds one heap location + // from multiple threads, you can hit this assert even though all involved are using the + // write barrier properly. Thusly, we detect the race and set this location to INVALIDGCVALUE. + // TODO: the code in jithelp.asm doesn't call updateGCShadow, and hasn't been + // TODO: fixed to detect the race. We've only seen this race from VolatileWritePtr, + // TODO: so elect not to fix jithelp.asm at this time. It should be done if we start hitting + // TODO: erroneous asserts in here. + + if(*shadow!=INVALIDGCVALUE) + { +#ifdef FEATURE_BASICFREEZE + // Write barriers for stores of references to frozen objects may be optimized away. + if (!gc_heap::frozen_object_p(*ptr)) +#endif // FEATURE_BASICFREEZE + { + _ASSERTE(!"Pointer updated without using write barrier"); + } + } + /* + else + { + printf("saw a INVALIDGCVALUE. (just to let you know)\n"); + } + */ + } +} + +void testGCShadowHelper (uint8_t* x) +{ + size_t s = size (x); + if (contain_pointers (x)) + { + go_through_object_nostart (method_table(x), x, s, oo, + { testGCShadow((Object**) oo); }); + } +} + + // Walk the whole heap, looking for pointers that were not updated with the write barrier. 
+void checkGCWriteBarrier() +{ + // g_shadow_lowest_address != g_lowest_address means the GC heap was extended by a segment + // and the GC shadow segment did not track that change! + if (g_GCShadowEnd <= g_GCShadow || g_shadow_lowest_address != g_lowest_address) + { + // No shadow stack, nothing to check. + return; + } + + { + generation* gen = gc_heap::generation_of (max_generation); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while(seg) + { + uint8_t* x = heap_segment_mem(seg); + while (x < heap_segment_allocated (seg)) + { + size_t s = size (x); + testGCShadowHelper (x); + x = x + Align (s); + } + seg = heap_segment_next_rw (seg); + } + } + + { + // go through large object heap + int alignment = get_alignment_constant(FALSE); + generation* gen = gc_heap::generation_of (max_generation+1); + heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); + + PREFIX_ASSUME(seg != NULL); + + while(seg) + { + uint8_t* x = heap_segment_mem(seg); + while (x < heap_segment_allocated (seg)) + { + size_t s = size (x); + testGCShadowHelper (x); + x = x + Align (s, alignment); + } + seg = heap_segment_next_rw (seg); + } + } +} +#endif //WRITE_BARRIER_CHECK && !SERVER_GC + +#endif // !DACCESS_COMPILE + +#ifdef FEATURE_BASICFREEZE +void gc_heap::walk_read_only_segment(heap_segment *seg, void *pvContext, object_callback_func pfnMethodTable, object_callback_func pfnObjRef) +{ +#ifdef DACCESS_COMPILE + UNREFERENCED_PARAMETER(seg); + UNREFERENCED_PARAMETER(pvContext); + UNREFERENCED_PARAMETER(pfnMethodTable); + UNREFERENCED_PARAMETER(pfnObjRef); +#else + uint8_t *o = heap_segment_mem(seg); + + // small heap alignment constant + int alignment = get_alignment_constant(TRUE); + + while (o < heap_segment_allocated(seg)) + { + pfnMethodTable(pvContext, o); + + if (contain_pointers (o)) + { + go_through_object_nostart (method_table (o), o, size(o), oo, + { + if (*oo) + pfnObjRef(pvContext, oo); + } + ); + } + + o += 
Align(size(o), alignment); + } +#endif //!DACCESS_COMPILE +} +#endif // FEATURE_BASICFREEZE + +#ifndef DACCESS_COMPILE +HRESULT GCHeap::WaitUntilConcurrentGCCompleteAsync(int millisecondsTimeout) +{ +#ifdef BACKGROUND_GC + if (recursive_gc_sync::background_running_p()) + { + uint32_t dwRet = pGenGCHeap->background_gc_wait(awr_ignored, millisecondsTimeout); + if (dwRet == WAIT_OBJECT_0) + return S_OK; + else if (dwRet == WAIT_TIMEOUT) + return HRESULT_FROM_WIN32(ERROR_TIMEOUT); + else + return E_FAIL; // It is not clear if what the last error would be if the wait failed, + // as there are too many layers in between. The best we can do is to return E_FAIL; + } +#endif + + return S_OK; +} +#endif // !DACCESS_COMPILE + +void GCHeap::TemporaryEnableConcurrentGC() +{ +#ifdef BACKGROUND_GC + gc_heap::temp_disable_concurrent_p = false; +#endif //BACKGROUND_GC +} + +void GCHeap::TemporaryDisableConcurrentGC() +{ +#ifdef BACKGROUND_GC + gc_heap::temp_disable_concurrent_p = true; +#endif //BACKGROUND_GC +} + +BOOL GCHeap::IsConcurrentGCEnabled() +{ +#ifdef BACKGROUND_GC + return (gc_heap::gc_can_use_concurrent && !(gc_heap::temp_disable_concurrent_p)); +#else + return FALSE; +#endif //BACKGROUND_GC +} diff --git a/src/gc/gc.h b/src/gc/gc.h new file mode 100644 index 0000000000..14c6baee83 --- /dev/null +++ b/src/gc/gc.h @@ -0,0 +1,680 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ + +/*++ + +Module Name: + + gc.h + +--*/ + +#ifndef __GC_H +#define __GC_H + +#ifdef PROFILING_SUPPORTED +#define GC_PROFILING //Turn on profiling +#endif // PROFILING_SUPPORTED + +/* + * Promotion Function Prototypes + */ +typedef void enum_func (Object*); + +// callback functions for heap walkers +typedef void object_callback_func(void * pvContext, void * pvDataLoc); + +// stub type to abstract a heap segment +struct gc_heap_segment_stub; +typedef gc_heap_segment_stub *segment_handle; + +struct segment_info +{ + void * pvMem; // base of the allocation, not the first object (must add ibFirstObject) + size_t ibFirstObject; // offset to the base of the first object in the segment + size_t ibAllocated; // limit of allocated memory in the segment (>= firstobject) + size_t ibCommit; // limit of committed memory in the segment (>= alllocated) + size_t ibReserved; // limit of reserved memory in the segment (>= commit) +}; + +/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ +/* If you modify failure_get_memory and */ +/* oom_reason be sure to make the corresponding */ +/* changes in toolbox\sos\strike\strike.cpp. 
*/ +/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ +enum failure_get_memory +{ + fgm_no_failure = 0, + fgm_reserve_segment = 1, + fgm_commit_segment_beg = 2, + fgm_commit_eph_segment = 3, + fgm_grow_table = 4, + fgm_commit_table = 5 +}; + +struct fgm_history +{ + failure_get_memory fgm; + size_t size; + size_t available_pagefile_mb; + BOOL loh_p; + + void set_fgm (failure_get_memory f, size_t s, BOOL l) + { + fgm = f; + size = s; + loh_p = l; + } +}; + +enum oom_reason +{ + oom_no_failure = 0, + oom_budget = 1, + oom_cant_commit = 2, + oom_cant_reserve = 3, + oom_loh = 4, + oom_low_mem = 5, + oom_unproductive_full_gc = 6 +}; + +struct oom_history +{ + oom_reason reason; + size_t alloc_size; + uint8_t* reserved; + uint8_t* allocated; + size_t gc_index; + failure_get_memory fgm; + size_t size; + size_t available_pagefile_mb; + BOOL loh_p; +}; + +/* forward declerations */ +class CObjectHeader; +class Object; + +class GCHeap; + +/* misc defines */ +#define LARGE_OBJECT_SIZE ((size_t)(85000)) + +GPTR_DECL(GCHeap, g_pGCHeap); + +#ifdef GC_CONFIG_DRIVEN +#define MAX_GLOBAL_GC_MECHANISMS_COUNT 6 +GARY_DECL(size_t, gc_global_mechanisms, MAX_GLOBAL_GC_MECHANISMS_COUNT); +#endif //GC_CONFIG_DRIVEN + +#ifndef DACCESS_COMPILE +extern "C" { +#endif +GPTR_DECL(uint8_t,g_lowest_address); +GPTR_DECL(uint8_t,g_highest_address); +GPTR_DECL(uint32_t,g_card_table); +#ifndef DACCESS_COMPILE +} +#endif + +#ifdef DACCESS_COMPILE +class DacHeapWalker; +#endif + +#ifdef _DEBUG +#define _LOGALLOC +#endif + +#ifdef WRITE_BARRIER_CHECK +//always defined, but should be 0 in Server GC +extern uint8_t* g_GCShadow; +extern uint8_t* g_GCShadowEnd; +// saves the g_lowest_address in between GCs to verify the consistency of the shadow segment +extern uint8_t* g_shadow_lowest_address; +#endif + +#define MP_LOCKS + +extern "C" uint8_t* g_ephemeral_low; +extern "C" uint8_t* g_ephemeral_high; + +namespace WKS { + ::GCHeap* CreateGCHeap(); + class GCHeap; + class gc_heap; + } + +#if 
defined(FEATURE_SVR_GC) +namespace SVR { + ::GCHeap* CreateGCHeap(); + class GCHeap; + class gc_heap; +} +#endif // defined(FEATURE_SVR_GC) + +/* + * Ephemeral Garbage Collected Heap Interface + */ + + +struct alloc_context +{ + friend class WKS::gc_heap; +#if defined(FEATURE_SVR_GC) + friend class SVR::gc_heap; + friend class SVR::GCHeap; +#endif // defined(FEATURE_SVR_GC) + friend struct ClassDumpInfo; + + uint8_t* alloc_ptr; + uint8_t* alloc_limit; + int64_t alloc_bytes; //Number of bytes allocated on SOH by this context + int64_t alloc_bytes_loh; //Number of bytes allocated on LOH by this context +#if defined(FEATURE_SVR_GC) + SVR::GCHeap* alloc_heap; + SVR::GCHeap* home_heap; +#endif // defined(FEATURE_SVR_GC) + int alloc_count; +public: + + void init() + { + LIMITED_METHOD_CONTRACT; + + alloc_ptr = 0; + alloc_limit = 0; + alloc_bytes = 0; + alloc_bytes_loh = 0; +#if defined(FEATURE_SVR_GC) + alloc_heap = 0; + home_heap = 0; +#endif // defined(FEATURE_SVR_GC) + alloc_count = 0; + } +}; + +struct ScanContext +{ + Thread* thread_under_crawl; + int thread_number; + uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read + BOOL promotion; //TRUE: Promotion, FALSE: Relocation. 
+ BOOL concurrent; //TRUE: concurrent scanning +#if CHECK_APP_DOMAIN_LEAKS || defined (FEATURE_APPDOMAIN_RESOURCE_MONITORING) || defined (DACCESS_COMPILE) + AppDomain *pCurrentDomain; +#endif //CHECK_APP_DOMAIN_LEAKS || FEATURE_APPDOMAIN_RESOURCE_MONITORING || DACCESS_COMPILE + +#ifndef FEATURE_REDHAWK +#if defined(GC_PROFILING) || defined (DACCESS_COMPILE) + MethodDesc *pMD; +#endif //GC_PROFILING || DACCESS_COMPILE +#endif // FEATURE_REDHAWK +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + EtwGCRootKind dwEtwRootKind; +#endif // GC_PROFILING || FEATURE_EVENT_TRACE + + ScanContext() + { + LIMITED_METHOD_CONTRACT; + + thread_under_crawl = 0; + thread_number = -1; + stack_limit = 0; + promotion = FALSE; + concurrent = FALSE; +#ifdef GC_PROFILING + pMD = NULL; +#endif //GC_PROFILING +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + dwEtwRootKind = kEtwGCRootKindOther; +#endif // GC_PROFILING || FEATURE_EVENT_TRACE + } +}; + +typedef BOOL (* walk_fn)(Object*, void*); +typedef void (* gen_walk_fn)(void *context, int generation, uint8_t *range_start, uint8_t * range_end, uint8_t *range_reserved); + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +struct ProfilingScanContext : ScanContext +{ + BOOL fProfilerPinned; + void * pvEtwContext; + void *pHeapId; + + ProfilingScanContext(BOOL fProfilerPinnedParam) : ScanContext() + { + LIMITED_METHOD_CONTRACT; + + pHeapId = NULL; + fProfilerPinned = fProfilerPinnedParam; + pvEtwContext = NULL; +#ifdef FEATURE_CONSERVATIVE_GC + // To not confuse GCScan::GcScanRoots + promotion = g_pConfig->GetGCConservative(); +#endif + } +}; +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#ifdef STRESS_HEAP +#define IN_STRESS_HEAP(x) x +#define STRESS_HEAP_ARG(x) ,x +#else // STRESS_HEAP +#define IN_STRESS_HEAP(x) +#define STRESS_HEAP_ARG(x) +#endif // STRESS_HEAP + + +//dynamic data interface +struct gc_counters +{ + size_t current_size; + size_t promoted_size; + size_t collection_count; +}; 
+ +// !!!!!!!!!!!!!!!!!!!!!!! +// make sure you change the def in bcl\system\gc.cs +// if you change this! +enum collection_mode +{ + collection_non_blocking = 0x00000001, + collection_blocking = 0x00000002, + collection_optimized = 0x00000004, + collection_compacting = 0x00000008 +#ifdef STRESS_HEAP + , collection_gcstress = 0x80000000 +#endif // STRESS_HEAP +}; + +// !!!!!!!!!!!!!!!!!!!!!!! +// make sure you change the def in bcl\system\gc.cs +// if you change this! +enum wait_full_gc_status +{ + wait_full_gc_success = 0, + wait_full_gc_failed = 1, + wait_full_gc_cancelled = 2, + wait_full_gc_timeout = 3, + wait_full_gc_na = 4 +}; + +// !!!!!!!!!!!!!!!!!!!!!!! +// make sure you change the def in bcl\system\gc.cs +// if you change this! +enum start_no_gc_region_status +{ + start_no_gc_success = 0, + start_no_gc_no_memory = 1, + start_no_gc_too_large = 2, + start_no_gc_in_progress = 3 +}; + +enum end_no_gc_region_status +{ + end_no_gc_success = 0, + end_no_gc_not_in_progress = 1, + end_no_gc_induced = 2, + end_no_gc_alloc_exceeded = 3 +}; + +enum bgc_state +{ + bgc_not_in_process = 0, + bgc_initialized, + bgc_reset_ww, + bgc_mark_handles, + bgc_mark_stack, + bgc_revisit_soh, + bgc_revisit_loh, + bgc_overflow_soh, + bgc_overflow_loh, + bgc_final_marking, + bgc_sweep_soh, + bgc_sweep_loh, + bgc_plan_phase +}; + +enum changed_seg_state +{ + seg_deleted, + seg_added +}; + +void record_changed_seg (uint8_t* start, uint8_t* end, + size_t current_gc_index, + bgc_state current_bgc_state, + changed_seg_state changed_state); + +#ifdef GC_CONFIG_DRIVEN +void record_global_mechanism (int mech_index); +#endif //GC_CONFIG_DRIVEN + +//constants for the flags parameter to the gc call back + +#define GC_CALL_INTERIOR 0x1 +#define GC_CALL_PINNED 0x2 +#define GC_CALL_CHECK_APP_DOMAIN 0x4 + +//flags for GCHeap::Alloc(...) 
+#define GC_ALLOC_FINALIZE 0x1 +#define GC_ALLOC_CONTAINS_REF 0x2 +#define GC_ALLOC_ALIGN8_BIAS 0x4 +#define GC_ALLOC_ALIGN8 0x8 + +class GCHeap { + friend struct ::_DacGlobals; +#ifdef DACCESS_COMPILE + friend class ClrDataAccess; +#endif + +public: + + virtual ~GCHeap() {} + + static GCHeap *GetGCHeap() + { + LIMITED_METHOD_CONTRACT; + + _ASSERTE(g_pGCHeap != NULL); + return g_pGCHeap; + } + +#ifndef DACCESS_COMPILE + static BOOL IsGCInProgress(BOOL bConsiderGCStart = FALSE) + { + WRAPPER_NO_CONTRACT; + + return (IsGCHeapInitialized() ? GetGCHeap()->IsGCInProgressHelper(bConsiderGCStart) : false); + } +#endif + + static BOOL IsGCHeapInitialized() + { + LIMITED_METHOD_CONTRACT; + + return (g_pGCHeap != NULL); + } + + static void WaitForGCCompletion(BOOL bConsiderGCStart = FALSE) + { + WRAPPER_NO_CONTRACT; + + if (IsGCHeapInitialized()) + GetGCHeap()->WaitUntilGCComplete(bConsiderGCStart); + } + + // The runtime needs to know whether we're using workstation or server GC + // long before the GCHeap is created. So IsServerHeap cannot be a virtual + // method on GCHeap. Instead we make it a static method and initialize + // gcHeapType before any of the calls to IsServerHeap. Note that this also + // has the advantage of getting the answer without an indirection + // (virtual call), which is important for perf critical codepaths. + + #ifndef DACCESS_COMPILE + static void InitializeHeapType(bool bServerHeap) + { + LIMITED_METHOD_CONTRACT; +#ifdef FEATURE_SVR_GC + gcHeapType = bServerHeap ? 
GC_HEAP_SVR : GC_HEAP_WKS; +#ifdef WRITE_BARRIER_CHECK + if (gcHeapType == GC_HEAP_SVR) + { + g_GCShadow = 0; + g_GCShadowEnd = 0; + } +#endif +#else // FEATURE_SVR_GC + UNREFERENCED_PARAMETER(bServerHeap); + CONSISTENCY_CHECK(bServerHeap == false); +#endif // FEATURE_SVR_GC + } + #endif + + static BOOL IsValidSegmentSize(size_t cbSize) + { + //Must be aligned on a Mb and greater than 4Mb + return (((cbSize & (1024*1024-1)) ==0) && (cbSize >> 22)); + } + + static BOOL IsValidGen0MaxSize(size_t cbSize) + { + return (cbSize >= 64*1024); + } + + inline static bool IsServerHeap() + { + LIMITED_METHOD_CONTRACT; +#ifdef FEATURE_SVR_GC + _ASSERTE(gcHeapType != GC_HEAP_INVALID); + return (gcHeapType == GC_HEAP_SVR); +#else // FEATURE_SVR_GC + return false; +#endif // FEATURE_SVR_GC + } + + inline static bool UseAllocationContexts() + { + WRAPPER_NO_CONTRACT; +#ifdef FEATURE_REDHAWK + // SIMPLIFY: only use allocation contexts + return true; +#else +#if defined(_TARGET_ARM_) || defined(FEATURE_PAL) + return true; +#else + return ((IsServerHeap() ? true : (g_SystemInfo.dwNumberOfProcessors >= 2))); +#endif +#endif + } + + inline static bool MarkShouldCompeteForStatics() + { + WRAPPER_NO_CONTRACT; + + return IsServerHeap() && g_SystemInfo.dwNumberOfProcessors >= 2; + } + +#ifndef DACCESS_COMPILE + static GCHeap * CreateGCHeap() + { + WRAPPER_NO_CONTRACT; + + GCHeap * pGCHeap; + +#if defined(FEATURE_SVR_GC) + pGCHeap = (IsServerHeap() ? 
SVR::CreateGCHeap() : WKS::CreateGCHeap()); +#else + pGCHeap = WKS::CreateGCHeap(); +#endif // defined(FEATURE_SVR_GC) + + g_pGCHeap = pGCHeap; + return pGCHeap; + } +#endif // DACCESS_COMPILE + +private: + typedef enum + { + GC_HEAP_INVALID = 0, + GC_HEAP_WKS = 1, + GC_HEAP_SVR = 2 + } GC_HEAP_TYPE; + +#ifdef FEATURE_SVR_GC + SVAL_DECL(uint32_t,gcHeapType); +#endif // FEATURE_SVR_GC + +public: + // TODO Synchronization, should be moved out + virtual BOOL IsGCInProgressHelper (BOOL bConsiderGCStart = FALSE) = 0; + virtual uint32_t WaitUntilGCComplete (BOOL bConsiderGCStart = FALSE) = 0; + virtual void SetGCInProgress(BOOL fInProgress) = 0; + virtual CLREventStatic * GetWaitForGCEvent() = 0; + + virtual void SetFinalizationRun (Object* obj) = 0; + virtual Object* GetNextFinalizable() = 0; + virtual size_t GetNumberOfFinalizable() = 0; + + virtual void SetFinalizeQueueForShutdown(BOOL fHasLock) = 0; + virtual BOOL FinalizeAppDomain(AppDomain *pDomain, BOOL fRunFinalizers) = 0; + virtual BOOL ShouldRestartFinalizerWatchDog() = 0; + + //wait for concurrent GC to finish + virtual void WaitUntilConcurrentGCComplete () = 0; // Use in managed threads +#ifndef DACCESS_COMPILE + virtual HRESULT WaitUntilConcurrentGCCompleteAsync(int millisecondsTimeout) = 0; // Use in native threads. TRUE if succeed. FALSE if failed or timeout +#endif + virtual BOOL IsConcurrentGCInProgress() = 0; + + // Enable/disable concurrent GC + virtual void TemporaryEnableConcurrentGC() = 0; + virtual void TemporaryDisableConcurrentGC() = 0; + virtual BOOL IsConcurrentGCEnabled() = 0; + + virtual void FixAllocContext (alloc_context* acontext, BOOL lockp, void* arg, void *heap) = 0; + virtual Object* Alloc (alloc_context* acontext, size_t size, uint32_t flags) = 0; + + // This is safe to call only when EE is suspended. 
+ virtual Object* GetContainingObject(void *pInteriorPtr) = 0; + + // TODO Should be folded into constructor + virtual HRESULT Initialize () = 0; + + virtual HRESULT GarbageCollect (int generation = -1, BOOL low_memory_p=FALSE, int mode = collection_blocking) = 0; + virtual Object* Alloc (size_t size, uint32_t flags) = 0; +#ifdef FEATURE_64BIT_ALIGNMENT + virtual Object* AllocAlign8 (size_t size, uint32_t flags) = 0; + virtual Object* AllocAlign8 (alloc_context* acontext, size_t size, uint32_t flags) = 0; +private: + virtual Object* AllocAlign8Common (void* hp, alloc_context* acontext, size_t size, uint32_t flags) = 0; +public: +#endif // FEATURE_64BIT_ALIGNMENT + virtual Object* AllocLHeap (size_t size, uint32_t flags) = 0; + virtual void SetReservedVMLimit (size_t vmlimit) = 0; + virtual void SetCardsAfterBulkCopy( Object**, size_t ) = 0; +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + virtual void WalkObject (Object* obj, walk_fn fn, void* context) = 0; +#endif //defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + + virtual bool IsThreadUsingAllocationContextHeap(alloc_context* acontext, int thread_number) = 0; + virtual int GetNumberOfHeaps () = 0; + virtual int GetHomeHeapNumber () = 0; + + virtual int CollectionCount (int generation, int get_bgc_fgc_count = 0) = 0; + + // Finalizer queue stuff (should stay) + virtual bool RegisterForFinalization (int gen, Object* obj) = 0; + + // General queries to the GC + virtual BOOL IsPromoted (Object *object) = 0; + virtual unsigned WhichGeneration (Object* object) = 0; + virtual BOOL IsEphemeral (Object* object) = 0; + virtual BOOL IsHeapPointer (void* object, BOOL small_heap_only = FALSE) = 0; + + virtual unsigned GetCondemnedGeneration() = 0; + virtual int GetGcLatencyMode() = 0; + virtual int SetGcLatencyMode(int newLatencyMode) = 0; + + virtual int GetLOHCompactionMode() = 0; + virtual void SetLOHCompactionMode(int newLOHCompactionyMode) = 0; + + virtual BOOL RegisterForFullGCNotification(uint32_t 
gen2Percentage, + uint32_t lohPercentage) = 0; + virtual BOOL CancelFullGCNotification() = 0; + virtual int WaitForFullGCApproach(int millisecondsTimeout) = 0; + virtual int WaitForFullGCComplete(int millisecondsTimeout) = 0; + + virtual int StartNoGCRegion(uint64_t totalSize, BOOL lohSizeKnown, uint64_t lohSize, BOOL disallowFullBlockingGC) = 0; + virtual int EndNoGCRegion() = 0; + + virtual BOOL IsObjectInFixedHeap(Object *pObj) = 0; + virtual size_t GetTotalBytesInUse () = 0; + virtual size_t GetCurrentObjSize() = 0; + virtual size_t GetLastGCStartTime(int generation) = 0; + virtual size_t GetLastGCDuration(int generation) = 0; + virtual size_t GetNow() = 0; + virtual unsigned GetGcCount() = 0; + virtual void TraceGCSegments() = 0; + + virtual void PublishObject(uint8_t* obj) = 0; + + // static if since restricting for all heaps is fine + virtual size_t GetValidSegmentSize(BOOL large_seg = FALSE) = 0; + + static BOOL IsLargeObject(MethodTable *mt) { + WRAPPER_NO_CONTRACT; + + return mt->GetBaseSize() >= LARGE_OBJECT_SIZE; + } + + static unsigned GetMaxGeneration() { + LIMITED_METHOD_DAC_CONTRACT; + return max_generation; + } + + virtual size_t GetPromotedBytes(int heap_index) = 0; + +private: + enum { + max_generation = 2, + }; + +public: + +#ifdef FEATURE_BASICFREEZE + // frozen segment management functions + virtual segment_handle RegisterFrozenSegment(segment_info *pseginfo) = 0; + virtual void UnregisterFrozenSegment(segment_handle seg) = 0; +#endif //FEATURE_BASICFREEZE + + // debug support +#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way +#ifdef STRESS_HEAP + //return TRUE if GC actually happens, otherwise FALSE + virtual BOOL StressHeap(alloc_context * acontext = 0) = 0; +#endif +#endif // FEATURE_REDHAWK +#ifdef VERIFY_HEAP + virtual void ValidateObjectMember (Object *obj) = 0; +#endif + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + virtual void DescrGenerationsToProfiler (gen_walk_fn fn, void *context) = 0; +#endif // 
defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +protected: +#ifdef VERIFY_HEAP +public: + // Return NULL if can't find next object. When EE is not suspended, + // the result is not accurate: if the input arg is in gen0, the function could + // return zeroed out memory as next object + virtual Object * NextObj (Object * object) = 0; +#ifdef FEATURE_BASICFREEZE + // Return TRUE if object lives in frozen segment + virtual BOOL IsInFrozenSegment (Object * object) = 0; +#endif //FEATURE_BASICFREEZE +#endif //VERIFY_HEAP +}; + +extern VOLATILE(int32_t) m_GCLock; + +// Go through and touch (read) each page straddled by a memory block. +void TouchPages(void * pStart, size_t cb); + +// For low memory notification from host +extern int32_t g_bLowMemoryFromHost; + +#ifdef WRITE_BARRIER_CHECK +void updateGCShadow(Object** ptr, Object* val); +#endif + +// the method table for the WeakReference class +extern MethodTable *pWeakReferenceMT; +// The canonical method table for WeakReference<T> +extern MethodTable *pWeakReferenceOfTCanonMT; +extern void FinalizeWeakReference(Object * obj); + +#endif // __GC_H diff --git a/src/gc/gccommon.cpp b/src/gc/gccommon.cpp new file mode 100644 index 0000000000..779aac7296 --- /dev/null +++ b/src/gc/gccommon.cpp @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ + +/* + * GCCOMMON.CPP + * + * Code common to both SVR and WKS gcs + */ + +#include "common.h" + +#include "gcenv.h" +#include "gc.h" + +#ifdef FEATURE_SVR_GC +SVAL_IMPL_INIT(uint32_t,GCHeap,gcHeapType,GCHeap::GC_HEAP_INVALID); +#endif // FEATURE_SVR_GC + +GPTR_IMPL(GCHeap,g_pGCHeap); + +/* global versions of the card table and brick table */ +GPTR_IMPL(uint32_t,g_card_table); + +/* absolute bounds of the GC memory */ +GPTR_IMPL_INIT(uint8_t,g_lowest_address,0); +GPTR_IMPL_INIT(uint8_t,g_highest_address,0); + +#ifdef GC_CONFIG_DRIVEN +GARY_IMPL(size_t, gc_global_mechanisms, MAX_GLOBAL_GC_MECHANISMS_COUNT); +#endif //GC_CONFIG_DRIVEN + +#ifndef DACCESS_COMPILE + +uint8_t* g_ephemeral_low = (uint8_t*)1; +uint8_t* g_ephemeral_high = (uint8_t*)~0; + +#ifdef WRITE_BARRIER_CHECK +uint8_t* g_GCShadow; +uint8_t* g_GCShadowEnd; +uint8_t* g_shadow_lowest_address = NULL; +#endif + +VOLATILE(int32_t) m_GCLock = -1; + +#ifdef GC_CONFIG_DRIVEN +void record_global_mechanism (int mech_index) +{ + (gc_global_mechanisms[mech_index])++; +} +#endif //GC_CONFIG_DRIVEN + +int32_t g_bLowMemoryFromHost = 0; + +#ifdef WRITE_BARRIER_CHECK + +#define INVALIDGCVALUE (void *)((size_t)0xcccccccd) + + // called by the write barrier to update the shadow heap +void updateGCShadow(Object** ptr, Object* val) +{ + Object** shadow = (Object**) &g_GCShadow[((uint8_t*) ptr - g_lowest_address)]; + if ((uint8_t*) shadow < g_GCShadowEnd) + { + *shadow = val; + + // Ensure that the write to the shadow heap occurs before the read from + // the GC heap so that race conditions are caught by INVALIDGCVALUE. 
+ MemoryBarrier(); + + if(*ptr!=val) + *shadow = (Object *) INVALIDGCVALUE; + } +} + +#endif // WRITE_BARRIER_CHECK + + +struct changed_seg +{ + uint8_t * start; + uint8_t * end; + size_t gc_index; + bgc_state bgc; + changed_seg_state changed; +}; + + +const int max_saved_changed_segs = 128; + +changed_seg saved_changed_segs[max_saved_changed_segs]; +int saved_changed_segs_count = 0; + +void record_changed_seg (uint8_t* start, uint8_t* end, + size_t current_gc_index, + bgc_state current_bgc_state, + changed_seg_state changed_state) +{ + if (saved_changed_segs_count < max_saved_changed_segs) + { + saved_changed_segs[saved_changed_segs_count].start = start; + saved_changed_segs[saved_changed_segs_count].end = end; + saved_changed_segs[saved_changed_segs_count].gc_index = current_gc_index; + saved_changed_segs[saved_changed_segs_count].bgc = current_bgc_state; + saved_changed_segs[saved_changed_segs_count].changed = changed_state; + saved_changed_segs_count++; + } + else + { + saved_changed_segs_count = 0; + } +} + +#endif // !DACCESS_COMPILE diff --git a/src/gc/gcdesc.h b/src/gc/gcdesc.h new file mode 100644 index 0000000000..7cc132a640 --- /dev/null +++ b/src/gc/gcdesc.h @@ -0,0 +1,263 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// +// +// GC Object Pointer Location Series Stuff +// + + + +#ifndef _GCDESC_H_ +#define _GCDESC_H_ + +#ifdef BIT64 +typedef uint32_t HALF_SIZE_T; +#else // BIT64 +typedef uint16_t HALF_SIZE_T; +#endif + + +typedef size_t *JSlot; + + +// +// These two classes make up the apparatus with which the object references +// within an object can be found. 
+// +// CGCDescSeries: +// +// The CGCDescSeries class describes a series of object references within an +// object by describing the size of the series (which has an adjustment which +// will be explained later) and the starting point of the series. +// +// The series size is adjusted when the map is created by subtracting the +// GetBaseSize() of the object. On retieval of the size the total size +// of the object is added back. For non-array objects the total object +// size is equal to the base size, so this returns the same value. For +// array objects this will yield the size of the data portion of the array. +// Since arrays containing object references will contain ONLY object references +// this is a fast way of handling arrays and normal objects without a +// conditional test +// +// +// +// CGCDesc: +// +// The CGCDesc is a collection of CGCDescSeries objects to describe all the +// different runs of pointers in a particular object. <TODO> [add more on the strange +// way the CGCDesc grows backwards in memory behind the MethodTable] +//</TODO> + +struct val_serie_item +{ + HALF_SIZE_T nptrs; + HALF_SIZE_T skip; + void set_val_serie_item (HALF_SIZE_T nptrs, HALF_SIZE_T skip) + { + this->nptrs = nptrs; + this->skip = skip; + } +}; + +struct val_array_series +{ + val_serie_item items[1]; + size_t m_startOffset; + size_t m_count; +}; + +typedef DPTR(class CGCDescSeries) PTR_CGCDescSeries; +typedef DPTR(class MethodTable) PTR_MethodTable; +class CGCDescSeries +{ +public: + union + { + size_t seriessize; // adjusted length of series (see above) in bytes + val_serie_item val_serie[1]; //coded serie for value class array + }; + + size_t startoffset; + + size_t GetSeriesCount () + { + return seriessize/sizeof(JSlot); + } + + void SetSeriesCount (size_t newcount) + { + seriessize = newcount * sizeof(JSlot); + } + + void IncSeriesCount (size_t increment = 1) + { + seriessize += increment * sizeof(JSlot); + } + + size_t GetSeriesSize () + { + return seriessize; + } 
+ + void SetSeriesSize (size_t newsize) + { + seriessize = newsize; + } + + void SetSeriesValItem (val_serie_item item, int index) + { + val_serie [index] = item; + } + + void SetSeriesOffset (size_t newoffset) + { + startoffset = newoffset; + } + + size_t GetSeriesOffset () + { + return startoffset; + } +}; + + + + + +typedef DPTR(class CGCDesc) PTR_CGCDesc; +class CGCDesc +{ + // Don't construct me, you have to hand me a ptr to the *top* of my storage in Init. + CGCDesc () {} + + // + // NOTE: for alignment reasons, NumSeries is stored as a size_t. + // This makes everything nicely 8-byte aligned on IA64. + // +public: + static size_t ComputeSize (size_t NumSeries) + { + _ASSERTE (ptrdiff_t(NumSeries) > 0); + + return sizeof(size_t) + NumSeries*sizeof(CGCDescSeries); + } + + // For value type array + static size_t ComputeSizeRepeating (size_t NumSeries) + { + _ASSERTE (ptrdiff_t(NumSeries) > 0); + + return sizeof(size_t) + sizeof(CGCDescSeries) + + (NumSeries-1)*sizeof(val_serie_item); + } + +#ifndef DACCESS_COMPILE + static void Init (void* mem, size_t NumSeries) + { + *((size_t*)mem-1) = NumSeries; + } + + static void InitValueClassSeries (void* mem, size_t NumSeries) + { + *((ptrdiff_t*)mem-1) = -((ptrdiff_t)NumSeries); + } +#endif + + static PTR_CGCDesc GetCGCDescFromMT (MethodTable * pMT) + { + // If it doesn't contain pointers, there isn't a GCDesc + PTR_MethodTable mt(pMT); + + _ASSERTE(mt->ContainsPointersOrCollectible()); + + return PTR_CGCDesc(mt); + } + + size_t GetNumSeries () + { + return *(PTR_size_t(PTR_CGCDesc(this))-1); + } + + // Returns lowest series in memory. + // Cannot be used for valuetype arrays + PTR_CGCDescSeries GetLowestSeries () + { + _ASSERTE (ptrdiff_t(GetNumSeries()) > 0); + return PTR_CGCDescSeries(PTR_uint8_t(PTR_CGCDesc(this)) + - ComputeSize(GetNumSeries())); + } + + // Returns highest series in memory. 
+ PTR_CGCDescSeries GetHighestSeries () + { + return PTR_CGCDescSeries(PTR_size_t(PTR_CGCDesc(this))-1)-1; + } + + // Returns number of immediate pointers this object has. + // size is only used if you have an array of value types. +#ifndef DACCESS_COMPILE + static size_t GetNumPointers (MethodTable* pMT, size_t ObjectSize, size_t NumComponents) + { + size_t NumOfPointers = 0; + CGCDesc* map = GetCGCDescFromMT(pMT); + CGCDescSeries* cur = map->GetHighestSeries(); + ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries(); + + if (cnt > 0) + { + CGCDescSeries* last = map->GetLowestSeries(); + while (cur >= last) + { + NumOfPointers += (cur->GetSeriesSize() + ObjectSize) / sizeof(JSlot); + cur--; + } + } + else + { + /* Handle the repeating case - array of valuetypes */ + for (ptrdiff_t __i = 0; __i > cnt; __i--) + { + NumOfPointers += cur->val_serie[__i].nptrs; + } + + NumOfPointers *= NumComponents; + } + + return NumOfPointers; + } +#endif + + // Size of the entire slot map. + size_t GetSize () + { + ptrdiff_t numSeries = (ptrdiff_t) GetNumSeries(); + if (numSeries < 0) + { + return ComputeSizeRepeating(-numSeries); + } + else + { + return ComputeSize(numSeries); + } + } + + uint8_t *GetStartOfGCData() + { + return ((uint8_t *)this) - GetSize(); + } + +private: + + BOOL IsValueClassSeries() + { + return ((ptrdiff_t) GetNumSeries()) < 0; + } + +}; + +#define MAX_SIZE_FOR_VALUECLASS_IN_ARRAY 0xffff +#define MAX_PTRS_FOR_VALUECLASSS_IN_ARRAY 0xffff + + +#endif // _GCDESC_H_ diff --git a/src/gc/gcee.cpp b/src/gc/gcee.cpp new file mode 100644 index 0000000000..d37eaf4de9 --- /dev/null +++ b/src/gc/gcee.cpp @@ -0,0 +1,883 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// + +// + + +// sets up vars for GC + +#include "gcpriv.h" + +#ifndef DACCESS_COMPILE + +COUNTER_ONLY(PERF_COUNTER_TIMER_PRECISION g_TotalTimeInGC = 0); +COUNTER_ONLY(PERF_COUNTER_TIMER_PRECISION g_TotalTimeSinceLastGCEnd = 0); + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) +size_t g_GenerationSizes[NUMBERGENERATIONS]; +size_t g_GenerationPromotedSizes[NUMBERGENERATIONS]; +#endif // ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +void GCHeap::UpdatePreGCCounters() +{ +#if defined(ENABLE_PERF_COUNTERS) +#ifdef MULTIPLE_HEAPS + gc_heap* hp = 0; +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + size_t allocation_0 = 0; + size_t allocation_3 = 0; + + // Publish perf stats + g_TotalTimeInGC = GET_CYCLE_COUNT(); + +#ifdef MULTIPLE_HEAPS + int hn = 0; + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + hp = gc_heap::g_heaps [hn]; + + allocation_0 += + dd_desired_allocation (hp->dynamic_data_of (0))- + dd_new_allocation (hp->dynamic_data_of (0)); + allocation_3 += + dd_desired_allocation (hp->dynamic_data_of (max_generation+1))- + dd_new_allocation (hp->dynamic_data_of (max_generation+1)); + } +#else + allocation_0 = + dd_desired_allocation (hp->dynamic_data_of (0))- + dd_new_allocation (hp->dynamic_data_of (0)); + allocation_3 = + dd_desired_allocation (hp->dynamic_data_of (max_generation+1))- + dd_new_allocation (hp->dynamic_data_of (max_generation+1)); + +#endif //MULTIPLE_HEAPS + + GetPerfCounters().m_GC.cbAlloc += allocation_0; + GetPerfCounters().m_GC.cbAlloc += allocation_3; + GetPerfCounters().m_GC.cbLargeAlloc += allocation_3; + +#ifdef _PREFAST_ + // prefix complains about us dereferencing hp in wks build even though we only access static members + // this way. 
not sure how to shut it up except for this ugly workaround: + PREFIX_ASSUME( hp != NULL); +#endif //_PREFAST_ + if (hp->settings.reason == reason_induced IN_STRESS_HEAP( && !hp->settings.stress_induced)) + { + COUNTER_ONLY(GetPerfCounters().m_GC.cInducedGCs++); + } + + GetPerfCounters().m_Security.timeRTchecks = 0; + GetPerfCounters().m_Security.timeRTchecksBase = 1; // To avoid divide by zero + +#endif //ENABLE_PERF_COUNTERS + +#ifdef FEATURE_EVENT_TRACE +#ifdef MULTIPLE_HEAPS + //take the first heap.... + gc_mechanisms *pSettings = &gc_heap::g_heaps[0]->settings; +#else + gc_mechanisms *pSettings = &gc_heap::settings; +#endif //MULTIPLE_HEAPS + + ETW::GCLog::ETW_GC_INFO Info; + + Info.GCStart.Count = (uint32_t)pSettings->gc_index; + Info.GCStart.Depth = (uint32_t)pSettings->condemned_generation; + Info.GCStart.Reason = (ETW::GCLog::ETW_GC_INFO::GC_REASON)((int)(pSettings->reason)); + + Info.GCStart.Type = ETW::GCLog::ETW_GC_INFO::GC_NGC; + if (pSettings->concurrent) + { + Info.GCStart.Type = ETW::GCLog::ETW_GC_INFO::GC_BGC; + } +#ifdef BACKGROUND_GC + else if (Info.GCStart.Depth < max_generation) + { + if (pSettings->background_p) + Info.GCStart.Type = ETW::GCLog::ETW_GC_INFO::GC_FGC; + } +#endif //BACKGROUND_GC + + ETW::GCLog::FireGcStartAndGenerationRanges(&Info); +#endif // FEATURE_EVENT_TRACE +} + +void GCHeap::UpdatePostGCCounters() +{ + totalSurvivedSize = gc_heap::get_total_survived_size(); + + // + // The following is for instrumentation. + // + // Calculate the common ones for ETW and perf counters. +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) +#ifdef MULTIPLE_HEAPS + //take the first heap.... 
+    gc_heap* hp1 = gc_heap::g_heaps[0];
+    gc_mechanisms *pSettings = &hp1->settings;
+#else
+    gc_heap* hp1 = pGenGCHeap;
+    gc_mechanisms *pSettings = &gc_heap::settings;
+#endif //MULTIPLE_HEAPS
+
+    int condemned_gen = pSettings->condemned_generation;
+
+    memset (g_GenerationSizes, 0, sizeof (g_GenerationSizes));
+    memset (g_GenerationPromotedSizes, 0, sizeof (g_GenerationPromotedSizes));
+
+    size_t total_num_gc_handles = g_dwHandles;
+    uint32_t total_num_sync_blocks = SyncBlockCache::GetSyncBlockCache()->GetActiveCount();
+
+    // Note this is for the perf counter only, for legacy reasons. What we showed
+    // in perf counters for "gen0 size" was really the gen0 budget which made
+    // sense (somewhat) at the time. For backward compatibility we are keeping
+    // this calculated the same way. For ETW we use the true gen0 size (and
+    // gen0 budget is also reported in an event).
+    size_t youngest_budget = 0;
+
+    size_t promoted_finalization_mem = 0;
+    size_t total_num_pinned_objects = gc_heap::get_total_pinned_objects();
+
+#ifndef FEATURE_REDHAWK
+    // if a max gen garbage collection was performed, resync the GC Handle counter;
+    // if threads are currently suspended, we do not need to obtain a lock on each handle table
+    if (condemned_gen == max_generation)
+        total_num_gc_handles = HndCountAllHandles(!GCHeap::IsGCInProgress());
+#endif //FEATURE_REDHAWK
+
+    // per-generation calculation.
+ for (int gen_index = 0; gen_index <= (max_generation+1); gen_index++) + { +#ifdef MULTIPLE_HEAPS + int hn = 0; + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps[hn]; +#else + gc_heap* hp = pGenGCHeap; + { +#endif //MULTIPLE_HEAPS + dynamic_data* dd = hp->dynamic_data_of (gen_index); + + if (gen_index == 0) + { + youngest_budget += dd_desired_allocation (hp->dynamic_data_of (gen_index)); + } + + g_GenerationSizes[gen_index] += hp->generation_size (gen_index); + + if (gen_index <= condemned_gen) + { + g_GenerationPromotedSizes[gen_index] += dd_promoted_size (dd); + } + + if ((gen_index == (max_generation+1)) && (condemned_gen == max_generation)) + { + g_GenerationPromotedSizes[gen_index] += dd_promoted_size (dd); + } + + if (gen_index == 0) + { + promoted_finalization_mem += dd_freach_previous_promotion (dd); + } +#ifdef MULTIPLE_HEAPS + } +#else + } +#endif //MULTIPLE_HEAPS + } +#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef FEATURE_EVENT_TRACE + ETW::GCLog::ETW_GC_INFO Info; + + Info.GCEnd.Depth = condemned_gen; + Info.GCEnd.Count = (uint32_t)pSettings->gc_index; + ETW::GCLog::FireGcEndAndGenerationRanges(Info.GCEnd.Count, Info.GCEnd.Depth); + + ETW::GCLog::ETW_GC_INFO HeapInfo; + ZeroMemory(&HeapInfo, sizeof(HeapInfo)); + + for (int gen_index = 0; gen_index <= (max_generation+1); gen_index++) + { + HeapInfo.HeapStats.GenInfo[gen_index].GenerationSize = g_GenerationSizes[gen_index]; + HeapInfo.HeapStats.GenInfo[gen_index].TotalPromotedSize = g_GenerationPromotedSizes[gen_index]; + } + +#ifdef SIMPLE_DPRINTF + dprintf (2, ("GC#%d: 0: %Id(%Id); 1: %Id(%Id); 2: %Id(%Id); 3: %Id(%Id)", + Info.GCEnd.Count, + HeapInfo.HeapStats.GenInfo[0].GenerationSize, + HeapInfo.HeapStats.GenInfo[0].TotalPromotedSize, + HeapInfo.HeapStats.GenInfo[1].GenerationSize, + HeapInfo.HeapStats.GenInfo[1].TotalPromotedSize, + HeapInfo.HeapStats.GenInfo[2].GenerationSize, + HeapInfo.HeapStats.GenInfo[2].TotalPromotedSize, + 
HeapInfo.HeapStats.GenInfo[3].GenerationSize, + HeapInfo.HeapStats.GenInfo[3].TotalPromotedSize)); +#endif //SIMPLE_DPRINTF + + HeapInfo.HeapStats.FinalizationPromotedSize = promoted_finalization_mem; + HeapInfo.HeapStats.FinalizationPromotedCount = GetFinalizablePromotedCount(); + HeapInfo.HeapStats.PinnedObjectCount = (uint32_t)total_num_pinned_objects; + HeapInfo.HeapStats.SinkBlockCount = total_num_sync_blocks; + HeapInfo.HeapStats.GCHandleCount = (uint32_t)total_num_gc_handles; + + FireEtwGCHeapStats_V1(HeapInfo.HeapStats.GenInfo[0].GenerationSize, HeapInfo.HeapStats.GenInfo[0].TotalPromotedSize, + HeapInfo.HeapStats.GenInfo[1].GenerationSize, HeapInfo.HeapStats.GenInfo[1].TotalPromotedSize, + HeapInfo.HeapStats.GenInfo[2].GenerationSize, HeapInfo.HeapStats.GenInfo[2].TotalPromotedSize, + HeapInfo.HeapStats.GenInfo[3].GenerationSize, HeapInfo.HeapStats.GenInfo[3].TotalPromotedSize, + HeapInfo.HeapStats.FinalizationPromotedSize, + HeapInfo.HeapStats.FinalizationPromotedCount, + HeapInfo.HeapStats.PinnedObjectCount, + HeapInfo.HeapStats.SinkBlockCount, + HeapInfo.HeapStats.GCHandleCount, + GetClrInstanceId()); +#endif // FEATURE_EVENT_TRACE + +#if defined(ENABLE_PERF_COUNTERS) + for (int gen_index = 0; gen_index <= (max_generation+1); gen_index++) + { + _ASSERTE(FitsIn<size_t>(g_GenerationSizes[gen_index])); + _ASSERTE(FitsIn<size_t>(g_GenerationPromotedSizes[gen_index])); + + if (gen_index == (max_generation+1)) + { + GetPerfCounters().m_GC.cLrgObjSize = static_cast<size_t>(g_GenerationSizes[gen_index]); + } + else + { + GetPerfCounters().m_GC.cGenHeapSize[gen_index] = ((gen_index == 0) ? + youngest_budget : + static_cast<size_t>(g_GenerationSizes[gen_index])); + } + + // the perf counters only count the promoted size for gen0 and gen1. 
+ if (gen_index < max_generation) + { + GetPerfCounters().m_GC.cbPromotedMem[gen_index] = static_cast<size_t>(g_GenerationPromotedSizes[gen_index]); + } + + if (gen_index <= max_generation) + { + GetPerfCounters().m_GC.cGenCollections[gen_index] = + dd_collection_count (hp1->dynamic_data_of (gen_index)); + } + } + + // Committed and reserved memory + { + size_t committed_mem = 0; + size_t reserved_mem = 0; +#ifdef MULTIPLE_HEAPS + int hn = 0; + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; +#else + gc_heap* hp = pGenGCHeap; + { +#endif //MULTIPLE_HEAPS + heap_segment* seg = generation_start_segment (hp->generation_of (max_generation)); + while (seg) + { + committed_mem += heap_segment_committed (seg) - heap_segment_mem (seg); + reserved_mem += heap_segment_reserved (seg) - heap_segment_mem (seg); + seg = heap_segment_next (seg); + } + //same for large segments + seg = generation_start_segment (hp->generation_of (max_generation + 1)); + while (seg) + { + committed_mem += heap_segment_committed (seg) - + heap_segment_mem (seg); + reserved_mem += heap_segment_reserved (seg) - + heap_segment_mem (seg); + seg = heap_segment_next (seg); + } +#ifdef MULTIPLE_HEAPS + } +#else + } +#endif //MULTIPLE_HEAPS + + GetPerfCounters().m_GC.cTotalCommittedBytes = committed_mem; + GetPerfCounters().m_GC.cTotalReservedBytes = reserved_mem; + } + + _ASSERTE(FitsIn<size_t>(HeapInfo.HeapStats.FinalizationPromotedSize)); + _ASSERTE(FitsIn<size_t>(HeapInfo.HeapStats.FinalizationPromotedCount)); + GetPerfCounters().m_GC.cbPromotedFinalizationMem = static_cast<size_t>(HeapInfo.HeapStats.FinalizationPromotedSize); + GetPerfCounters().m_GC.cSurviveFinalize = static_cast<size_t>(HeapInfo.HeapStats.FinalizationPromotedCount); + + // Compute Time in GC + PERF_COUNTER_TIMER_PRECISION _currentPerfCounterTimer = GET_CYCLE_COUNT(); + + g_TotalTimeInGC = _currentPerfCounterTimer - g_TotalTimeInGC; + PERF_COUNTER_TIMER_PRECISION _timeInGCBase = 
(_currentPerfCounterTimer - g_TotalTimeSinceLastGCEnd); + + if (_timeInGCBase < g_TotalTimeInGC) + g_TotalTimeInGC = 0; // isn't likely except on some SMP machines-- perhaps make sure that + // _timeInGCBase >= g_TotalTimeInGC by setting affinity in GET_CYCLE_COUNT + + while (_timeInGCBase > UINT_MAX) + { + _timeInGCBase = _timeInGCBase >> 8; + g_TotalTimeInGC = g_TotalTimeInGC >> 8; + } + + // Update Total Time + GetPerfCounters().m_GC.timeInGC = (uint32_t)g_TotalTimeInGC; + GetPerfCounters().m_GC.timeInGCBase = (uint32_t)_timeInGCBase; + + if (!GetPerfCounters().m_GC.cProcessID) + GetPerfCounters().m_GC.cProcessID = (size_t)GetCurrentProcessId(); + + g_TotalTimeSinceLastGCEnd = _currentPerfCounterTimer; + + GetPerfCounters().m_GC.cPinnedObj = total_num_pinned_objects; + GetPerfCounters().m_GC.cHandles = total_num_gc_handles; + GetPerfCounters().m_GC.cSinkBlocks = total_num_sync_blocks; +#endif //ENABLE_PERF_COUNTERS +} + +size_t GCHeap::GetCurrentObjSize() +{ + return (totalSurvivedSize + gc_heap::get_total_allocated()); +} + +size_t GCHeap::GetLastGCStartTime(int generation) +{ +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + return dd_time_clock (hp->dynamic_data_of (generation)); +} + +size_t GCHeap::GetLastGCDuration(int generation) +{ +#ifdef MULTIPLE_HEAPS + gc_heap* hp = gc_heap::g_heaps[0]; +#else + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + return dd_gc_elapsed_time (hp->dynamic_data_of (generation)); +} + +size_t GetHighPrecisionTimeStamp(); + +size_t GCHeap::GetNow() +{ + return GetHighPrecisionTimeStamp(); +} + +void ProfScanRootsHelper(Object** ppObject, ScanContext *pSC, uint32_t dwFlags) +{ +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + Object *pObj = *ppObject; +#ifdef INTERIOR_POINTERS + if (dwFlags & GC_CALL_INTERIOR) + { + uint8_t *o = (uint8_t*)pObj; + gc_heap* hp = gc_heap::heap_of (o); + + if ((o < hp->gc_low) || (o >= hp->gc_high)) + { + 
return; + } + pObj = (Object*) hp->find_object(o, hp->gc_low); + } +#endif //INTERIOR_POINTERS + ScanRootsHelper(pObj, ppObject, pSC, dwFlags); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +} + +// TODO - at some point we would like to completely decouple profiling +// from ETW tracing using a pattern similar to this, where the +// ProfilingScanContext has flags about whether or not certain things +// should be tracked, and each one of these ProfilerShouldXYZ functions +// will check these flags and determine what to do based upon that. +// GCProfileWalkHeapWorker can, in turn, call those methods without fear +// of things being ifdef'd out. + +// Returns TRUE if GC profiling is enabled and the profiler +// should scan dependent handles, FALSE otherwise. +BOOL ProfilerShouldTrackConditionalWeakTableElements() +{ +#if defined(GC_PROFILING) + return CORProfilerTrackConditionalWeakTableElements(); +#else + return FALSE; +#endif // defined (GC_PROFILING) +} + +// If GC profiling is enabled, informs the profiler that we are done +// tracing dependent handles. +void ProfilerEndConditionalWeakTableElementReferences(void* heapId) +{ +#if defined (GC_PROFILING) + g_profControlBlock.pProfInterface->EndConditionalWeakTableElementReferences(heapId); +#else + UNREFERENCED_PARAMETER(heapId); +#endif // defined (GC_PROFILING) +} + +// If GC profiling is enabled, informs the profiler that we are done +// tracing root references. 
+void ProfilerEndRootReferences2(void* heapId) +{ +#if defined (GC_PROFILING) + g_profControlBlock.pProfInterface->EndRootReferences2(heapId); +#else + UNREFERENCED_PARAMETER(heapId); +#endif // defined (GC_PROFILING) +} + +// This is called only if we've determined that either: +// a) The Profiling API wants to do a walk of the heap, and it has pinned the +// profiler in place (so it cannot be detached), and it's thus safe to call into the +// profiler, OR +// b) ETW infrastructure wants to do a walk of the heap either to log roots, +// objects, or both. +// This can also be called to do a single walk for BOTH a) and b) simultaneously. Since +// ETW can ask for roots, but not objects +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +void GCProfileWalkHeapWorker(BOOL fProfilerPinned, BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw) +{ + { + ProfilingScanContext SC(fProfilerPinned); + + // **** Scan roots: Only scan roots if profiling API wants them or ETW wants them. + if (fProfilerPinned || fShouldWalkHeapRootsForEtw) + { +#ifdef MULTIPLE_HEAPS + int hn; + + // Must emulate each GC thread number so we can hit each + // heap for enumerating the roots. + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + // Ask the vm to go over all of the roots for this specific + // heap. 
+            gc_heap* hp = gc_heap::g_heaps [hn];
+            SC.thread_number = hn;
+            GCScan::GcScanRoots(&ProfScanRootsHelper, max_generation, max_generation, &SC);
+
+            // The finalizer queue is also a source of roots
+            SC.dwEtwRootKind = kEtwGCRootKindFinalizer;
+            hp->finalize_queue->GcScanRoots(&ProfScanRootsHelper, hn, &SC);
+        }
+#else
+        // Ask the vm to go over all of the roots
+        GCScan::GcScanRoots(&ProfScanRootsHelper, max_generation, max_generation, &SC);
+
+        // The finalizer queue is also a source of roots
+        SC.dwEtwRootKind = kEtwGCRootKindFinalizer;
+        pGenGCHeap->finalize_queue->GcScanRoots(&ProfScanRootsHelper, 0, &SC);
+
+#endif // MULTIPLE_HEAPS
+        // Handles are kept independent of wks/svr/concurrent builds
+        SC.dwEtwRootKind = kEtwGCRootKindHandle;
+        GCScan::GcScanHandlesForProfilerAndETW(max_generation, &SC);
+
+        // indicate that regular handle scanning is over, so we can flush the buffered roots
+        // to the profiler. (This is for profapi only. ETW will flush after the
+        // entire heap walk is complete, via ETW::GCLog::EndHeapDump.)
+        if (fProfilerPinned)
+        {
+            ProfilerEndRootReferences2(&SC.pHeapId);
+        }
+    }
+
+    // **** Scan dependent handles: only if the profiler supports it or ETW wants roots
+    if ((fProfilerPinned && ProfilerShouldTrackConditionalWeakTableElements()) ||
+        fShouldWalkHeapRootsForEtw)
+    {
+        // GcScanDependentHandlesForProfiler double-checks
+        // CORProfilerTrackConditionalWeakTableElements() before calling into the profiler
+
+        GCScan::GcScanDependentHandlesForProfilerAndETW(max_generation, &SC);
+
+        // indicate that dependent handle scanning is over, so we can flush the buffered roots
+        // to the profiler. (This is for profapi only. ETW will flush after the
+        // entire heap walk is complete, via ETW::GCLog::EndHeapDump.)
+ if (fProfilerPinned && ProfilerShouldTrackConditionalWeakTableElements()) + { + ProfilerEndConditionalWeakTableElementReferences(&SC.pHeapId); + } + } + + ProfilerWalkHeapContext profilerWalkHeapContext(fProfilerPinned, SC.pvEtwContext); + + // **** Walk objects on heap: only if profiling API wants them or ETW wants them. + if (fProfilerPinned || fShouldWalkHeapObjectsForEtw) + { +#ifdef MULTIPLE_HEAPS + int hn; + + // Walk the heap and provide the objref to the profiler + for (hn = 0; hn < gc_heap::n_heaps; hn++) + { + gc_heap* hp = gc_heap::g_heaps [hn]; + hp->walk_heap(&HeapWalkHelper, &profilerWalkHeapContext, max_generation, TRUE /* walk the large object heap */); + } +#else + gc_heap::walk_heap(&HeapWalkHelper, &profilerWalkHeapContext, max_generation, TRUE); +#endif //MULTIPLE_HEAPS + } + +#ifdef FEATURE_EVENT_TRACE + // **** Done! Indicate to ETW helpers that the heap walk is done, so any buffers + // should be flushed into the ETW stream + if (fShouldWalkHeapObjectsForEtw || fShouldWalkHeapRootsForEtw) + { + ETW::GCLog::EndHeapDump(&profilerWalkHeapContext); + } +#endif // FEATURE_EVENT_TRACE + } +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +void GCProfileWalkHeap() +{ + BOOL fWalkedHeapForProfiler = FALSE; + +#ifdef FEATURE_EVENT_TRACE + if (ETW::GCLog::ShouldWalkStaticsAndCOMForEtw()) + ETW::GCLog::WalkStaticsAndCOMForETW(); + + BOOL fShouldWalkHeapRootsForEtw = ETW::GCLog::ShouldWalkHeapRootsForEtw(); + BOOL fShouldWalkHeapObjectsForEtw = ETW::GCLog::ShouldWalkHeapObjectsForEtw(); +#else // !FEATURE_EVENT_TRACE + BOOL fShouldWalkHeapRootsForEtw = FALSE; + BOOL fShouldWalkHeapObjectsForEtw = FALSE; +#endif // FEATURE_EVENT_TRACE + +#if defined (GC_PROFILING) + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + GCProfileWalkHeapWorker(TRUE /* fProfilerPinned */, fShouldWalkHeapRootsForEtw, fShouldWalkHeapObjectsForEtw); + fWalkedHeapForProfiler = TRUE; + END_PIN_PROFILER(); + } +#endif // defined (GC_PROFILING) + +#if defined 
(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + // we need to walk the heap if one of GC_PROFILING or FEATURE_EVENT_TRACE + // is defined, since both of them make use of the walk heap worker. + if (!fWalkedHeapForProfiler && + (fShouldWalkHeapRootsForEtw || fShouldWalkHeapObjectsForEtw)) + { + GCProfileWalkHeapWorker(FALSE /* fProfilerPinned */, fShouldWalkHeapRootsForEtw, fShouldWalkHeapObjectsForEtw); + } +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +} + +BOOL GCHeap::IsGCInProgressHelper (BOOL bConsiderGCStart) +{ + return GcInProgress || (bConsiderGCStart? VolatileLoad(&gc_heap::gc_started) : FALSE); +} + +uint32_t GCHeap::WaitUntilGCComplete(BOOL bConsiderGCStart) +{ + if (bConsiderGCStart) + { + if (gc_heap::gc_started) + { + gc_heap::wait_for_gc_done(); + } + } + + uint32_t dwWaitResult = NOERROR; + + if (GcInProgress) + { + ASSERT( WaitForGCEvent->IsValid() ); + +#ifdef DETECT_DEADLOCK + // wait for GC to complete +BlockAgain: + dwWaitResult = WaitForGCEvent->Wait(DETECT_DEADLOCK_TIMEOUT, FALSE ); + + if (dwWaitResult == WAIT_TIMEOUT) { + // Even in retail, stop in the debugger if available. Ideally, the + // following would use DebugBreak, but debspew.h makes this a null + // macro in retail. Note that in debug, we don't use the debspew.h + // macros because these take a critical section that may have been + // taken by a suspended thread. 
+ FreeBuildDebugBreak(); + goto BlockAgain; + } + +#else //DETECT_DEADLOCK + + dwWaitResult = WaitForGCEvent->Wait(INFINITE, FALSE ); + +#endif //DETECT_DEADLOCK + } + + return dwWaitResult; +} + +void GCHeap::SetGCInProgress(BOOL fInProgress) +{ + GcInProgress = fInProgress; +} + +CLREvent * GCHeap::GetWaitForGCEvent() +{ + return WaitForGCEvent; +} + +void GCHeap::WaitUntilConcurrentGCComplete() +{ +#ifdef BACKGROUND_GC + if (pGenGCHeap->settings.concurrent) + pGenGCHeap->background_gc_wait(); +#endif //BACKGROUND_GC +} + +BOOL GCHeap::IsConcurrentGCInProgress() +{ +#ifdef BACKGROUND_GC + return pGenGCHeap->settings.concurrent; +#else + return FALSE; +#endif //BACKGROUND_GC +} + +#ifdef FEATURE_EVENT_TRACE +void gc_heap::fire_etw_allocation_event (size_t allocation_amount, int gen_number, uint8_t* object_address) +{ + void * typeId = nullptr; + const WCHAR * name = nullptr; +#ifdef FEATURE_REDHAWK + typeId = RedhawkGCInterface::GetLastAllocEEType(); +#else + InlineSString<MAX_CLASSNAME_LENGTH> strTypeName; + + EX_TRY + { + TypeHandle th = GetThread()->GetTHAllocContextObj(); + + if (th != 0) + { + th.GetName(strTypeName); + name = strTypeName.GetUnicode(); + typeId = th.GetMethodTable(); + } + } + EX_CATCH {} + EX_END_CATCH(SwallowAllExceptions) +#endif + + if (typeId != nullptr) + { + FireEtwGCAllocationTick_V3((uint32_t)allocation_amount, + ((gen_number == 0) ? 
ETW::GCLog::ETW_GC_INFO::AllocationSmall : ETW::GCLog::ETW_GC_INFO::AllocationLarge), + GetClrInstanceId(), + allocation_amount, + typeId, + name, + heap_number, + object_address + ); + } +} +void gc_heap::fire_etw_pin_object_event (uint8_t* object, uint8_t** ppObject) +{ +#ifdef FEATURE_REDHAWK + UNREFERENCED_PARAMETER(object); + UNREFERENCED_PARAMETER(ppObject); +#else + Object* obj = (Object*)object; + + InlineSString<MAX_CLASSNAME_LENGTH> strTypeName; + + EX_TRY + { + FAULT_NOT_FATAL(); + + TypeHandle th = obj->GetGCSafeTypeHandleIfPossible(); + if(th != NULL) + { + th.GetName(strTypeName); + } + + FireEtwPinObjectAtGCTime(ppObject, + object, + obj->GetSize(), + strTypeName.GetUnicode(), + GetClrInstanceId()); + } + EX_CATCH {} + EX_END_CATCH(SwallowAllExceptions) +#endif // FEATURE_REDHAWK +} +#endif // FEATURE_EVENT_TRACE + +uint32_t gc_heap::user_thread_wait (CLREvent *event, BOOL no_mode_change, int time_out_ms) +{ + Thread* pCurThread = NULL; + bool mode = false; + uint32_t dwWaitResult = NOERROR; + + if (!no_mode_change) + { + pCurThread = GetThread(); + mode = pCurThread ? 
GCToEEInterface::IsPreemptiveGCDisabled(pCurThread) : false; + if (mode) + { + GCToEEInterface::EnablePreemptiveGC(pCurThread); + } + } + + dwWaitResult = event->Wait(time_out_ms, FALSE); + + if (!no_mode_change && mode) + { + GCToEEInterface::DisablePreemptiveGC(pCurThread); + } + + return dwWaitResult; +} + +#ifdef BACKGROUND_GC +// Wait for background gc to finish +uint32_t gc_heap::background_gc_wait (alloc_wait_reason awr, int time_out_ms) +{ + dprintf(2, ("Waiting end of background gc")); + assert (background_gc_done_event.IsValid()); + fire_alloc_wait_event_begin (awr); + uint32_t dwRet = user_thread_wait (&background_gc_done_event, FALSE, time_out_ms); + fire_alloc_wait_event_end (awr); + dprintf(2, ("Waiting end of background gc is done")); + + return dwRet; +} + +// Wait for background gc to finish sweeping large objects +void gc_heap::background_gc_wait_lh (alloc_wait_reason awr) +{ + dprintf(2, ("Waiting end of background large sweep")); + assert (gc_lh_block_event.IsValid()); + fire_alloc_wait_event_begin (awr); + user_thread_wait (&gc_lh_block_event, FALSE); + fire_alloc_wait_event_end (awr); + dprintf(2, ("Waiting end of background large sweep is done")); +} + +#endif //BACKGROUND_GC + + +/******************************************************************************/ +::GCHeap* CreateGCHeap() { + return new(nothrow) GCHeap(); // we return wks or svr +} + +void GCHeap::TraceGCSegments() +{ +#ifdef FEATURE_EVENT_TRACE + heap_segment* seg = 0; +#ifdef MULTIPLE_HEAPS + // walk segments in each heap + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* h = gc_heap::g_heaps [i]; +#else + { + gc_heap* h = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + for (seg = generation_start_segment (h->generation_of (max_generation)); seg != 0; seg = heap_segment_next(seg)) + { + ETW::GCLog::ETW_GC_INFO Info; + Info.GCCreateSegment.Address = (size_t)heap_segment_mem(seg); + Info.GCCreateSegment.Size = (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)); + 
Info.GCCreateSegment.Type = (heap_segment_read_only_p (seg) ? + ETW::GCLog::ETW_GC_INFO::READ_ONLY_HEAP : + ETW::GCLog::ETW_GC_INFO::SMALL_OBJECT_HEAP); + FireEtwGCCreateSegment_V1(Info.GCCreateSegment.Address, Info.GCCreateSegment.Size, Info.GCCreateSegment.Type, GetClrInstanceId()); + } + + // large obj segments + for (seg = generation_start_segment (h->generation_of (max_generation+1)); seg != 0; seg = heap_segment_next(seg)) + { + FireEtwGCCreateSegment_V1((size_t)heap_segment_mem(seg), + (size_t)(heap_segment_reserved (seg) - heap_segment_mem(seg)), + ETW::GCLog::ETW_GC_INFO::LARGE_OBJECT_HEAP, + GetClrInstanceId()); + } + } +#endif // FEATURE_EVENT_TRACE +} + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +void GCHeap::DescrGenerationsToProfiler (gen_walk_fn fn, void *context) +{ + pGenGCHeap->descr_generations_to_profiler(fn, context); +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#ifdef FEATURE_BASICFREEZE +segment_handle GCHeap::RegisterFrozenSegment(segment_info *pseginfo) +{ + heap_segment * seg = new (nothrow) heap_segment; + if (!seg) + { + return NULL; + } + + uint8_t* base_mem = (uint8_t*)pseginfo->pvMem; + heap_segment_mem(seg) = base_mem + pseginfo->ibFirstObject; + heap_segment_allocated(seg) = base_mem + pseginfo->ibAllocated; + heap_segment_committed(seg) = base_mem + pseginfo->ibCommit; + heap_segment_reserved(seg) = base_mem + pseginfo->ibReserved; + heap_segment_next(seg) = 0; + heap_segment_used(seg) = heap_segment_allocated(seg); + heap_segment_plan_allocated(seg) = 0; + seg->flags = heap_segment_flags_readonly; + +#if defined (MULTIPLE_HEAPS) && !defined (ISOLATED_HEAPS) + gc_heap* heap = gc_heap::g_heaps[0]; + heap_segment_heap(seg) = heap; +#else + gc_heap* heap = pGenGCHeap; +#endif //MULTIPLE_HEAPS && !ISOLATED_HEAPS + + if (heap->insert_ro_segment(seg) == FALSE) + { + delete seg; + return NULL; + } + + return reinterpret_cast< segment_handle >(seg); +} + +void 
GCHeap::UnregisterFrozenSegment(segment_handle seg) +{ +#if defined (MULTIPLE_HEAPS) && !defined (ISOLATED_HEAPS) + gc_heap* heap = gc_heap::g_heaps[0]; +#else + gc_heap* heap = pGenGCHeap; +#endif //MULTIPLE_HEAPS && !ISOLATED_HEAPS + + heap->remove_ro_segment(reinterpret_cast<heap_segment*>(seg)); +} +#endif // FEATURE_BASICFREEZE + + +#endif // !DACCESS_COMPILE + + diff --git a/src/gc/gceesvr.cpp b/src/gc/gceesvr.cpp new file mode 100644 index 0000000000..aacae486f5 --- /dev/null +++ b/src/gc/gceesvr.cpp @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + + +#include "common.h" + +#if defined(FEATURE_SVR_GC) + +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" + +#define SERVER_GC 1 + +namespace SVR { +#include "gcimpl.h" +#include "gcee.cpp" +} + +#if defined(FEATURE_PAL) && !defined(DACCESS_COMPILE) + +// Initializes the SVR DAC table entries +void DacGlobals::InitializeSVREntries(TADDR baseAddress) +{ +#define DEFINE_DACVAR_SVR(id_type, size, id, var) id = PTR_TO_TADDR(&var) - baseAddress; +#include "dacvars.h" +} + +#endif // FEATURE_PAL && !DACCESS_COMPILE + +#endif // FEATURE_SVR_GC diff --git a/src/gc/gceewks.cpp b/src/gc/gceewks.cpp new file mode 100644 index 0000000000..72a7d3bdb9 --- /dev/null +++ b/src/gc/gceewks.cpp @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ + + +#include "common.h" + +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" + +#ifdef SERVER_GC +#undef SERVER_GC +#endif + +namespace WKS { +#include "gcimpl.h" +#include "gcee.cpp" +} + diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h new file mode 100644 index 0000000000..6a4ee86cd8 --- /dev/null +++ b/src/gc/gcimpl.h @@ -0,0 +1,316 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + +#ifndef GCIMPL_H_ +#define GCIMPL_H_ + +#define CLREvent CLREventStatic + +#ifdef SERVER_GC +#define MULTIPLE_HEAPS 1 +#endif // SERVER_GC + +#ifdef MULTIPLE_HEAPS + +#define PER_HEAP + +#else //MULTIPLE_HEAPS + +#define PER_HEAP static + +#endif // MULTIPLE_HEAPS + +#define PER_HEAP_ISOLATED static + +#if defined(WRITE_BARRIER_CHECK) && !defined (MULTIPLE_HEAPS) +void initGCShadow(); +void deleteGCShadow(); +void checkGCWriteBarrier(); +#else +inline void initGCShadow() {} +inline void deleteGCShadow() {} +inline void checkGCWriteBarrier() {} +#endif + +void GCProfileWalkHeap(); + +class GCHeap; +class gc_heap; +class CFinalize; + +// TODO : it would be easier to make this an ORed value +enum gc_reason +{ + reason_alloc_soh = 0, + reason_induced = 1, + reason_lowmemory = 2, + reason_empty = 3, + reason_alloc_loh = 4, + reason_oos_soh = 5, + reason_oos_loh = 6, + reason_induced_noforce = 7, // it's an induced GC and doesn't have to be blocking. 
+ reason_gcstress = 8, // this turns into reason_induced & gc_mechanisms.stress_induced = true + reason_lowmemory_blocking = 9, + reason_induced_compacting = 10, + reason_lowmemory_host = 11, + reason_max +}; + +class GCHeap : public ::GCHeap +{ +protected: + +#ifdef MULTIPLE_HEAPS + gc_heap* pGenGCHeap; +#else + #define pGenGCHeap ((gc_heap*)0) +#endif //MULTIPLE_HEAPS + + friend class CFinalize; + friend class gc_heap; + friend struct ::alloc_context; + friend void EnterAllocLock(); + friend void LeaveAllocLock(); + friend void ProfScanRootsHelper(Object** object, ScanContext *pSC, uint32_t dwFlags); + friend void GCProfileWalkHeap(); + +public: + //In order to keep gc.cpp cleaner, ugly EE specific code is relegated to methods. + static void UpdatePreGCCounters(); + static void UpdatePostGCCounters(); + +public: + GCHeap(){}; + ~GCHeap(){}; + + /* BaseGCHeap Methods*/ + PER_HEAP_ISOLATED HRESULT Shutdown (); + + size_t GetTotalBytesInUse (); + // Gets the amount of bytes objects currently occupy on the GC heap. 
+ size_t GetCurrentObjSize(); + + size_t GetLastGCStartTime(int generation); + size_t GetLastGCDuration(int generation); + size_t GetNow(); + + void TraceGCSegments (); + void PublishObject(uint8_t* obj); + + BOOL IsGCInProgressHelper (BOOL bConsiderGCStart = FALSE); + + uint32_t WaitUntilGCComplete (BOOL bConsiderGCStart = FALSE); + + void SetGCInProgress(BOOL fInProgress); + + CLREvent * GetWaitForGCEvent(); + + HRESULT Initialize (); + + //flags can be GC_ALLOC_CONTAINS_REF GC_ALLOC_FINALIZE + Object* Alloc (size_t size, uint32_t flags); +#ifdef FEATURE_64BIT_ALIGNMENT + Object* AllocAlign8 (size_t size, uint32_t flags); + Object* AllocAlign8 (alloc_context* acontext, size_t size, uint32_t flags); +private: + Object* AllocAlign8Common (void* hp, alloc_context* acontext, size_t size, uint32_t flags); +public: +#endif // FEATURE_64BIT_ALIGNMENT + Object* AllocLHeap (size_t size, uint32_t flags); + Object* Alloc (alloc_context* acontext, size_t size, uint32_t flags); + + void FixAllocContext (alloc_context* acontext, + BOOL lockp, void* arg, void *heap); + + Object* GetContainingObject(void *pInteriorPtr); + +#ifdef MULTIPLE_HEAPS + static void AssignHeap (alloc_context* acontext); + static GCHeap* GetHeap (int); +#endif //MULTIPLE_HEAPS + + int GetHomeHeapNumber (); + bool IsThreadUsingAllocationContextHeap(alloc_context* acontext, int thread_number); + int GetNumberOfHeaps (); + void HideAllocContext(alloc_context*); + void RevealAllocContext(alloc_context*); + + BOOL IsObjectInFixedHeap(Object *pObj); + + HRESULT GarbageCollect (int generation = -1, BOOL low_memory_p=FALSE, int mode=collection_blocking); + + //// + // GC callback functions + // Check if an argument is promoted (ONLY CALL DURING + // THE PROMOTIONSGRANTED CALLBACK.) 
+ BOOL IsPromoted (Object *object); + + size_t GetPromotedBytes (int heap_index); + + int CollectionCount (int generation, int get_bgc_fgc_count = 0); + + // promote an object + PER_HEAP_ISOLATED void Promote (Object** object, + ScanContext* sc, + uint32_t flags=0); + + // Find the relocation address for an object + PER_HEAP_ISOLATED void Relocate (Object** object, + ScanContext* sc, + uint32_t flags=0); + + + HRESULT Init (size_t heapSize); + + //Register an object for finalization + bool RegisterForFinalization (int gen, Object* obj); + + //Unregister an object for finalization + void SetFinalizationRun (Object* obj); + + //returns the generation number of an object (not valid during relocation) + unsigned WhichGeneration (Object* object); + // returns TRUE is the object is ephemeral + BOOL IsEphemeral (Object* object); + BOOL IsHeapPointer (void* object, BOOL small_heap_only = FALSE); + +#ifdef VERIFY_HEAP + void ValidateObjectMember (Object *obj); +#endif //_DEBUG + + PER_HEAP size_t ApproxTotalBytesInUse(BOOL small_heap_only = FALSE); + PER_HEAP size_t ApproxFreeBytes(); + + unsigned GetCondemnedGeneration(); + + int GetGcLatencyMode(); + int SetGcLatencyMode(int newLatencyMode); + + int GetLOHCompactionMode(); + void SetLOHCompactionMode(int newLOHCompactionyMode); + + BOOL RegisterForFullGCNotification(uint32_t gen2Percentage, + uint32_t lohPercentage); + BOOL CancelFullGCNotification(); + int WaitForFullGCApproach(int millisecondsTimeout); + int WaitForFullGCComplete(int millisecondsTimeout); + + int StartNoGCRegion(uint64_t totalSize, BOOL lohSizeKnown, uint64_t lohSize, BOOL disallowFullBlockingGC); + int EndNoGCRegion(); + + PER_HEAP_ISOLATED unsigned GetMaxGeneration(); + + unsigned GetGcCount(); + + Object* GetNextFinalizable() { return GetNextFinalizableObject(); }; + size_t GetNumberOfFinalizable() { return GetNumberFinalizableObjects(); } + + PER_HEAP_ISOLATED HRESULT GetGcCounters(int gen, gc_counters* counters); + + size_t GetValidSegmentSize(BOOL 
large_seg = FALSE); + + static size_t GetValidGen0MaxSize(size_t seg_size); + + void SetReservedVMLimit (size_t vmlimit); + + PER_HEAP_ISOLATED Object* GetNextFinalizableObject(); + PER_HEAP_ISOLATED size_t GetNumberFinalizableObjects(); + PER_HEAP_ISOLATED size_t GetFinalizablePromotedCount(); + + void SetFinalizeQueueForShutdown(BOOL fHasLock); + BOOL FinalizeAppDomain(AppDomain *pDomain, BOOL fRunFinalizers); + BOOL ShouldRestartFinalizerWatchDog(); + + void SetCardsAfterBulkCopy( Object**, size_t); +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + void WalkObject (Object* obj, walk_fn fn, void* context); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +public: // FIX + + // Lock for finalization + PER_HEAP_ISOLATED + VOLATILE(int32_t) m_GCFLock; + + PER_HEAP_ISOLATED BOOL GcCollectClasses; + PER_HEAP_ISOLATED + VOLATILE(BOOL) GcInProgress; // used for syncing w/GC + PER_HEAP_ISOLATED VOLATILE(unsigned) GcCount; + PER_HEAP_ISOLATED unsigned GcCondemnedGeneration; + // calculated at the end of a GC. + PER_HEAP_ISOLATED size_t totalSurvivedSize; + + // Use only for GC tracing. + PER_HEAP unsigned int GcDuration; + + size_t GarbageCollectGeneration (unsigned int gen=0, gc_reason reason=reason_empty); + // Interface with gc_heap + size_t GarbageCollectTry (int generation, BOOL low_memory_p=FALSE, int mode=collection_blocking); + +#ifdef FEATURE_BASICFREEZE + // frozen segment management functions + virtual segment_handle RegisterFrozenSegment(segment_info *pseginfo); + virtual void UnregisterFrozenSegment(segment_handle seg); +#endif // FEATURE_BASICFREEZE + + void WaitUntilConcurrentGCComplete (); // Use in managd threads +#ifndef DACCESS_COMPILE + HRESULT WaitUntilConcurrentGCCompleteAsync(int millisecondsTimeout); // Use in native threads. TRUE if succeed. 
FALSE if failed or timeout +#endif + BOOL IsConcurrentGCInProgress(); + + // Enable/disable concurrent GC + void TemporaryEnableConcurrentGC(); + void TemporaryDisableConcurrentGC(); + BOOL IsConcurrentGCEnabled(); + + PER_HEAP_ISOLATED CLREvent *WaitForGCEvent; // used for syncing w/GC + + PER_HEAP_ISOLATED CFinalize* m_Finalize; + + PER_HEAP_ISOLATED gc_heap* Getgc_heap(); + +private: + static bool SafeToRestartManagedThreads() + { + // Note: this routine should return true when the last barrier + // to threads returning to cooperative mode is down after gc. + // In other words, if the sequence in GCHeap::RestartEE changes, + // the condition here may have to change as well. + return g_TrapReturningThreads == 0; + } +#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way +#ifdef STRESS_HEAP +public: + //return TRUE if GC actually happens, otherwise FALSE + BOOL StressHeap(alloc_context * acontext = 0); +protected: + + // only used in BACKGROUND_GC, but the symbol is not defined yet... 
+ PER_HEAP_ISOLATED int gc_stress_fgcs_in_bgc; + +#if !defined(MULTIPLE_HEAPS) + // handles to hold the string objects that will force GC movement + enum { NUM_HEAP_STRESS_OBJS = 8 }; + PER_HEAP OBJECTHANDLE m_StressObjs[NUM_HEAP_STRESS_OBJS]; + PER_HEAP int m_CurStressObj; +#endif // !defined(MULTIPLE_HEAPS) +#endif // STRESS_HEAP +#endif // FEATURE_REDHAWK + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + virtual void DescrGenerationsToProfiler (gen_walk_fn fn, void *context); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +#ifdef VERIFY_HEAP +public: + Object * NextObj (Object * object); +#ifdef FEATURE_BASICFREEZE + BOOL IsInFrozenSegment (Object * object); +#endif //FEATURE_BASICFREEZE +#endif //VERIFY_HEAP +}; + +#endif // GCIMPL_H_ diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h new file mode 100644 index 0000000000..03a23454a0 --- /dev/null +++ b/src/gc/gcpriv.h @@ -0,0 +1,4354 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// optimize for speed + + +#ifndef _DEBUG +#ifdef _MSC_VER +#pragma optimize( "t", on ) +#endif +#endif +#define inline __forceinline + +#include "gc.h" + +//#define DT_LOG + +#include "gcrecord.h" + +#ifdef _MSC_VER +#pragma warning(disable:4293) +#pragma warning(disable:4477) +#endif //_MSC_VER + +inline void FATAL_GC_ERROR() +{ + GCToOSInterface::DebugBreak(); + _ASSERTE(!"Fatal Error in GC."); + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); +} + +#ifdef _MSC_VER +#pragma inline_depth(20) +#endif + +/* the following section defines the optional features */ + +// FEATURE_STRUCTALIGN was added by Midori. In CLR we are not interested +// in supporting custom alignments on LOH. Currently FEATURE_LOH_COMPACTION +// and FEATURE_STRUCTALIGN are mutually exclusive. 
It shouldn't be much +// work to make FEATURE_STRUCTALIGN not apply to LOH so they can be both +// turned on. +#define FEATURE_LOH_COMPACTION + +#ifdef FEATURE_64BIT_ALIGNMENT +// We need the following feature as part of keeping 64-bit types aligned in the GC heap. +#define RESPECT_LARGE_ALIGNMENT //used to keep "double" objects aligned during + //relocation +#endif //FEATURE_64BIT_ALIGNMENT + +#define SHORT_PLUGS //used to keep ephemeral plugs short so they fit better into the oldest generation free items + +#ifdef SHORT_PLUGS +#define DESIRED_PLUG_LENGTH (1000) +#endif //SHORT_PLUGS + +#define FEATURE_PREMORTEM_FINALIZATION +#define GC_HISTORY + +#ifndef FEATURE_REDHAWK +#define HEAP_ANALYZE +#define COLLECTIBLE_CLASS +#endif // !FEATURE_REDHAWK + +#ifdef HEAP_ANALYZE +#define initial_internal_roots (1024*16) +#endif // HEAP_ANALYZE + +#define MARK_LIST //used sorted list to speed up plan phase + +#define BACKGROUND_GC //concurrent background GC (requires WRITE_WATCH) + +#ifdef SERVER_GC +#define MH_SC_MARK //scalable marking +//#define SNOOP_STATS //diagnostic +#define PARALLEL_MARK_LIST_SORT //do the sorting and merging of the multiple mark lists in server gc in parallel +#endif //SERVER_GC + +//This is used to mark some type volatile only when the scalable marking is used. +#if defined (SERVER_GC) && defined (MH_SC_MARK) +#define SERVER_SC_MARK_VOLATILE(x) VOLATILE(x) +#else //SERVER_GC&&MH_SC_MARK +#define SERVER_SC_MARK_VOLATILE(x) x +#endif //SERVER_GC&&MH_SC_MARK + +//#define MULTIPLE_HEAPS //Allow multiple heaps for servers + +#define INTERIOR_POINTERS //Allow interior pointers in the code manager + +#define CARD_BUNDLE //enable card bundle feature.(requires WRITE_WATCH) + +// If this is defined we use a map for segments in order to find the heap for +// a segment fast. But it does use more memory as we have to cover the whole +// heap range and for each entry we allocate a struct of 5 ptr-size words +// (3 for WKS as there's only one heap). 
+#define SEG_MAPPING_TABLE + +// If allocating the heap mapping table for the available VA consumes too +// much memory, you can enable this to allocate only the portion that +// corresponds to rw segments and grow it when needed in grow_brick_card_table. +// However in heap_of you will need to always compare the address with +// g_lowest/highest before you can look at the heap mapping table. +#define GROWABLE_SEG_MAPPING_TABLE + +#ifdef BACKGROUND_GC +#define MARK_ARRAY //Mark bit in an array +#endif //BACKGROUND_GC + +#if defined(BACKGROUND_GC) || defined (CARD_BUNDLE) || defined(FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP) +#define WRITE_WATCH //Write Watch feature +#endif //BACKGROUND_GC || CARD_BUNDLE + +#ifdef WRITE_WATCH +#define array_size 100 +#endif //WRITE_WATCH + +//#define SHORT_PLUGS //keep plug short + +#define FFIND_OBJECT //faster find_object, slower allocation +#define FFIND_DECAY 7 //Number of GC for which fast find will be active + +//#define NO_WRITE_BARRIER //no write barrier, use Write Watch feature + +//#define DEBUG_WRITE_WATCH //Additional debug for write watch + +//#define STRESS_PINNING //Stress pinning by pinning randomly + +//#define TRACE_GC //debug trace gc operation +//#define SIMPLE_DPRINTF + +//#define TIME_GC //time allocation and garbage collection +//#define TIME_WRITE_WATCH //time GetWriteWatch and ResetWriteWatch calls +//#define COUNT_CYCLES //Use cycle counter for timing +//#define JOIN_STATS //amount of time spent in the join +//also, see TIME_SUSPEND in switches.h. 
+ +//#define SYNCHRONIZATION_STATS +//#define SEG_REUSE_STATS + +#if defined (SYNCHRONIZATION_STATS) || defined (STAGE_STATS) +#define BEGIN_TIMING(x) \ + int64_t x##_start; \ + x##_start = GCToOSInterface::QueryPerformanceCounter() + +#define END_TIMING(x) \ + int64_t x##_end; \ + x##_end = GCToOSInterface::QueryPerformanceCounter(); \ + x += x##_end - x##_start + +#else +#define BEGIN_TIMING(x) +#define END_TIMING(x) +#define BEGIN_TIMING_CYCLES(x) +#define END_TIMING_CYCLES(x) +#endif //SYNCHRONIZATION_STATS || STAGE_STATS + +/* End of optional features */ + +#ifdef GC_CONFIG_DRIVEN +void GCLogConfig (const char *fmt, ... ); +#define cprintf(x) {GCLogConfig x;} +#endif //GC_CONFIG_DRIVEN + +#ifdef _DEBUG +#define TRACE_GC +#endif + +#define NUMBERGENERATIONS 4 //Max number of generations + +// For the bestfit algorithm when we relocate ephemeral generations into an +// existing gen2 segment. +// We recorded sizes from 2^6, 2^7, 2^8...up to 2^30 (1GB). So that's 25 sizes total. +#define MIN_INDEX_POWER2 6 + +#ifdef SERVER_GC + +#ifdef BIT64 +#define MAX_INDEX_POWER2 30 +#else +#define MAX_INDEX_POWER2 26 +#endif // BIT64 + +#else //SERVER_GC + +#ifdef BIT64 +#define MAX_INDEX_POWER2 28 +#else +#define MAX_INDEX_POWER2 24 +#endif // BIT64 + +#endif //SERVER_GC + +#define MAX_NUM_BUCKETS (MAX_INDEX_POWER2 - MIN_INDEX_POWER2 + 1) + +#define MAX_NUM_FREE_SPACES 200 +#define MIN_NUM_FREE_SPACES 5 + +//Please leave these definitions intact. 
+ +#define CLREvent CLREventStatic + +// hosted api +#ifdef memcpy +#undef memcpy +#endif //memcpy + +#ifdef FEATURE_STRUCTALIGN +#define REQD_ALIGN_DCL ,int requiredAlignment +#define REQD_ALIGN_ARG ,requiredAlignment +#define REQD_ALIGN_AND_OFFSET_DCL ,int requiredAlignment,size_t alignmentOffset +#define REQD_ALIGN_AND_OFFSET_DEFAULT_DCL ,int requiredAlignment=DATA_ALIGNMENT,size_t alignmentOffset=0 +#define REQD_ALIGN_AND_OFFSET_ARG ,requiredAlignment,alignmentOffset +#else // FEATURE_STRUCTALIGN +#define REQD_ALIGN_DCL +#define REQD_ALIGN_ARG +#define REQD_ALIGN_AND_OFFSET_DCL +#define REQD_ALIGN_AND_OFFSET_DEFAULT_DCL +#define REQD_ALIGN_AND_OFFSET_ARG +#endif // FEATURE_STRUCTALIGN + +#ifdef MULTIPLE_HEAPS +#define THREAD_NUMBER_DCL ,int thread +#define THREAD_NUMBER_ARG ,thread +#define THREAD_NUMBER_FROM_CONTEXT int thread = sc->thread_number; +#define THREAD_FROM_HEAP int thread = heap_number; +#define HEAP_FROM_THREAD gc_heap* hpt = gc_heap::g_heaps[thread]; +#else +#define THREAD_NUMBER_DCL +#define THREAD_NUMBER_ARG +#define THREAD_NUMBER_FROM_CONTEXT +#define THREAD_FROM_HEAP +#define HEAP_FROM_THREAD gc_heap* hpt = 0; +#endif //MULTIPLE_HEAPS + +//These constants are ordered +const int policy_sweep = 0; +const int policy_compact = 1; +const int policy_expand = 2; + +#ifdef TRACE_GC + + +extern int print_level; +extern BOOL trace_gc; +extern int gc_trace_fac; + + +class hlet +{ + static hlet* bindings; + int prev_val; + int* pval; + hlet* prev_let; +public: + hlet (int& place, int value) + { + prev_val = place; + pval = &place; + place = value; + prev_let = bindings; + bindings = this; + } + ~hlet () + { + *pval = prev_val; + bindings = prev_let; + } +}; + + +#define let(p,v) hlet __x = hlet (p, v); + +#else //TRACE_GC + +#define gc_count -1 +#define let(s,v) + +#endif //TRACE_GC + +#ifdef TRACE_GC +#define SEG_REUSE_LOG_0 7 +#define SEG_REUSE_LOG_1 (SEG_REUSE_LOG_0 + 1) +#define DT_LOG_0 (SEG_REUSE_LOG_1 + 1) +#define BGC_LOG (DT_LOG_0 + 1) +#define 
GTC_LOG (DT_LOG_0 + 2) +#define GC_TABLE_LOG (DT_LOG_0 + 3) +#define JOIN_LOG (DT_LOG_0 + 4) +#define SPINLOCK_LOG (DT_LOG_0 + 5) +#define SNOOP_LOG (DT_LOG_0 + 6) + +#ifndef DACCESS_COMPILE + +#ifdef SIMPLE_DPRINTF + +//#define dprintf(l,x) {if (trace_gc && ((l<=print_level)||gc_heap::settings.concurrent)) {printf ("\n");printf x ; fflush(stdout);}} +void GCLog (const char *fmt, ... ); +//#define dprintf(l,x) {if (trace_gc && (l<=print_level)) {GCLog x;}} +//#define dprintf(l,x) {if ((l==SEG_REUSE_LOG_0) || (l==SEG_REUSE_LOG_1) || (trace_gc && (l<=3))) {GCLog x;}} +//#define dprintf(l,x) {if (l == DT_LOG_0) {GCLog x;}} +//#define dprintf(l,x) {if (trace_gc && ((l <= 2) || (l == BGC_LOG) || (l==GTC_LOG))) {GCLog x;}} +//#define dprintf(l,x) {if ((l == 1) || (l == 2222)) {GCLog x;}} +#define dprintf(l,x) {if ((l <= 1) || (l == GTC_LOG)) {GCLog x;}} +//#define dprintf(l,x) {if ((l==GTC_LOG) || (l <= 1)) {GCLog x;}} +//#define dprintf(l,x) {if (trace_gc && ((l <= print_level) || (l==GTC_LOG))) {GCLog x;}} +//#define dprintf(l,x) {if (l==GTC_LOG) {printf ("\n");printf x ; fflush(stdout);}} +#else //SIMPLE_DPRINTF + +// The GCTrace output goes to stdout by default but can get sent to the stress log or the logfile if the +// reg key GCTraceFacility is set. THe stress log can only take a format string and 4 numbers or +// string literals. 
+#define dprintf(l,x) {if (trace_gc && (l<=print_level)) { \ + if ( !gc_trace_fac) {printf ("\n");printf x ; fflush(stdout);} \ + else if ( gc_trace_fac == 2) {LogSpewAlways x;LogSpewAlways ("\n");} \ + else if ( gc_trace_fac == 1) {STRESS_LOG_VA(x);}}} + +#endif //SIMPLE_DPRINTF + +#else //DACCESS_COMPILE +#define dprintf(l,x) +#endif //DACCESS_COMPILE +#else //TRACE_GC +#define dprintf(l,x) +#endif //TRACE_GC + +#ifndef FEATURE_REDHAWK +#undef assert +#define assert _ASSERTE +#undef ASSERT +#define ASSERT _ASSERTE +#endif // FEATURE_REDHAWK + +#ifdef _DEBUG + +struct GCDebugSpinLock { + VOLATILE(int32_t) lock; // -1 if free, 0 if held + VOLATILE(Thread *) holding_thread; // -1 if no thread holds the lock. + VOLATILE(BOOL) released_by_gc_p; // a GC thread released the lock. + + GCDebugSpinLock() + : lock(-1), holding_thread((Thread*) -1) + { + } +}; +typedef GCDebugSpinLock GCSpinLock; + +#elif defined (SYNCHRONIZATION_STATS) + +struct GCSpinLockInstru { + VOLATILE(int32_t) lock; + // number of times we went into SwitchToThread in enter_spin_lock. + unsigned int num_switch_thread; + // number of times we went into WaitLonger. + unsigned int num_wait_longer; + // number of times we went to calling SwitchToThread in WaitLonger. + unsigned int num_switch_thread_w; + // number of times we went to calling DisablePreemptiveGC in WaitLonger. 
+ unsigned int num_disable_preemptive_w; + + GCSpinLockInstru() + : lock(-1), num_switch_thread(0), num_wait_longer(0), num_switch_thread_w(0), num_disable_preemptive_w(0) + { + } + + void init() + { + num_switch_thread = 0; + num_wait_longer = 0; + num_switch_thread_w = 0; + num_disable_preemptive_w = 0; + } +}; + +typedef GCSpinLockInstru GCSpinLock; + +#else + +struct GCDebugSpinLock { + VOLATILE(int32_t) lock; // -1 if free, 0 if held + + GCDebugSpinLock() + : lock(-1) + { + } +}; +typedef GCDebugSpinLock GCSpinLock; + +#endif + +class mark; +class heap_segment; +class CObjectHeader; +class l_heap; +class sorted_table; +class c_synchronize; +class seg_free_spaces; +class gc_heap; + +#ifdef BACKGROUND_GC +class exclusive_sync; +class recursive_gc_sync; +#endif //BACKGROUND_GC + +// The following 2 modes are of the same format as in clr\src\bcl\system\runtime\gcsettings.cs +// make sure you change that one if you change this one! +enum gc_pause_mode +{ + pause_batch = 0, //We are not concerned about pause length + pause_interactive = 1, //We are running an interactive app + pause_low_latency = 2, //short pauses are essential + //avoid long pauses from blocking full GCs unless running out of memory + pause_sustained_low_latency = 3, + pause_no_gc = 4 +}; + +enum gc_loh_compaction_mode +{ + loh_compaction_default = 1, // the default mode, don't compact LOH. + loh_compaction_once = 2, // only compact once the next time a blocking full GC happens. + loh_compaction_auto = 4 // GC decides when to compact LOH, to be implemented. +}; + +enum set_pause_mode_status +{ + set_pause_mode_success = 0, + set_pause_mode_no_gc = 1 // NoGCRegion is in progress, can't change pause mode. 
+}; + +enum gc_tuning_point +{ + tuning_deciding_condemned_gen, + tuning_deciding_full_gc, + tuning_deciding_compaction, + tuning_deciding_expansion, + tuning_deciding_promote_ephemeral +}; + +#if defined(TRACE_GC) && defined(BACKGROUND_GC) +static const char * const str_bgc_state[] = +{ + "not_in_process", + "mark_handles", + "mark_stack", + "revisit_soh", + "revisit_loh", + "overflow_soh", + "overflow_loh", + "final_marking", + "sweep_soh", + "sweep_loh", + "plan_phase" +}; +#endif // defined(TRACE_GC) && defined(BACKGROUND_GC) + +enum allocation_state +{ + a_state_start = 0, + a_state_can_allocate, + a_state_cant_allocate, + a_state_try_fit, + a_state_try_fit_new_seg, + a_state_try_fit_new_seg_after_cg, + a_state_try_fit_no_seg, + a_state_try_fit_after_cg, + a_state_try_fit_after_bgc, + a_state_try_free_full_seg_in_bgc, + a_state_try_free_after_bgc, + a_state_try_seg_end, + a_state_acquire_seg, + a_state_acquire_seg_after_cg, + a_state_acquire_seg_after_bgc, + a_state_check_and_wait_for_bgc, + a_state_trigger_full_compact_gc, + a_state_trigger_ephemeral_gc, + a_state_trigger_2nd_ephemeral_gc, + a_state_check_retry_seg, + a_state_max +}; + +enum gc_type +{ + gc_type_compacting = 0, + gc_type_blocking = 1, +#ifdef BACKGROUND_GC + gc_type_background = 2, +#endif //BACKGROUND_GC + gc_type_max = 3 +}; + +#define v_high_memory_load_th 97 + +//encapsulates the mechanism for the current gc +class gc_mechanisms +{ +public: + VOLATILE(size_t) gc_index; // starts from 1 for the first GC, like dd_collection_count + int condemned_generation; + BOOL promotion; + BOOL compaction; + BOOL loh_compaction; + BOOL heap_expansion; + uint32_t concurrent; + BOOL demotion; + BOOL card_bundles; + int gen0_reduction_count; + BOOL should_lock_elevation; + int elevation_locked_count; + BOOL elevation_reduced; + BOOL minimal_gc; + gc_reason reason; + gc_pause_mode pause_mode; + BOOL found_finalizers; + +#ifdef BACKGROUND_GC + BOOL background_p; + bgc_state b_state; + BOOL 
allocations_allowed; +#endif //BACKGROUND_GC + +#ifdef STRESS_HEAP + BOOL stress_induced; +#endif // STRESS_HEAP + + uint32_t entry_memory_load; + + void init_mechanisms(); //for each GC + void first_init(); // for the life of the EE + + void record (gc_history_global* history); +}; + +// This is a compact version of gc_mechanism that we use to save in the history. +class gc_mechanisms_store +{ +public: + size_t gc_index; + bool promotion; + bool compaction; + bool loh_compaction; + bool heap_expansion; + bool concurrent; + bool demotion; + bool card_bundles; + bool should_lock_elevation; + int condemned_generation : 8; + int gen0_reduction_count : 8; + int elevation_locked_count : 8; + gc_reason reason : 8; + gc_pause_mode pause_mode : 8; +#ifdef BACKGROUND_GC + bgc_state b_state : 8; +#endif //BACKGROUND_GC + bool found_finalizers; + +#ifdef BACKGROUND_GC + bool background_p; +#endif //BACKGROUND_GC + +#ifdef STRESS_HEAP + bool stress_induced; +#endif // STRESS_HEAP + +#ifdef BIT64 + uint32_t entry_memory_load; +#endif // BIT64 + + void store (gc_mechanisms* gm) + { + gc_index = gm->gc_index; + condemned_generation = gm->condemned_generation; + promotion = (gm->promotion != 0); + compaction = (gm->compaction != 0); + loh_compaction = (gm->loh_compaction != 0); + heap_expansion = (gm->heap_expansion != 0); + concurrent = (gm->concurrent != 0); + demotion = (gm->demotion != 0); + card_bundles = (gm->card_bundles != 0); + gen0_reduction_count = gm->gen0_reduction_count; + should_lock_elevation = (gm->should_lock_elevation != 0); + elevation_locked_count = gm->elevation_locked_count; + reason = gm->reason; + pause_mode = gm->pause_mode; + found_finalizers = (gm->found_finalizers != 0); + +#ifdef BACKGROUND_GC + background_p = (gm->background_p != 0); + b_state = gm->b_state; +#endif //BACKGROUND_GC + +#ifdef STRESS_HEAP + stress_induced = (gm->stress_induced != 0); +#endif // STRESS_HEAP + +#ifdef BIT64 + entry_memory_load = gm->entry_memory_load; +#endif // BIT64 + 
} +}; + +#ifdef GC_STATS + +// GC specific statistics, tracking counts and timings for GCs occuring in the system. +// This writes the statistics to a file every 60 seconds, if a file is specified in +// COMPlus_GcMixLog + +struct GCStatistics + : public StatisticsBase +{ + // initialized to the contents of COMPlus_GcMixLog, or NULL, if not present + static TCHAR* logFileName; + static FILE* logFile; + + // number of times we executed a background GC, a foreground GC, or a + // non-concurrent GC + int cntBGC, cntFGC, cntNGC; + + // min, max, and total time spent performing BGCs, FGCs, NGCs + // (BGC time includes everything between the moment the BGC starts until + // it completes, i.e. the times of all FGCs occuring concurrently) + MinMaxTot bgc, fgc, ngc; + + // number of times we executed a compacting GC (sweeping counts can be derived) + int cntCompactNGC, cntCompactFGC; + + // count of reasons + int cntReasons[reason_max]; + + // count of condemned generation, by NGC and FGC: + int cntNGCGen[max_generation+1]; + int cntFGCGen[max_generation]; + + /////////////////////////////////////////////////////////////////////////////////////////////// + // Internal mechanism: + + virtual void Initialize(); + virtual void DisplayAndUpdate(); + + // Public API + + static BOOL Enabled() + { return logFileName != NULL; } + + void AddGCStats(const gc_mechanisms& settings, size_t timeInMSec); +}; + +extern GCStatistics g_GCStatistics; +extern GCStatistics g_LastGCStatistics; + +#endif // GC_STATS + + +typedef DPTR(class heap_segment) PTR_heap_segment; +typedef DPTR(class gc_heap) PTR_gc_heap; +typedef DPTR(PTR_gc_heap) PTR_PTR_gc_heap; +#ifdef FEATURE_PREMORTEM_FINALIZATION +typedef DPTR(class CFinalize) PTR_CFinalize; +#endif // FEATURE_PREMORTEM_FINALIZATION + +//------------------------------------- +//generation free list. It is an array of free lists bucketed by size, starting at sizes lower than first_bucket_size +//and doubling each time. 
The last bucket (index == num_buckets) is for largest sizes with no limit + +#define MAX_BUCKET_COUNT (13)//Max number of buckets for the small generations. +class alloc_list +{ + uint8_t* head; + uint8_t* tail; + + size_t damage_count; +public: +#ifdef FL_VERIFICATION + size_t item_count; +#endif //FL_VERIFICATION + + uint8_t*& alloc_list_head () { return head;} + uint8_t*& alloc_list_tail () { return tail;} + size_t& alloc_list_damage_count(){ return damage_count; } + alloc_list() + { + head = 0; + tail = 0; + damage_count = 0; + } +}; + + +class allocator +{ + size_t num_buckets; + size_t frst_bucket_size; + alloc_list first_bucket; + alloc_list* buckets; + alloc_list& alloc_list_of (unsigned int bn); + size_t& alloc_list_damage_count_of (unsigned int bn); + +public: + allocator (unsigned int num_b, size_t fbs, alloc_list* b); + allocator() + { + num_buckets = 1; + frst_bucket_size = SIZE_T_MAX; + } + unsigned int number_of_buckets() {return (unsigned int)num_buckets;} + + size_t first_bucket_size() {return frst_bucket_size;} + uint8_t*& alloc_list_head_of (unsigned int bn) + { + return alloc_list_of (bn).alloc_list_head(); + } + uint8_t*& alloc_list_tail_of (unsigned int bn) + { + return alloc_list_of (bn).alloc_list_tail(); + } + void clear(); + BOOL discard_if_no_fit_p() + { + return (num_buckets == 1); + } + + // This is when we know there's nothing to repair because this free + // list has never gone through plan phase. Right now it's only used + // by the background ephemeral sweep when we copy the local free list + // to gen0's free list. + // + // We copy head and tail manually (vs together like copy_to_alloc_list) + // since we need to copy tail first because when we get the free items off + // of each bucket we check head first. We also need to copy the + // smaller buckets first so when gen0 allocation needs to thread + // smaller items back that bucket is guaranteed to have been full + // copied. 
+    // Copy each bucket's free-list head/tail pointers from allocator_to_copy.
+    // Both allocators must have the same number of buckets (asserted).
+    void copy_with_no_repair (allocator* allocator_to_copy)
+    {
+        assert (num_buckets == allocator_to_copy->number_of_buckets());
+        for (unsigned int i = 0; i < num_buckets; i++)
+        {
+            alloc_list* al = &(allocator_to_copy->alloc_list_of (i));
+            alloc_list_tail_of(i) = al->alloc_list_tail();
+            alloc_list_head_of(i) = al->alloc_list_head();
+        }
+    }
+
+    void unlink_item (unsigned int bucket_number, uint8_t* item, uint8_t* previous_item, BOOL use_undo_p);
+    void thread_item (uint8_t* item, size_t size);
+    void thread_item_front (uint8_t* itme, size_t size);
+    void thread_free_item (uint8_t* free_item, uint8_t*& head, uint8_t*& tail);
+    void copy_to_alloc_list (alloc_list* toalist);
+    void copy_from_alloc_list (alloc_list* fromalist);
+    void commit_alloc_list_changes();
+};
+
+#define NUM_GEN_POWER2 (20)
+#define BASE_GEN_SIZE (1*512)
+
+// group the frequently used ones together (need instrumentation on accessors)
+class generation
+{
+public:
+    // Don't move these first two fields without adjusting the references
+    // from the __asm in jitinterface.cpp.
+    alloc_context   allocation_context;
+    heap_segment*   allocation_segment;
+    PTR_heap_segment start_segment;
+    uint8_t*        allocation_context_start_region;
+    uint8_t*        allocation_start;
+    allocator       free_list_allocator;
+    size_t          free_list_allocated;
+    size_t          end_seg_allocated;
+    BOOL            allocate_end_seg_p;
+    size_t          condemned_allocated;
+    size_t          free_list_space;
+    size_t          free_obj_space;
+    size_t          allocation_size;
+    uint8_t*        plan_allocation_start;
+    size_t          plan_allocation_start_size;
+
+    // this is the pinned plugs that got allocated into this gen.
+    size_t          pinned_allocated;
+    size_t          pinned_allocation_compact_size;
+    size_t          pinned_allocation_sweep_size;
+    int             gen_num;
+
+#ifdef FREE_USAGE_STATS
+    size_t          gen_free_spaces[NUM_GEN_POWER2];
+    // these are non pinned plugs only
+    size_t          gen_plugs[NUM_GEN_POWER2];
+    size_t          gen_current_pinned_free_spaces[NUM_GEN_POWER2];
+    size_t          pinned_free_obj_space;
+    // this is what got allocated into the pinned free spaces.
+    size_t          allocated_in_pinned_free;
+    size_t          allocated_since_last_pin;
+#endif //FREE_USAGE_STATS
+};
+
+// The dynamic data fields are grouped into 3 categories:
+//
+// calculated logical data (like desired_allocation)
+// physical data (like fragmentation)
+// const data (like min_gc_size), initialized at the beginning
+class dynamic_data
+{
+public:
+    ptrdiff_t new_allocation;
+    ptrdiff_t gc_new_allocation; // new allocation at beginning of gc
+    float     surv;
+    size_t    desired_allocation;
+
+    // # of bytes taken by objects (ie, not free space) at the beginning
+    // of the GC.
+    size_t    begin_data_size;
+    // # of bytes taken by survived objects after mark.
+    size_t    survived_size;
+    // # of bytes taken by survived pinned plugs after mark.
+    size_t    pinned_survived_size;
+    size_t    artificial_pinned_survived_size;
+    size_t    added_pinned_size;
+
+#ifdef SHORT_PLUGS
+    size_t    padding_size;
+#endif //SHORT_PLUGS
+#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN)
+    // # of plugs that are not pinned plugs.
+    size_t    num_npinned_plugs;
+#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN
+    //total object size after a GC, ie, doesn't include fragmentation
+    size_t    current_size;
+    size_t    collection_count;
+    size_t    promoted_size;
+    size_t    freach_previous_promotion;
+    size_t    fragmentation;    //fragmentation when we don't compact
+    size_t    gc_clock;         //gc# when last GC happened
+    size_t    time_clock;       //time when last gc started
+    size_t    gc_elapsed_time;  // Time it took for the gc to complete
+    float     gc_speed;         // speed in bytes/msec for the gc to complete
+
+    // min_size is always the same as min_gc_size..
+    size_t    min_gc_size;
+    size_t    max_size;
+    size_t    min_size;
+    size_t    default_new_allocation;
+    size_t    fragmentation_limit;
+    float     fragmentation_burden_limit;
+    float     limit;
+    float     max_limit;
+};
+
+#define ro_in_entry 0x1
+
+#ifdef SEG_MAPPING_TABLE
+// Note that I am storing both h0 and seg0, even though in Server GC you can get to
+// the heap* from the segment info. This is because heap_of needs to be really fast
+// and we would not want yet another indirection.
+struct seg_mapping
+{
+    // if an address is > boundary it belongs to h1; else h0.
+    // since we init h0 and h1 to 0, if we get 0 it means that
+    // address doesn't exist on managed segments. And heap_of
+    // would just return heap0 which is what it does now.
+    uint8_t* boundary;
+#ifdef MULTIPLE_HEAPS
+    gc_heap* h0;
+    gc_heap* h1;
+#endif //MULTIPLE_HEAPS
+    // You could have an address that's in between 2 segments and
+    // this would return a seg, the caller then will use
+    // in_range_for_segment to determine if it's on that seg.
+    heap_segment* seg0; // this is what the seg for h0 is.
+    heap_segment* seg1; // this is what the seg for h1 is.
+    // Note that when frozen objects are used we mask seg1
+    // with 0x1 to indicate that there is a ro segment for
+    // this entry.
+};
+#endif //SEG_MAPPING_TABLE
+
+// alignment helpers
+//Alignment constant for allocation
+#define ALIGNCONST (DATA_ALIGNMENT-1)
+
+// Round nbytes up according to an alignment *mask*.
+// NOTE: 'alignment' is a mask (a power of two minus one, e.g.
+// ALIGNCONST == DATA_ALIGNMENT-1), not the alignment itself;
+// the (x + mask) & ~mask trick only works for such masks.
+inline
+size_t Align (size_t nbytes, int alignment=ALIGNCONST)
+{
+    return (nbytes + alignment) & ~alignment;
+}
+
+//return alignment constant for small object heap vs large object heap
+inline
+int get_alignment_constant (BOOL small_object_p)
+{
+#ifdef FEATURE_STRUCTALIGN
+    // If any objects on the large object heap require 8-byte alignment,
+    // the compiler will tell us so. Let's not guess an alignment here.
+    return ALIGNCONST;
+#else // FEATURE_STRUCTALIGN
+    // Large objects use mask 7 (8-byte alignment); small objects use
+    // the platform's data-alignment mask.
+    return small_object_p ? ALIGNCONST : 7;
+#endif // FEATURE_STRUCTALIGN
+}
+
+// Per-generation allocation-budget snapshot (matches fields of the same
+// name in dynamic_data); presumably surfaced via ETW events — confirm
+// at the call sites that populate it.
+struct etw_opt_info
+{
+    size_t desired_allocation;
+    size_t new_allocation;
+    int gen_number;
+};
+
+// Reasons an allocating thread had to wait; recorded when firing
+// allocation-wait events.
+enum alloc_wait_reason
+{
+    // When we don't care about firing an event for
+    // this.
+    awr_ignored = -1,
+
+    // when we detect we are in low memory
+    awr_low_memory = 0,
+
+    // when we detect the ephemeral segment is too full
+    awr_low_ephemeral = 1,
+
+    // we've given out too much budget for gen0.
+    awr_gen0_alloc = 2,
+
+    // we've given out too much budget for loh.
+    awr_loh_alloc = 3,
+
+    // this event is really obsolete - it's for pre-XP
+    // OSs where low mem notification is not supported.
+    awr_alloc_loh_low_mem = 4,
+
+    // we ran out of VM spaced to reserve on loh.
+    awr_loh_oos = 5,
+
+    // ran out of space when allocating a small object
+    awr_gen0_oos_bgc = 6,
+
+    // ran out of space when allocating a large object
+    awr_loh_oos_bgc = 7,
+
+    // waiting for BGC to let FGC happen
+    awr_fgc_wait_for_bgc = 8,
+
+    // wait for bgc to finish to get loh seg.
+    awr_get_loh_seg = 9,
+
+    // we don't allow loh allocation during bgc planning.
+    awr_loh_alloc_during_plan = 10,
+
+    // we don't allow too much loh allocation during bgc.
+    awr_loh_alloc_during_bgc = 11
+};
+
+// Payload handed to a waiting allocator thread; 'awr' presumably holds
+// an alloc_wait_reason value (stored as int) — confirm at call sites.
+struct alloc_thread_wait_data
+{
+    int awr;
+};
+
+// Why the allocation spin lock ("msl" — see the msl counters printed by
+// print_sync_stats) was taken; recorded in spinlock_info entries.
+enum msl_take_state
+{
+    mt_get_large_seg,
+    mt_wait_bgc_plan,
+    mt_wait_bgc,
+    mt_block_gc,
+    mt_clr_mem,
+    mt_clr_large_mem,
+    mt_t_eph_gc,
+    mt_t_full_gc,
+    mt_alloc_small,
+    mt_alloc_large,
+    mt_alloc_small_cant,
+    mt_alloc_large_cant,
+    mt_try_alloc,
+    mt_try_budget
+};
+
+// Whether the spinlock history entry records an acquire or a release.
+enum msl_enter_state
+{
+    me_acquire,
+    me_release
+};
+
+// One entry of the spinlock history ring recorded by
+// add_saved_spinlock_info (kept under SPINLOCK_HISTORY).
+struct spinlock_info
+{
+    msl_enter_state enter_state;
+    msl_take_state take_state;
+    EEThreadId thread_id;
+};
+
+// Assumed cache-line size, in bytes.
+const unsigned HS_CACHE_LINE_SIZE = 128;
+
+#ifdef SNOOP_STATS
+// Per-heap counters for the mark-stealing ("snoop") machinery.
+struct snoop_stats_data
+{
+    int heap_index;
+
+    // total number of objects that we called
+    // gc_mark on.
+    size_t objects_checked_count;
+    // total number of times we called gc_mark
+    // on a 0 reference.
+    size_t zero_ref_count;
+    // total objects actually marked.
+    size_t objects_marked_count;
+    // number of objects written to the mark stack because
+    // of mark_stolen.
+    size_t stolen_stack_count;
+    // number of objects pushed onto the mark stack because
+    // of the partial mark code path.
+    size_t partial_stack_count;
+    // number of objects pushed onto the mark stack because
+    // of the non partial mark code path.
+    size_t normal_stack_count;
+    // number of references marked without mark stack.
+    size_t non_stack_count;
+
+    // number of times we detect next heap's mark stack
+    // is not busy.
+    size_t stack_idle_count;
+
+    // number of times we do switch to thread.
+    size_t switch_to_thread_count;
+
+    // number of times we are checking if the next heap's
+    // mark stack is busy.
+    size_t check_level_count;
+    // number of times next stack is busy and level is
+    // at the bottom.
+    size_t busy_count;
+    // how many interlocked exchange operations we did
+    size_t interlocked_count;
+    // number of times parent objects stolen
+    size_t partial_mark_parent_count;
+    // number of times we look at a normal stolen entry,
+    // or the beginning/ending PM pair.
+    size_t stolen_or_pm_count;
+    // number of times we see 2 for the entry.
+    size_t stolen_entry_count;
+    // number of times we see a PM entry that's not ready.
+    size_t pm_not_ready_count;
+    // number of stolen normal marked objects and partial mark children.
+    size_t normal_count;
+    // number of times the bottom of mark stack was cleared.
+    size_t stack_bottom_clear_count;
+};
+#endif //SNOOP_STATS
+
+// Bookkeeping for a user-requested no-GC region (see
+// prepare_for_no_gc_region / end_no_gc_region). Saves the budgets and
+// pause mode in effect before the region so they can be restored after.
+struct no_gc_region_info
+{
+    size_t soh_allocation_size;
+    size_t loh_allocation_size;
+    size_t started;
+    size_t num_gcs;
+    size_t num_gcs_induced;
+    start_no_gc_region_status start_status;
+    gc_pause_mode saved_pause_mode;
+    size_t saved_gen0_min_size;
+    size_t saved_gen3_min_size;
+    BOOL minimal_gc_p;
+};
+
+// if you change these, make sure you update them for sos (strike.cpp) as well.
+//
+// !!!NOTE!!!
+// Right now I am only recording data from blocking GCs. When recording from BGC,
+// it should have its own copy just like gc_data_per_heap.
+// for BGCs we will have a very different set of datapoints to record.
+// Indices of the "interesting data" counters recorded per heap
+// (blocking GCs only; see the note above).
+enum interesting_data_point
+{
+    idp_pre_short = 0,
+    idp_post_short = 1,
+    idp_merged_pin = 2,
+    idp_converted_pin = 3,
+    idp_pre_pin = 4,
+    idp_post_pin = 5,
+    idp_pre_and_post_pin = 6,
+    idp_pre_short_padded = 7,
+    idp_post_short_padded = 8,
+    max_idp_count
+};
+
+//class definition of the internal class
+#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
+extern void GCProfileWalkHeapWorker(BOOL fProfilerPinned, BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw);
+#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
+// The core GC implementation class. With MULTIPLE_HEAPS (server GC)
+// there is one instance per heap (gc_heap::g_heaps[0..n_heaps)).
+class gc_heap
+{
+    friend struct ::_DacGlobals;
+#ifdef DACCESS_COMPILE
+    friend class ::ClrDataAccess;
+    friend class ::DacHeapWalker;
+#endif //DACCESS_COMPILE
+
+    friend class GCHeap;
+#ifdef FEATURE_PREMORTEM_FINALIZATION
+    friend class CFinalize;
+#endif // FEATURE_PREMORTEM_FINALIZATION
+    friend struct ::alloc_context;
+    friend void ProfScanRootsHelper(Object** object, ScanContext *pSC, uint32_t dwFlags);
+    friend void GCProfileWalkHeapWorker(BOOL fProfilerPinned, BOOL fShouldWalkHeapRootsForEtw, BOOL fShouldWalkHeapObjectsForEtw);
+    friend class t_join;
+    friend class gc_mechanisms;
+    friend class seg_free_spaces;
+
+#ifdef BACKGROUND_GC
+    friend class exclusive_sync;
+    friend class recursive_gc_sync;
+#endif //BACKGROUND_GC
+
+#if defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC)
+    friend void checkGCWriteBarrier();
+    friend void initGCShadow();
+#endif //defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC)
+
+    // card_fn is a member-function pointer under MULTIPLE_HEAPS and a
+    // plain function pointer otherwise; call_fn/__this hide the
+    // difference at the call sites.
+#ifdef MULTIPLE_HEAPS
+    typedef void (gc_heap::* card_fn) (uint8_t**, int);
+#define call_fn(fn) (this->*fn)
+#define __this this
+#else
+    typedef void (* card_fn) (uint8_t**);
+#define call_fn(fn) (*fn)
+#define __this (gc_heap*)0
+#endif
+
+public:
+
+#ifdef TRACE_GC
+    PER_HEAP
+    void print_free_list (int gen, heap_segment* seg);
+#endif // TRACE_GC
+
+#ifdef SYNCHRONIZATION_STATS
+
+    // Reset the sync stats on every heap (or the single heap).
+    PER_HEAP_ISOLATED
+    void init_sync_stats()
+    {
+#ifdef MULTIPLE_HEAPS
+        for (int i = 0; i < gc_heap::n_heaps;
i++) + { + gc_heap::g_heaps[i]->init_heap_sync_stats(); + } +#else //MULTIPLE_HEAPS + init_heap_sync_stats(); +#endif //MULTIPLE_HEAPS + } + + PER_HEAP_ISOLATED + void print_sync_stats(unsigned int gc_count_during_log) + { + // bad/good gl acquire is accumulative during the log interval (because the numbers are too small) + // min/max msl_acquire is the min/max during the log interval, not each GC. + // Threads is however many allocation threads for the last GC. + // num of msl acquired, avg_msl, high and low are all for each GC. + printf("%2s%2s%10s%10s%12s%6s%4s%8s( st, wl, stw, dpw)\n", + "H", "T", "good_sus", "bad_sus", "avg_msl", "high", "low", "num_msl"); + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap::g_heaps[i]->print_heap_sync_stats(i, gc_count_during_log); + } +#else //MULTIPLE_HEAPS + print_heap_sync_stats(0, gc_count_during_log); +#endif //MULTIPLE_HEAPS + } + +#endif //SYNCHRONIZATION_STATS + + PER_HEAP + void verify_soh_segment_list(); + PER_HEAP + void verify_mark_array_cleared (heap_segment* seg); + PER_HEAP + void verify_mark_array_cleared(); + PER_HEAP + void verify_seg_end_mark_array_cleared(); + PER_HEAP + void verify_partial(); + +#ifdef VERIFY_HEAP + PER_HEAP + void verify_free_lists(); + PER_HEAP + void verify_heap (BOOL begin_gc_p); +#endif //VERIFY_HEAP + + PER_HEAP_ISOLATED + void fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num); + + PER_HEAP_ISOLATED + void fire_pevents(); + +#ifdef FEATURE_BASICFREEZE + static void walk_read_only_segment(heap_segment *seg, void *pvContext, object_callback_func pfnMethodTable, object_callback_func pfnObjRef); +#endif + + static + heap_segment* make_heap_segment (uint8_t* new_pages, + size_t size, + int h_number); + static + l_heap* make_large_heap (uint8_t* new_pages, size_t size, BOOL managed); + + static + gc_heap* make_gc_heap( +#if defined (MULTIPLE_HEAPS) + GCHeap* vm_heap, + int heap_number +#endif //MULTIPLE_HEAPS + ); + + 
static + void destroy_gc_heap(gc_heap* heap); + + static + HRESULT initialize_gc (size_t segment_size, + size_t heap_size +#ifdef MULTIPLE_HEAPS + , unsigned number_of_heaps +#endif //MULTIPLE_HEAPS + ); + + static + void shutdown_gc(); + + PER_HEAP + CObjectHeader* allocate (size_t jsize, + alloc_context* acontext); + +#ifdef MULTIPLE_HEAPS + static void balance_heaps (alloc_context* acontext); + static + gc_heap* balance_heaps_loh (alloc_context* acontext, size_t size); + static + void __stdcall gc_thread_stub (void* arg); +#endif //MULTIPLE_HEAPS + + CObjectHeader* try_fast_alloc (size_t jsize); + + // For LOH allocations we only update the alloc_bytes_loh in allocation + // context - we don't actually use the ptr/limit from it so I am + // making this explicit by not passing in the alloc_context. + PER_HEAP + CObjectHeader* allocate_large_object (size_t size, int64_t& alloc_bytes); + +#ifdef FEATURE_STRUCTALIGN + PER_HEAP + uint8_t* pad_for_alignment_large (uint8_t* newAlloc, int requiredAlignment, size_t size); +#endif // FEATURE_STRUCTALIGN + + PER_HEAP_ISOLATED + void do_pre_gc(); + + PER_HEAP_ISOLATED + void do_post_gc(); + + PER_HEAP + BOOL expand_soh_with_minimal_gc(); + + // EE is always suspended when this method is called. + // returning FALSE means we actually didn't do a GC. This happens + // when we figured that we needed to do a BGC. 
+ PER_HEAP + int garbage_collect (int n); + + PER_HEAP + void init_records(); + + static + uint32_t* make_card_table (uint8_t* start, uint8_t* end); + + static + void set_fgm_result (failure_get_memory f, size_t s, BOOL loh_p); + + static + int grow_brick_card_tables (uint8_t* start, + uint8_t* end, + size_t size, + heap_segment* new_seg, + gc_heap* hp, + BOOL loh_p); + + PER_HEAP + BOOL is_mark_set (uint8_t* o); + +#ifdef FEATURE_BASICFREEZE + PER_HEAP_ISOLATED + bool frozen_object_p(Object* obj); +#endif // FEATURE_BASICFREEZE + +protected: + + PER_HEAP + void walk_heap (walk_fn fn, void* context, int gen_number, BOOL walk_large_object_heap_p); + + struct walk_relocate_args + { + uint8_t* last_plug; + BOOL is_shortened; + mark* pinned_plug_entry; + }; + + PER_HEAP + void walk_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, + walk_relocate_args* args, size_t profiling_context); + + PER_HEAP + void walk_relocation (int condemned_gen_number, + uint8_t* first_condemned_address, size_t profiling_context); + + PER_HEAP + void walk_relocation_in_brick (uint8_t* tree, walk_relocate_args* args, size_t profiling_context); + +#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) + PER_HEAP + void walk_relocation_for_bgc(size_t profiling_context); + + PER_HEAP + void make_free_lists_for_profiler_for_bgc(); +#endif // defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) + + PER_HEAP + int generation_to_condemn (int n, + BOOL* blocking_collection_p, + BOOL* elevation_requested_p, + BOOL check_only_p); + + PER_HEAP_ISOLATED + int joined_generation_to_condemn (BOOL should_evaluate_elevation, int n_initial, BOOL* blocking_collection + STRESS_HEAP_ARG(int n_original)); + + PER_HEAP + size_t min_reclaim_fragmentation_threshold (uint32_t num_heaps); + + PER_HEAP_ISOLATED + uint64_t min_high_fragmentation_threshold (uint64_t available_mem, uint32_t num_heaps); + + PER_HEAP + void concurrent_print_time_delta (const char* msg); + PER_HEAP + void free_list_info (int 
gen_num, const char* msg); + + // in svr GC on entry and exit of this method, the GC threads are not + // synchronized + PER_HEAP + void gc1(); + + PER_HEAP_ISOLATED + void save_data_for_no_gc(); + + PER_HEAP_ISOLATED + void restore_data_for_no_gc(); + + PER_HEAP_ISOLATED + void update_collection_counts_for_no_gc(); + + PER_HEAP_ISOLATED + BOOL should_proceed_with_gc(); + + PER_HEAP_ISOLATED + void record_gcs_during_no_gc(); + + PER_HEAP + BOOL find_loh_free_for_no_gc(); + + PER_HEAP + BOOL find_loh_space_for_no_gc(); + + PER_HEAP + BOOL commit_loh_for_no_gc (heap_segment* seg); + + PER_HEAP_ISOLATED + start_no_gc_region_status prepare_for_no_gc_region (uint64_t total_size, + BOOL loh_size_known, + uint64_t loh_size, + BOOL disallow_full_blocking); + + PER_HEAP + BOOL loh_allocated_for_no_gc(); + + PER_HEAP_ISOLATED + void release_no_gc_loh_segments(); + + PER_HEAP_ISOLATED + void thread_no_gc_loh_segments(); + + PER_HEAP + void allocate_for_no_gc_after_gc(); + + PER_HEAP + void set_loh_allocations_for_no_gc(); + + PER_HEAP + void set_soh_allocations_for_no_gc(); + + PER_HEAP + void prepare_for_no_gc_after_gc(); + + PER_HEAP_ISOLATED + void set_allocations_for_no_gc(); + + PER_HEAP_ISOLATED + BOOL should_proceed_for_no_gc(); + + PER_HEAP_ISOLATED + start_no_gc_region_status get_start_no_gc_region_status(); + + PER_HEAP_ISOLATED + end_no_gc_region_status end_no_gc_region(); + + PER_HEAP_ISOLATED + void handle_failure_for_no_gc(); + + PER_HEAP + void fire_etw_allocation_event (size_t allocation_amount, int gen_number, uint8_t* object_address); + + PER_HEAP + void fire_etw_pin_object_event (uint8_t* object, uint8_t** ppObject); + + PER_HEAP + size_t limit_from_size (size_t size, size_t room, int gen_number, + int align_const); + PER_HEAP + int try_allocate_more_space (alloc_context* acontext, size_t jsize, + int alloc_generation_number); + PER_HEAP + BOOL allocate_more_space (alloc_context* acontext, size_t jsize, + int alloc_generation_number); + + PER_HEAP + size_t 
get_full_compact_gc_count(); + + PER_HEAP + BOOL short_on_end_of_seg (int gen_number, + heap_segment* seg, + int align_const); + + PER_HEAP + BOOL a_fit_free_list_p (int gen_number, + size_t size, + alloc_context* acontext, + int align_const); + +#ifdef BACKGROUND_GC + PER_HEAP + void wait_for_background (alloc_wait_reason awr); + + PER_HEAP + void wait_for_bgc_high_memory (alloc_wait_reason awr); + + PER_HEAP + void bgc_loh_alloc_clr (uint8_t* alloc_start, + size_t size, + alloc_context* acontext, + int align_const, + int lock_index, + BOOL check_used_p, + heap_segment* seg); +#endif //BACKGROUND_GC + +#ifdef BACKGROUND_GC + PER_HEAP + void wait_for_background_planning (alloc_wait_reason awr); + + PER_HEAP + BOOL bgc_loh_should_allocate(); +#endif //BACKGROUND_GC + +#define max_saved_spinlock_info 48 + +#ifdef SPINLOCK_HISTORY + PER_HEAP + int spinlock_info_index; + + PER_HEAP + spinlock_info last_spinlock_info[max_saved_spinlock_info + 8]; +#endif //SPINLOCK_HISTORY + + PER_HEAP + void add_saved_spinlock_info ( + msl_enter_state enter_state, + msl_take_state take_state); + + PER_HEAP + BOOL a_fit_free_list_large_p (size_t size, + alloc_context* acontext, + int align_const); + + PER_HEAP + BOOL a_fit_segment_end_p (int gen_number, + heap_segment* seg, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p); + PER_HEAP + BOOL loh_a_fit_segment_end_p (int gen_number, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p, + oom_reason* oom_r); + PER_HEAP + BOOL loh_get_new_seg (generation* gen, + size_t size, + int align_const, + BOOL* commit_failed_p, + oom_reason* oom_r); + + PER_HEAP_ISOLATED + size_t get_large_seg_size (size_t size); + + PER_HEAP + BOOL retry_full_compact_gc (size_t size); + + PER_HEAP + BOOL check_and_wait_for_bgc (alloc_wait_reason awr, + BOOL* did_full_compact_gc); + + PER_HEAP + BOOL trigger_full_compact_gc (gc_reason gr, + oom_reason* oom_r); + + PER_HEAP + BOOL 
trigger_ephemeral_gc (gc_reason gr); + + PER_HEAP + BOOL soh_try_fit (int gen_number, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p, + BOOL* short_seg_end_p); + PER_HEAP + BOOL loh_try_fit (int gen_number, + size_t size, + alloc_context* acontext, + int align_const, + BOOL* commit_failed_p, + oom_reason* oom_r); + + PER_HEAP + BOOL allocate_small (int gen_number, + size_t size, + alloc_context* acontext, + int align_const); + + enum c_gc_state + { + c_gc_state_marking, + c_gc_state_planning, + c_gc_state_free + }; + +#ifdef RECORD_LOH_STATE + #define max_saved_loh_states 12 + PER_HEAP + int loh_state_index; + + struct loh_state_info + { + allocation_state alloc_state; + EEThreadId thread_id; + }; + + PER_HEAP + loh_state_info last_loh_states[max_saved_loh_states]; + PER_HEAP + void add_saved_loh_state (allocation_state loh_state_to_save, EEThreadId thread_id); +#endif //RECORD_LOH_STATE + PER_HEAP + BOOL allocate_large (int gen_number, + size_t size, + alloc_context* acontext, + int align_const); + + PER_HEAP_ISOLATED + int init_semi_shared(); + PER_HEAP + int init_gc_heap (int heap_number); + PER_HEAP + void self_destroy(); + PER_HEAP_ISOLATED + void destroy_semi_shared(); + PER_HEAP + void repair_allocation_contexts (BOOL repair_p); + PER_HEAP + void fix_allocation_contexts (BOOL for_gc_p); + PER_HEAP + void fix_youngest_allocation_area (BOOL for_gc_p); + PER_HEAP + void fix_allocation_context (alloc_context* acontext, BOOL for_gc_p, + int align_const); + PER_HEAP + void fix_large_allocation_area (BOOL for_gc_p); + PER_HEAP + void fix_older_allocation_area (generation* older_gen); + PER_HEAP + void set_allocation_heap_segment (generation* gen); + PER_HEAP + void reset_allocation_pointers (generation* gen, uint8_t* start); + PER_HEAP + int object_gennum (uint8_t* o); + PER_HEAP + int object_gennum_plan (uint8_t* o); + PER_HEAP_ISOLATED + void init_heap_segment (heap_segment* seg); + PER_HEAP + void delete_heap_segment 
(heap_segment* seg, BOOL consider_hoarding=FALSE); +#ifdef FEATURE_BASICFREEZE + PER_HEAP + BOOL insert_ro_segment (heap_segment* seg); + PER_HEAP + void remove_ro_segment (heap_segment* seg); +#endif //FEATURE_BASICFREEZE + PER_HEAP + BOOL set_ro_segment_in_range (heap_segment* seg); + PER_HEAP + BOOL unprotect_segment (heap_segment* seg); + PER_HEAP + heap_segment* soh_get_segment_to_expand(); + PER_HEAP + heap_segment* get_segment (size_t size, BOOL loh_p); + PER_HEAP_ISOLATED + void seg_mapping_table_add_segment (heap_segment* seg, gc_heap* hp); + PER_HEAP_ISOLATED + void seg_mapping_table_remove_segment (heap_segment* seg); + PER_HEAP + heap_segment* get_large_segment (size_t size, BOOL* did_full_compact_gc); + PER_HEAP + void thread_loh_segment (heap_segment* new_seg); + PER_HEAP_ISOLATED + heap_segment* get_segment_for_loh (size_t size +#ifdef MULTIPLE_HEAPS + , gc_heap* hp +#endif //MULTIPLE_HEAPS + ); + PER_HEAP + void reset_heap_segment_pages (heap_segment* seg); + PER_HEAP + void decommit_heap_segment_pages (heap_segment* seg, size_t extra_space); + PER_HEAP + void decommit_heap_segment (heap_segment* seg); + PER_HEAP + void clear_gen0_bricks(); +#ifdef BACKGROUND_GC + PER_HEAP + void rearrange_small_heap_segments(); +#endif //BACKGROUND_GC + PER_HEAP + void rearrange_large_heap_segments(); + PER_HEAP + void rearrange_heap_segments(BOOL compacting); + + PER_HEAP_ISOLATED + void reset_write_watch_for_gc_heap(void* base_address, size_t region_size); + PER_HEAP_ISOLATED + void get_write_watch_for_gc_heap(bool reset, void *base_address, size_t region_size, void** dirty_pages, uintptr_t* dirty_page_count_ref, bool is_runtime_suspended); + + PER_HEAP + void switch_one_quantum(); + PER_HEAP + void reset_ww_by_chunk (uint8_t* start_address, size_t total_reset_size); + PER_HEAP + void switch_on_reset (BOOL concurrent_p, size_t* current_total_reset_size, size_t last_reset_size); + PER_HEAP + void reset_write_watch (BOOL concurrent_p); + PER_HEAP + void 
adjust_ephemeral_limits (bool is_runtime_suspended); + PER_HEAP + void make_generation (generation& gen, heap_segment* seg, + uint8_t* start, uint8_t* pointer); + + +#define USE_PADDING_FRONT 1 +#define USE_PADDING_TAIL 2 + + PER_HEAP + BOOL size_fit_p (size_t size REQD_ALIGN_AND_OFFSET_DCL, uint8_t* alloc_pointer, uint8_t* alloc_limit, + uint8_t* old_loc=0, int use_padding=USE_PADDING_TAIL); + PER_HEAP + BOOL a_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit, + int align_const); + + PER_HEAP + void handle_oom (int heap_num, oom_reason reason, size_t alloc_size, + uint8_t* allocated, uint8_t* reserved); + + PER_HEAP + size_t card_of ( uint8_t* object); + PER_HEAP + uint8_t* brick_address (size_t brick); + PER_HEAP + size_t brick_of (uint8_t* add); + PER_HEAP + uint8_t* card_address (size_t card); + PER_HEAP + size_t card_to_brick (size_t card); + PER_HEAP + void clear_card (size_t card); + PER_HEAP + void set_card (size_t card); + PER_HEAP + BOOL card_set_p (size_t card); + PER_HEAP + void card_table_set_bit (uint8_t* location); + +#ifdef CARD_BUNDLE + PER_HEAP + void update_card_table_bundle(); + PER_HEAP + void reset_card_table_write_watch(); + PER_HEAP + void card_bundle_clear(size_t cardb); + PER_HEAP + void card_bundles_set (size_t start_cardb, size_t end_cardb); + PER_HEAP + BOOL card_bundle_set_p (size_t cardb); + PER_HEAP + BOOL find_card_dword (size_t& cardw, size_t cardw_end); + PER_HEAP + void enable_card_bundles(); + PER_HEAP_ISOLATED + BOOL card_bundles_enabled(); + +#endif //CARD_BUNDLE + + PER_HEAP + BOOL find_card (uint32_t* card_table, size_t& card, + size_t card_word_end, size_t& end_card); + PER_HEAP + BOOL grow_heap_segment (heap_segment* seg, uint8_t* high_address); + PER_HEAP + int grow_heap_segment (heap_segment* seg, uint8_t* high_address, uint8_t* old_loc, size_t size, BOOL pad_front_p REQD_ALIGN_AND_OFFSET_DCL); + PER_HEAP + void copy_brick_card_range (uint8_t* la, uint32_t* old_card_table, + short* old_brick_table, + 
heap_segment* seg, + uint8_t* start, uint8_t* end); + PER_HEAP + void init_brick_card_range (heap_segment* seg); + PER_HEAP + void copy_brick_card_table_l_heap (); + PER_HEAP + void copy_brick_card_table(); + PER_HEAP + void clear_brick_table (uint8_t* from, uint8_t* end); + PER_HEAP + void set_brick (size_t index, ptrdiff_t val); + PER_HEAP + int brick_entry (size_t index); +#ifdef MARK_ARRAY + PER_HEAP + unsigned int mark_array_marked (uint8_t* add); + PER_HEAP + void mark_array_set_marked (uint8_t* add); + PER_HEAP + BOOL is_mark_bit_set (uint8_t* add); + PER_HEAP + void gmark_array_set_marked (uint8_t* add); + PER_HEAP + void set_mark_array_bit (size_t mark_bit); + PER_HEAP + BOOL mark_array_bit_set (size_t mark_bit); + PER_HEAP + void mark_array_clear_marked (uint8_t* add); + PER_HEAP + void clear_mark_array (uint8_t* from, uint8_t* end, BOOL check_only=TRUE +#ifdef FEATURE_BASICFREEZE + , BOOL read_only=FALSE +#endif // FEATURE_BASICFREEZE + ); +#ifdef BACKGROUND_GC + PER_HEAP + void seg_clear_mark_array_bits_soh (heap_segment* seg); + PER_HEAP + void clear_batch_mark_array_bits (uint8_t* start, uint8_t* end); + PER_HEAP + void bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end); + PER_HEAP + void clear_mark_array_by_objects (uint8_t* from, uint8_t* end, BOOL loh_p); +#ifdef VERIFY_HEAP + PER_HEAP + void set_batch_mark_array_bits (uint8_t* start, uint8_t* end); + PER_HEAP + void check_batch_mark_array_bits (uint8_t* start, uint8_t* end); +#endif //VERIFY_HEAP +#endif //BACKGROUND_GC +#endif //MARK_ARRAY + + PER_HEAP + BOOL large_object_marked (uint8_t* o, BOOL clearp); + +#ifdef BACKGROUND_GC + PER_HEAP + BOOL background_allowed_p(); +#endif //BACKGROUND_GC + + PER_HEAP_ISOLATED + void send_full_gc_notification (int gen_num, BOOL due_to_alloc_p); + + PER_HEAP + void check_for_full_gc (int gen_num, size_t size); + + PER_HEAP + void adjust_limit (uint8_t* start, size_t limit_size, generation* gen, + int gen_number); + PER_HEAP + void adjust_limit_clr 
(uint8_t* start, size_t limit_size, + alloc_context* acontext, heap_segment* seg, + int align_const, int gen_number); + PER_HEAP + void leave_allocation_segment (generation* gen); + + PER_HEAP + void init_free_and_plug(); + + PER_HEAP + void print_free_and_plug (const char* msg); + + PER_HEAP + void add_gen_plug (int gen_number, size_t plug_size); + + PER_HEAP + void add_gen_free (int gen_number, size_t free_size); + + PER_HEAP + void add_item_to_current_pinned_free (int gen_number, size_t free_size); + + PER_HEAP + void remove_gen_free (int gen_number, size_t free_size); + + PER_HEAP + uint8_t* allocate_in_older_generation (generation* gen, size_t size, + int from_gen_number, + uint8_t* old_loc=0 + REQD_ALIGN_AND_OFFSET_DEFAULT_DCL); + PER_HEAP + generation* ensure_ephemeral_heap_segment (generation* consing_gen); + PER_HEAP + uint8_t* allocate_in_condemned_generations (generation* gen, + size_t size, + int from_gen_number, +#ifdef SHORT_PLUGS + BOOL* convert_to_pinned_p=NULL, + uint8_t* next_pinned_plug=0, + heap_segment* current_seg=0, +#endif //SHORT_PLUGS + uint8_t* old_loc=0 + REQD_ALIGN_AND_OFFSET_DEFAULT_DCL); +#ifdef INTERIOR_POINTERS + // Verifies that interior is actually in the range of seg; otherwise + // returns 0. 
+ PER_HEAP_ISOLATED + heap_segment* find_segment (uint8_t* interior, BOOL small_segment_only_p); + + PER_HEAP + heap_segment* find_segment_per_heap (uint8_t* interior, BOOL small_segment_only_p); + + PER_HEAP + uint8_t* find_object_for_relocation (uint8_t* o, uint8_t* low, uint8_t* high); +#endif //INTERIOR_POINTERS + + PER_HEAP_ISOLATED + gc_heap* heap_of (uint8_t* object); + + PER_HEAP_ISOLATED + gc_heap* heap_of_gc (uint8_t* object); + + PER_HEAP_ISOLATED + size_t& promoted_bytes (int); + + PER_HEAP + uint8_t* find_object (uint8_t* o, uint8_t* low); + + PER_HEAP + dynamic_data* dynamic_data_of (int gen_number); + PER_HEAP + ptrdiff_t get_desired_allocation (int gen_number); + PER_HEAP + ptrdiff_t get_new_allocation (int gen_number); + PER_HEAP + ptrdiff_t get_allocation (int gen_number); + PER_HEAP + bool new_allocation_allowed (int gen_number); +#ifdef BACKGROUND_GC + PER_HEAP_ISOLATED + void allow_new_allocation (int gen_number); + PER_HEAP_ISOLATED + void disallow_new_allocation (int gen_number); +#endif //BACKGROUND_GC + PER_HEAP + void reset_pinned_queue(); + PER_HEAP + void reset_pinned_queue_bos(); + PER_HEAP + void set_allocator_next_pin (generation* gen); + PER_HEAP + void set_allocator_next_pin (uint8_t* alloc_pointer, uint8_t*& alloc_limit); + PER_HEAP + void enque_pinned_plug (generation* gen, uint8_t* plug, size_t len); + PER_HEAP + void enque_pinned_plug (uint8_t* plug, + BOOL save_pre_plug_info_p, + uint8_t* last_object_in_last_plug); + PER_HEAP + void merge_with_last_pinned_plug (uint8_t* last_pinned_plug, size_t plug_size); + PER_HEAP + void set_pinned_info (uint8_t* last_pinned_plug, + size_t plug_len, + uint8_t* alloc_pointer, + uint8_t*& alloc_limit); + PER_HEAP + void set_pinned_info (uint8_t* last_pinned_plug, size_t plug_len, generation* gen); + PER_HEAP + void save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_object_in_last_plug, uint8_t* post_plug); + PER_HEAP + size_t deque_pinned_plug (); + PER_HEAP + mark* pinned_plug_of 
(size_t bos); + PER_HEAP + mark* oldest_pin (); + PER_HEAP + mark* before_oldest_pin(); + PER_HEAP + BOOL pinned_plug_que_empty_p (); + PER_HEAP + void make_mark_stack (mark* arr); +#ifdef MH_SC_MARK + PER_HEAP + int& mark_stack_busy(); + PER_HEAP + VOLATILE(uint8_t*)& ref_mark_stack (gc_heap* hp, int index); +#endif +#ifdef BACKGROUND_GC + PER_HEAP_ISOLATED + size_t& bpromoted_bytes (int); + PER_HEAP + void make_background_mark_stack (uint8_t** arr); + PER_HEAP + void make_c_mark_list (uint8_t** arr); +#endif //BACKGROUND_GC + PER_HEAP + generation* generation_of (int n); + PER_HEAP + BOOL gc_mark1 (uint8_t* o); + PER_HEAP + BOOL gc_mark (uint8_t* o, uint8_t* low, uint8_t* high); + PER_HEAP + uint8_t* mark_object(uint8_t* o THREAD_NUMBER_DCL); +#ifdef HEAP_ANALYZE + PER_HEAP + void ha_mark_object_simple (uint8_t** o THREAD_NUMBER_DCL); +#endif //HEAP_ANALYZE + PER_HEAP + void mark_object_simple (uint8_t** o THREAD_NUMBER_DCL); + PER_HEAP + void mark_object_simple1 (uint8_t* o, uint8_t* start THREAD_NUMBER_DCL); + +#ifdef MH_SC_MARK + PER_HEAP + void mark_steal (); +#endif //MH_SC_MARK + +#ifdef BACKGROUND_GC + + PER_HEAP + BOOL background_marked (uint8_t* o); + PER_HEAP + BOOL background_mark1 (uint8_t* o); + PER_HEAP + BOOL background_mark (uint8_t* o, uint8_t* low, uint8_t* high); + PER_HEAP + uint8_t* background_mark_object (uint8_t* o THREAD_NUMBER_DCL); + PER_HEAP + void background_mark_simple (uint8_t* o THREAD_NUMBER_DCL); + PER_HEAP + void background_mark_simple1 (uint8_t* o THREAD_NUMBER_DCL); + PER_HEAP_ISOLATED + void background_promote (Object**, ScanContext* , uint32_t); + PER_HEAP + BOOL background_object_marked (uint8_t* o, BOOL clearp); + PER_HEAP + void init_background_gc(); + PER_HEAP + uint8_t* background_next_end (heap_segment*, BOOL); + PER_HEAP + void generation_delete_heap_segment (generation*, + heap_segment*, heap_segment*, heap_segment*); + PER_HEAP + void set_mem_verify (uint8_t*, uint8_t*, uint8_t); + PER_HEAP + void 
process_background_segment_end (heap_segment*, generation*, uint8_t*, + heap_segment*, BOOL*); + PER_HEAP + void process_n_background_segments (heap_segment*, heap_segment*, generation* gen); + PER_HEAP + BOOL fgc_should_consider_object (uint8_t* o, + heap_segment* seg, + BOOL consider_bgc_mark_p, + BOOL check_current_sweep_p, + BOOL check_saved_sweep_p); + PER_HEAP + void should_check_bgc_mark (heap_segment* seg, + BOOL* consider_bgc_mark_p, + BOOL* check_current_sweep_p, + BOOL* check_saved_sweep_p); + PER_HEAP + void background_ephemeral_sweep(); + PER_HEAP + void background_sweep (); + PER_HEAP + void background_mark_through_object (uint8_t* oo THREAD_NUMBER_DCL); + PER_HEAP + uint8_t* background_seg_end (heap_segment* seg, BOOL concurrent_p); + PER_HEAP + uint8_t* background_first_overflow (uint8_t* min_add, + heap_segment* seg, + BOOL concurrent_p, + BOOL small_object_p); + PER_HEAP + void background_process_mark_overflow_internal (int condemned_gen_number, + uint8_t* min_add, uint8_t* max_add, + BOOL concurrent_p); + PER_HEAP + BOOL background_process_mark_overflow (BOOL concurrent_p); + + // for foreground GC to get hold of background structures containing refs + PER_HEAP + void + scan_background_roots (promote_func* fn, int hn, ScanContext *pSC); + + PER_HEAP + BOOL bgc_mark_array_range (heap_segment* seg, + BOOL whole_seg_p, + uint8_t** range_beg, + uint8_t** range_end); + PER_HEAP + void bgc_verify_mark_array_cleared (heap_segment* seg); + PER_HEAP + void verify_mark_bits_cleared (uint8_t* obj, size_t s); + PER_HEAP + void clear_all_mark_array(); +#endif //BACKGROUND_GC + + PER_HEAP + uint8_t* next_end (heap_segment* seg, uint8_t* f); + PER_HEAP + void fix_card_table (); + PER_HEAP + void mark_through_object (uint8_t* oo, BOOL mark_class_object_p THREAD_NUMBER_DCL); + PER_HEAP + BOOL process_mark_overflow (int condemned_gen_number); + PER_HEAP + void process_mark_overflow_internal (int condemned_gen_number, + uint8_t* min_address, uint8_t* max_address); 
+ +#ifdef SNOOP_STATS + PER_HEAP + void print_snoop_stat(); +#endif //SNOOP_STATS + +#ifdef MH_SC_MARK + + PER_HEAP + BOOL check_next_mark_stack (gc_heap* next_heap); + +#endif //MH_SC_MARK + + PER_HEAP + void scan_dependent_handles (int condemned_gen_number, ScanContext *sc, BOOL initial_scan_p); + + PER_HEAP + void mark_phase (int condemned_gen_number, BOOL mark_only_p); + + PER_HEAP + void pin_object (uint8_t* o, uint8_t** ppObject, uint8_t* low, uint8_t* high); + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + PER_HEAP_ISOLATED + size_t get_total_pinned_objects(); +#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + + PER_HEAP + void reset_mark_stack (); + PER_HEAP + uint8_t* insert_node (uint8_t* new_node, size_t sequence_number, + uint8_t* tree, uint8_t* last_node); + PER_HEAP + size_t update_brick_table (uint8_t* tree, size_t current_brick, + uint8_t* x, uint8_t* plug_end); + + PER_HEAP + void plan_generation_start (generation* gen, generation* consing_gen, uint8_t* next_plug_to_allocate); + + PER_HEAP + void realloc_plan_generation_start (generation* gen, generation* consing_gen); + + PER_HEAP + void plan_generation_starts (generation*& consing_gen); + + PER_HEAP + void advance_pins_for_demotion (generation* gen); + + PER_HEAP + void process_ephemeral_boundaries(uint8_t* x, int& active_new_gen_number, + int& active_old_gen_number, + generation*& consing_gen, + BOOL& allocate_in_condemned); + PER_HEAP + void seg_clear_mark_bits (heap_segment* seg); + PER_HEAP + void sweep_ro_segments (heap_segment* start_seg); + PER_HEAP + void convert_to_pinned_plug (BOOL& last_npinned_plug_p, + BOOL& last_pinned_plug_p, + BOOL& pinned_plug_p, + size_t ps, + size_t& artificial_pinned_size); + PER_HEAP + void store_plug_gap_info (uint8_t* plug_start, + uint8_t* plug_end, + BOOL& last_npinned_plug_p, + BOOL& last_pinned_plug_p, + uint8_t*& last_pinned_plug, + BOOL& pinned_plug_p, + uint8_t* last_object_in_last_plug, + BOOL& merge_with_last_pin_p, + // 
this is only for verification purpose + size_t last_plug_len); + PER_HEAP + void plan_phase (int condemned_gen_number); + + PER_HEAP + void record_interesting_data_point (interesting_data_point idp); + +#ifdef GC_CONFIG_DRIVEN + PER_HEAP + void record_interesting_info_per_heap(); + PER_HEAP_ISOLATED + void record_global_mechanisms(); + PER_HEAP_ISOLATED + BOOL should_do_sweeping_gc (BOOL compact_p); +#endif //GC_CONFIG_DRIVEN + +#ifdef FEATURE_LOH_COMPACTION + // plan_loh can allocate memory so it can fail. If it fails, we will + // fall back to sweeping. + PER_HEAP + BOOL plan_loh(); + + PER_HEAP + void compact_loh(); + + PER_HEAP + void relocate_in_loh_compact(); + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + PER_HEAP + void walk_relocation_loh (size_t profiling_context); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + + PER_HEAP + BOOL loh_enque_pinned_plug (uint8_t* plug, size_t len); + + PER_HEAP + void loh_set_allocator_next_pin(); + + PER_HEAP + BOOL loh_pinned_plug_que_empty_p(); + + PER_HEAP + size_t loh_deque_pinned_plug(); + + PER_HEAP + mark* loh_pinned_plug_of (size_t bos); + + PER_HEAP + mark* loh_oldest_pin(); + + PER_HEAP + BOOL loh_size_fit_p (size_t size, uint8_t* alloc_pointer, uint8_t* alloc_limit); + + PER_HEAP + uint8_t* loh_allocate_in_condemned (uint8_t* old_loc, size_t size); + + PER_HEAP_ISOLATED + BOOL loh_object_p (uint8_t* o); + + PER_HEAP_ISOLATED + BOOL should_compact_loh(); + + // If the LOH compaction mode is just to compact once, + // we need to see if we should reset it back to not compact. + // We would only reset if every heap's LOH was compacted. 
+ PER_HEAP_ISOLATED + void check_loh_compact_mode (BOOL all_heaps_compacted_p); +#endif //FEATURE_LOH_COMPACTION + + PER_HEAP + void decommit_ephemeral_segment_pages (int condemned_gen_number); + PER_HEAP + void fix_generation_bounds (int condemned_gen_number, + generation* consing_gen); + PER_HEAP + uint8_t* generation_limit (int gen_number); + + struct make_free_args + { + int free_list_gen_number; + uint8_t* current_gen_limit; + generation* free_list_gen; + uint8_t* highest_plug; + }; + PER_HEAP + uint8_t* allocate_at_end (size_t size); + PER_HEAP + BOOL ensure_gap_allocation (int condemned_gen_number); + // make_free_lists is only called by blocking GCs. + PER_HEAP + void make_free_lists (int condemned_gen_number); + PER_HEAP + void make_free_list_in_brick (uint8_t* tree, make_free_args* args); + PER_HEAP + void thread_gap (uint8_t* gap_start, size_t size, generation* gen); + PER_HEAP + void loh_thread_gap_front (uint8_t* gap_start, size_t size, generation* gen); + PER_HEAP + void make_unused_array (uint8_t* x, size_t size, BOOL clearp=FALSE, BOOL resetp=FALSE); + PER_HEAP + void clear_unused_array (uint8_t* x, size_t size); + PER_HEAP + void relocate_address (uint8_t** old_address THREAD_NUMBER_DCL); + struct relocate_args + { + uint8_t* last_plug; + uint8_t* low; + uint8_t* high; + BOOL is_shortened; + mark* pinned_plug_entry; + }; + + PER_HEAP + void reloc_survivor_helper (uint8_t** pval); + PER_HEAP + void check_class_object_demotion (uint8_t* obj); + PER_HEAP + void check_class_object_demotion_internal (uint8_t* obj); + + PER_HEAP + void check_demotion_helper (uint8_t** pval, uint8_t* parent_obj); + + PER_HEAP + void relocate_survivor_helper (uint8_t* plug, uint8_t* plug_end); + + PER_HEAP + void verify_pins_with_post_plug_info (const char* msg); + +#ifdef COLLECTIBLE_CLASS + PER_HEAP + void unconditional_set_card_collectible (uint8_t* obj); +#endif //COLLECTIBLE_CLASS + + PER_HEAP + void relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* 
plug_end, mark* pinned_plug_entry); + + PER_HEAP + void relocate_obj_helper (uint8_t* x, size_t s); + + PER_HEAP + void reloc_ref_in_shortened_obj (uint8_t** address_to_set_card, uint8_t** address_to_reloc); + + PER_HEAP + void relocate_pre_plug_info (mark* pinned_plug_entry); + + PER_HEAP + void relocate_shortened_obj_helper (uint8_t* x, size_t s, uint8_t* end, mark* pinned_plug_entry, BOOL is_pinned); + + PER_HEAP + void relocate_survivors_in_plug (uint8_t* plug, uint8_t* plug_end, + BOOL check_last_object_p, + mark* pinned_plug_entry); + PER_HEAP + void relocate_survivors_in_brick (uint8_t* tree, relocate_args* args); + + PER_HEAP + void update_oldest_pinned_plug(); + + PER_HEAP + void relocate_survivors (int condemned_gen_number, + uint8_t* first_condemned_address ); + PER_HEAP + void relocate_phase (int condemned_gen_number, + uint8_t* first_condemned_address); + + struct compact_args + { + BOOL copy_cards_p; + uint8_t* last_plug; + ptrdiff_t last_plug_relocation; + uint8_t* before_last_plug; + size_t current_compacted_brick; + BOOL is_shortened; + mark* pinned_plug_entry; + BOOL check_gennum_p; + int src_gennum; + + void print() + { + dprintf (3, ("last plug: %Ix, last plug reloc: %Ix, before last: %Ix, b: %Ix", + last_plug, last_plug_relocation, before_last_plug, current_compacted_brick)); + } + }; + + PER_HEAP + void copy_cards_range (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p); + PER_HEAP + void gcmemcopy (uint8_t* dest, uint8_t* src, size_t len, BOOL copy_cards_p); + PER_HEAP + void compact_plug (uint8_t* plug, size_t size, BOOL check_last_object_p, compact_args* args); + PER_HEAP + void compact_in_brick (uint8_t* tree, compact_args* args); + + PER_HEAP + mark* get_next_pinned_entry (uint8_t* tree, + BOOL* has_pre_plug_info_p, + BOOL* has_post_plug_info_p, + BOOL deque_p=TRUE); + + PER_HEAP + mark* get_oldest_pinned_entry (BOOL* has_pre_plug_info_p, BOOL* has_post_plug_info_p); + + PER_HEAP + void recover_saved_pinned_info(); + + PER_HEAP 
+ void compact_phase (int condemned_gen_number, uint8_t* + first_condemned_address, BOOL clear_cards); + PER_HEAP + void clear_cards (size_t start_card, size_t end_card); + PER_HEAP + void clear_card_for_addresses (uint8_t* start_address, uint8_t* end_address); + PER_HEAP + void copy_cards (size_t dst_card, size_t src_card, + size_t end_card, BOOL nextp); + PER_HEAP + void copy_cards_for_addresses (uint8_t* dest, uint8_t* src, size_t len); + +#ifdef BACKGROUND_GC + PER_HEAP + void copy_mark_bits (size_t dst_mark_bit, size_t src_mark_bit, size_t end_mark_bit); + PER_HEAP + void copy_mark_bits_for_addresses (uint8_t* dest, uint8_t* src, size_t len); +#endif //BACKGROUND_GC + + + PER_HEAP + BOOL ephemeral_pointer_p (uint8_t* o); + PER_HEAP + void fix_brick_to_highest (uint8_t* o, uint8_t* next_o); + PER_HEAP + uint8_t* find_first_object (uint8_t* start_address, uint8_t* first_object); + PER_HEAP + uint8_t* compute_next_boundary (uint8_t* low, int gen_number, BOOL relocating); + PER_HEAP + void keep_card_live (uint8_t* o, size_t& n_gen, + size_t& cg_pointers_found); + PER_HEAP + void mark_through_cards_helper (uint8_t** poo, size_t& ngen, + size_t& cg_pointers_found, + card_fn fn, uint8_t* nhigh, + uint8_t* next_boundary); + + PER_HEAP + BOOL card_transition (uint8_t* po, uint8_t* end, size_t card_word_end, + size_t& cg_pointers_found, + size_t& n_eph, size_t& n_card_set, + size_t& card, size_t& end_card, + BOOL& foundp, uint8_t*& start_address, + uint8_t*& limit, size_t& n_cards_cleared); + PER_HEAP + void mark_through_cards_for_segments (card_fn fn, BOOL relocating); + + PER_HEAP + void repair_allocation_in_expanded_heap (generation* gen); + PER_HEAP + BOOL can_fit_in_spaces_p (size_t* ordered_blocks, int small_index, size_t* ordered_spaces, int big_index); + PER_HEAP + BOOL can_fit_blocks_p (size_t* ordered_blocks, int block_index, size_t* ordered_spaces, int* space_index); + PER_HEAP + BOOL can_fit_all_blocks_p (size_t* ordered_blocks, size_t* ordered_spaces, int 
count); +#ifdef SEG_REUSE_STATS + PER_HEAP + size_t dump_buckets (size_t* ordered_indices, int count, size_t* total_size); +#endif //SEG_REUSE_STATS + PER_HEAP + void build_ordered_free_spaces (heap_segment* seg); + PER_HEAP + void count_plug (size_t last_plug_size, uint8_t*& last_plug); + PER_HEAP + void count_plugs_in_brick (uint8_t* tree, uint8_t*& last_plug); + PER_HEAP + void build_ordered_plug_indices (); + PER_HEAP + void init_ordered_free_space_indices (); + PER_HEAP + void trim_free_spaces_indices (); + PER_HEAP + BOOL try_best_fit (BOOL end_of_segment_p); + PER_HEAP + BOOL best_fit (size_t free_space, size_t largest_free_space, size_t additional_space, BOOL* use_additional_space); + PER_HEAP + BOOL process_free_space (heap_segment* seg, + size_t free_space, + size_t min_free_size, + size_t min_cont_size, + size_t* total_free_space, + size_t* largest_free_space); + PER_HEAP + size_t compute_eph_gen_starts_size(); + PER_HEAP + void compute_new_ephemeral_size(); + PER_HEAP + BOOL expand_reused_seg_p(); + PER_HEAP + BOOL can_expand_into_p (heap_segment* seg, size_t min_free_size, + size_t min_cont_size, allocator* al); + PER_HEAP + uint8_t* allocate_in_expanded_heap (generation* gen, size_t size, + BOOL& adjacentp, uint8_t* old_loc, +#ifdef SHORT_PLUGS + BOOL set_padding_on_saved_p, + mark* pinned_plug_entry, +#endif //SHORT_PLUGS + BOOL consider_bestfit, int active_new_gen_number + REQD_ALIGN_AND_OFFSET_DEFAULT_DCL); + PER_HEAP + void realloc_plug (size_t last_plug_size, uint8_t*& last_plug, + generation* gen, uint8_t* start_address, + unsigned int& active_new_gen_number, + uint8_t*& last_pinned_gap, BOOL& leftp, + BOOL shortened_p +#ifdef SHORT_PLUGS + , mark* pinned_plug_entry +#endif //SHORT_PLUGS + ); + PER_HEAP + void realloc_in_brick (uint8_t* tree, uint8_t*& last_plug, uint8_t* start_address, + generation* gen, + unsigned int& active_new_gen_number, + uint8_t*& last_pinned_gap, BOOL& leftp); + PER_HEAP + void realloc_plugs (generation* consing_gen, 
heap_segment* seg, + uint8_t* start_address, uint8_t* end_address, + unsigned active_new_gen_number); + + PER_HEAP + void set_expand_in_full_gc (int condemned_gen_number); + + PER_HEAP + void verify_no_pins (uint8_t* start, uint8_t* end); + + PER_HEAP + generation* expand_heap (int condemned_generation, + generation* consing_gen, + heap_segment* new_heap_segment); + + PER_HEAP + void save_ephemeral_generation_starts(); + + PER_HEAP + bool init_dynamic_data (); + PER_HEAP + float surv_to_growth (float cst, float limit, float max_limit); + PER_HEAP + size_t desired_new_allocation (dynamic_data* dd, size_t out, + int gen_number, int pass); + + PER_HEAP + void trim_youngest_desired_low_memory(); + + PER_HEAP + void decommit_ephemeral_segment_pages(); + +#ifdef BIT64 + PER_HEAP_ISOLATED + size_t trim_youngest_desired (uint32_t memory_load, + size_t total_new_allocation, + size_t total_min_allocation); + PER_HEAP_ISOLATED + size_t joined_youngest_desired (size_t new_allocation); +#endif // BIT64 + PER_HEAP_ISOLATED + size_t get_total_heap_size (); + PER_HEAP_ISOLATED + size_t get_total_committed_size(); + + PER_HEAP_ISOLATED + void get_memory_info (uint32_t* memory_load, + uint64_t* available_physical=NULL, + uint64_t* available_page_file=NULL); + PER_HEAP + size_t generation_size (int gen_number); + PER_HEAP_ISOLATED + size_t get_total_survived_size(); + PER_HEAP + size_t get_current_allocated(); + PER_HEAP_ISOLATED + size_t get_total_allocated(); + PER_HEAP + size_t current_generation_size (int gen_number); + PER_HEAP + size_t generation_plan_size (int gen_number); + PER_HEAP + void compute_promoted_allocation (int gen_number); + PER_HEAP + size_t compute_in (int gen_number); + PER_HEAP + void compute_new_dynamic_data (int gen_number); + PER_HEAP + gc_history_per_heap* get_gc_data_per_heap(); + PER_HEAP + size_t new_allocation_limit (size_t size, size_t free_size, int gen_number); + PER_HEAP + size_t generation_fragmentation (generation* gen, + generation* consing_gen, 
+ uint8_t* end); + PER_HEAP + size_t generation_sizes (generation* gen); + PER_HEAP + size_t committed_size(); + PER_HEAP + size_t approximate_new_allocation(); + PER_HEAP + size_t end_space_after_gc(); + PER_HEAP + BOOL decide_on_compacting (int condemned_gen_number, + size_t fragmentation, + BOOL& should_expand); + PER_HEAP + BOOL ephemeral_gen_fit_p (gc_tuning_point tp); + PER_HEAP + void reset_large_object (uint8_t* o); + PER_HEAP + void sweep_large_objects (); + PER_HEAP + void relocate_in_large_objects (); + PER_HEAP + void mark_through_cards_for_large_objects (card_fn fn, BOOL relocating); + PER_HEAP + void descr_segment (heap_segment* seg); + PER_HEAP + void descr_card_table (); + PER_HEAP + void descr_generations (BOOL begin_gc_p); + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + PER_HEAP_ISOLATED + void descr_generations_to_profiler (gen_walk_fn fn, void *context); + PER_HEAP + void record_survived_for_profiler(int condemned_gen_number, uint8_t * first_condemned_address); + PER_HEAP + void notify_profiler_of_surviving_large_objects (); +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + + /*------------ Multiple non isolated heaps ----------------*/ +#ifdef MULTIPLE_HEAPS + PER_HEAP_ISOLATED + BOOL create_thread_support (unsigned number_of_heaps); + PER_HEAP_ISOLATED + void destroy_thread_support (); + PER_HEAP + bool create_gc_thread(); + PER_HEAP + void gc_thread_function(); +#ifdef MARK_LIST +#ifdef PARALLEL_MARK_LIST_SORT + PER_HEAP + void sort_mark_list(); + PER_HEAP + void merge_mark_lists(); + PER_HEAP + void append_to_mark_list(uint8_t **start, uint8_t **end); +#else //PARALLEL_MARK_LIST_SORT + PER_HEAP_ISOLATED + void combine_mark_lists(); +#endif //PARALLEL_MARK_LIST_SORT +#endif +#endif //MULTIPLE_HEAPS + + /*------------ End of Multiple non isolated heaps ---------*/ + +#ifndef SEG_MAPPING_TABLE + PER_HEAP_ISOLATED + heap_segment* segment_of (uint8_t* add, ptrdiff_t & delta, + BOOL verify_p = FALSE); +#endif 
//SEG_MAPPING_TABLE + +#ifdef BACKGROUND_GC + + //this is called by revisit.... + PER_HEAP + uint8_t* high_page (heap_segment* seg, BOOL concurrent_p); + + PER_HEAP + void revisit_written_page (uint8_t* page, uint8_t* end, BOOL concurrent_p, + heap_segment* seg, uint8_t*& last_page, + uint8_t*& last_object, BOOL large_objects_p, + size_t& num_marked_objects); + PER_HEAP + void revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p=FALSE); + + PER_HEAP + void concurrent_scan_dependent_handles (ScanContext *sc); + + PER_HEAP_ISOLATED + void suspend_EE (); + + PER_HEAP_ISOLATED + void bgc_suspend_EE (); + + PER_HEAP_ISOLATED + void restart_EE (); + + PER_HEAP + void background_verify_mark (Object*& object, ScanContext* sc, uint32_t flags); + + PER_HEAP + void background_scan_dependent_handles (ScanContext *sc); + + PER_HEAP + void allow_fgc(); + + // Restores BGC settings if necessary. + PER_HEAP_ISOLATED + void recover_bgc_settings(); + + PER_HEAP + void save_bgc_data_per_heap(); + + PER_HEAP + BOOL should_commit_mark_array(); + + PER_HEAP + void clear_commit_flag(); + + PER_HEAP_ISOLATED + void clear_commit_flag_global(); + + PER_HEAP_ISOLATED + void verify_mark_array_cleared (heap_segment* seg, uint32_t* mark_array_addr); + + PER_HEAP_ISOLATED + void verify_mark_array_cleared (uint8_t* begin, uint8_t* end, uint32_t* mark_array_addr); + + PER_HEAP_ISOLATED + BOOL commit_mark_array_by_range (uint8_t* begin, + uint8_t* end, + uint32_t* mark_array_addr); + + PER_HEAP_ISOLATED + BOOL commit_mark_array_new_seg (gc_heap* hp, + heap_segment* seg, + uint32_t* new_card_table = 0, + uint8_t* new_lowest_address = 0); + + PER_HEAP_ISOLATED + BOOL commit_mark_array_with_check (heap_segment* seg, uint32_t* mark_array_addr); + + // commit the portion of the mark array that corresponds to + // this segment (from beginning to reserved). + // seg and heap_segment_reserved (seg) are guaranteed to be + // page aligned. 
+ PER_HEAP_ISOLATED + BOOL commit_mark_array_by_seg (heap_segment* seg, uint32_t* mark_array_addr); + + // During BGC init, we commit the mark array for all in range + // segments whose mark array hasn't been committed or fully + // committed. All rw segments are in range, only ro segments + // can be partial in range. + PER_HEAP + BOOL commit_mark_array_bgc_init (uint32_t* mark_array_addr); + + PER_HEAP + BOOL commit_new_mark_array (uint32_t* new_mark_array); + + // We need to commit all segments that intersect with the bgc + // range. If a segment is only partially in range, we still + // should commit the mark array for the whole segment as + // we will set the mark array commit flag for this segment. + PER_HEAP_ISOLATED + BOOL commit_new_mark_array_global (uint32_t* new_mark_array); + + // We can't decommit the first and the last page in the mark array + // if the beginning and ending don't happen to be page aligned. + PER_HEAP + void decommit_mark_array_by_seg (heap_segment* seg); + + PER_HEAP + void background_mark_phase(); + + PER_HEAP + void background_drain_mark_list (int thread); + + PER_HEAP + void background_grow_c_mark_list(); + + PER_HEAP_ISOLATED + void background_promote_callback(Object** object, ScanContext* sc, uint32_t flags); + + PER_HEAP + void mark_absorb_new_alloc(); + + PER_HEAP + void restart_vm(); + + PER_HEAP + BOOL prepare_bgc_thread(gc_heap* gh); + PER_HEAP + BOOL create_bgc_thread(gc_heap* gh); + PER_HEAP_ISOLATED + BOOL create_bgc_threads_support (int number_of_heaps); + PER_HEAP + BOOL create_bgc_thread_support(); + PER_HEAP_ISOLATED + int check_for_ephemeral_alloc(); + PER_HEAP_ISOLATED + void wait_to_proceed(); + PER_HEAP_ISOLATED + void fire_alloc_wait_event_begin (alloc_wait_reason awr); + PER_HEAP_ISOLATED + void fire_alloc_wait_event_end (alloc_wait_reason awr); + PER_HEAP + void background_gc_wait_lh (alloc_wait_reason awr = awr_ignored); + PER_HEAP + uint32_t background_gc_wait (alloc_wait_reason awr = awr_ignored, int 
time_out_ms = INFINITE); + PER_HEAP_ISOLATED + void start_c_gc(); + PER_HEAP + void kill_gc_thread(); + PER_HEAP + uint32_t bgc_thread_function(); + PER_HEAP_ISOLATED + void do_background_gc(); + static + uint32_t __stdcall bgc_thread_stub (void* arg); + +#endif //BACKGROUND_GC + +public: + + PER_HEAP_ISOLATED + VOLATILE(bool) internal_gc_done; + +#ifdef BACKGROUND_GC + PER_HEAP_ISOLATED + uint32_t cm_in_progress; + + PER_HEAP + BOOL expanded_in_fgc; + + // normally this is FALSE; we set it to TRUE at the end of the gen1 GC + // we do right before the bgc starts. + PER_HEAP_ISOLATED + BOOL dont_restart_ee_p; + + PER_HEAP_ISOLATED + CLREvent bgc_start_event; +#endif //BACKGROUND_GC + + PER_HEAP_ISOLATED + uint32_t wait_for_gc_done(int32_t timeOut = INFINITE); + + // Returns TRUE if the thread used to be in cooperative mode + // before calling this function. + PER_HEAP_ISOLATED + BOOL enable_preemptive (Thread* current_thread); + PER_HEAP_ISOLATED + void disable_preemptive (Thread* current_thread, BOOL restore_cooperative); + + /* ------------------- per heap members --------------------------*/ + + PER_HEAP +#ifndef MULTIPLE_HEAPS + CLREvent gc_done_event; +#else // MULTIPLE_HEAPS + CLREvent gc_done_event; +#endif // MULTIPLE_HEAPS + + PER_HEAP + VOLATILE(int32_t) gc_done_event_lock; + + PER_HEAP + VOLATILE(bool) gc_done_event_set; + + PER_HEAP + void set_gc_done(); + + PER_HEAP + void reset_gc_done(); + + PER_HEAP + void enter_gc_done_event_lock(); + + PER_HEAP + void exit_gc_done_event_lock(); + +#ifdef MULTIPLE_HEAPS + PER_HEAP + uint8_t* ephemeral_low; //lowest ephemeral address + + PER_HEAP + uint8_t* ephemeral_high; //highest ephemeral address +#endif //MULTIPLE_HEAPS + + PER_HEAP + uint32_t* card_table; + + PER_HEAP + short* brick_table; + +#ifdef MARK_ARRAY +#ifdef MULTIPLE_HEAPS + PER_HEAP + uint32_t* mark_array; +#else + SPTR_DECL(uint32_t, mark_array); +#endif //MULTIPLE_HEAPS +#endif //MARK_ARRAY + +#ifdef CARD_BUNDLE + PER_HEAP + uint32_t* 
card_bundle_table; +#endif //CARD_BUNDLE + +#if !defined(SEG_MAPPING_TABLE) || defined(FEATURE_BASICFREEZE) + PER_HEAP_ISOLATED + sorted_table* seg_table; +#endif //!SEG_MAPPING_TABLE || FEATURE_BASICFREEZE + + PER_HEAP_ISOLATED + VOLATILE(BOOL) gc_started; + + // The following 2 events are there to support the gen2 + // notification feature which is only enabled if concurrent + // GC is disabled. + PER_HEAP_ISOLATED + CLREvent full_gc_approach_event; + + PER_HEAP_ISOLATED + CLREvent full_gc_end_event; + + // Full GC Notification percentages. + PER_HEAP_ISOLATED + uint32_t fgn_maxgen_percent; + + PER_HEAP_ISOLATED + uint32_t fgn_loh_percent; + + PER_HEAP_ISOLATED + VOLATILE(bool) full_gc_approach_event_set; + +#ifdef BACKGROUND_GC + PER_HEAP_ISOLATED + BOOL fgn_last_gc_was_concurrent; +#endif //BACKGROUND_GC + + PER_HEAP + size_t fgn_last_alloc; + + static uint32_t user_thread_wait (CLREvent *event, BOOL no_mode_change, int time_out_ms=INFINITE); + + static wait_full_gc_status full_gc_wait (CLREvent *event, int time_out_ms); + + PER_HEAP + uint8_t* demotion_low; + + PER_HEAP + uint8_t* demotion_high; + + PER_HEAP + BOOL demote_gen1_p; + + PER_HEAP + uint8_t* last_gen1_pin_end; + + PER_HEAP + gen_to_condemn_tuning gen_to_condemn_reasons; + + PER_HEAP + size_t etw_allocation_running_amount[2]; + + PER_HEAP + int gc_policy; //sweep, compact, expand + +#ifdef MULTIPLE_HEAPS + PER_HEAP_ISOLATED + bool gc_thread_no_affinitize_p; + + PER_HEAP_ISOLATED + CLREvent gc_start_event; + + PER_HEAP_ISOLATED + CLREvent ee_suspend_event; + + PER_HEAP + heap_segment* new_heap_segment; + +#define alloc_quantum_balance_units (16) + + PER_HEAP_ISOLATED + size_t min_balance_threshold; +#else //MULTIPLE_HEAPS + + PER_HEAP + size_t allocation_running_time; + + PER_HEAP + size_t allocation_running_amount; + +#endif //MULTIPLE_HEAPS + + PER_HEAP_ISOLATED + gc_mechanisms settings; + + PER_HEAP_ISOLATED + gc_history_global gc_data_global; + + PER_HEAP_ISOLATED + size_t 
gc_last_ephemeral_decommit_time; + + PER_HEAP_ISOLATED + size_t gc_gen0_desired_high; + + PER_HEAP + size_t gen0_big_free_spaces; + +#ifdef SHORT_PLUGS + PER_HEAP_ISOLATED + double short_plugs_pad_ratio; +#endif //SHORT_PLUGS + +#ifdef BIT64 + PER_HEAP_ISOLATED + size_t youngest_gen_desired_th; +#endif //BIT64 + + PER_HEAP_ISOLATED + uint32_t high_memory_load_th; + + PER_HEAP_ISOLATED + uint64_t mem_one_percent; + + PER_HEAP_ISOLATED + uint64_t total_physical_mem; + + PER_HEAP_ISOLATED + uint64_t entry_available_physical_mem; + + PER_HEAP_ISOLATED + size_t last_gc_index; + + PER_HEAP_ISOLATED + size_t min_segment_size; + + PER_HEAP + uint8_t* lowest_address; + + PER_HEAP + uint8_t* highest_address; + + PER_HEAP + BOOL ephemeral_promotion; + PER_HEAP + uint8_t* saved_ephemeral_plan_start[NUMBERGENERATIONS-1]; + PER_HEAP + size_t saved_ephemeral_plan_start_size[NUMBERGENERATIONS-1]; + +protected: +#ifdef MULTIPLE_HEAPS + PER_HEAP + GCHeap* vm_heap; + PER_HEAP + int heap_number; + PER_HEAP + VOLATILE(int) alloc_context_count; +#else //MULTIPLE_HEAPS +#define vm_heap ((GCHeap*) g_pGCHeap) +#define heap_number (0) +#endif //MULTIPLE_HEAPS + +#ifndef MULTIPLE_HEAPS + SPTR_DECL(heap_segment,ephemeral_heap_segment); +#else + PER_HEAP + heap_segment* ephemeral_heap_segment; +#endif // !MULTIPLE_HEAPS + + PER_HEAP + size_t time_bgc_last; + + PER_HEAP + uint8_t* gc_low; // lowest address being condemned + + PER_HEAP + uint8_t* gc_high; //highest address being condemned + + PER_HEAP + size_t mark_stack_tos; + + PER_HEAP + size_t mark_stack_bos; + + PER_HEAP + size_t mark_stack_array_length; + + PER_HEAP + mark* mark_stack_array; + + PER_HEAP + BOOL verify_pinned_queue_p; + + PER_HEAP + uint8_t* oldest_pinned_plug; + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + PER_HEAP + size_t num_pinned_objects; +#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef FEATURE_LOH_COMPACTION + PER_HEAP + size_t loh_pinned_queue_tos; + + PER_HEAP + size_t 
loh_pinned_queue_bos; + + PER_HEAP + size_t loh_pinned_queue_length; + + PER_HEAP_ISOLATED + int loh_pinned_queue_decay; + + PER_HEAP + mark* loh_pinned_queue; + + // This is for forced LOH compaction via the complus env var + PER_HEAP_ISOLATED + BOOL loh_compaction_always_p; + + // This is set by the user. + PER_HEAP_ISOLATED + gc_loh_compaction_mode loh_compaction_mode; + + // We may not compact LOH on every heap if we can't + // grow the pinned queue. This is to indicate whether + // this heap's LOH is compacted or not. So even if + // settings.loh_compaction is TRUE this may not be TRUE. + PER_HEAP + BOOL loh_compacted_p; +#endif //FEATURE_LOH_COMPACTION + +#ifdef BACKGROUND_GC + + PER_HEAP + EEThreadId bgc_thread_id; + +#ifdef WRITE_WATCH + PER_HEAP + uint8_t* background_written_addresses [array_size+2]; +#endif //WRITE_WATCH + +#if defined (DACCESS_COMPILE) && !defined (MULTIPLE_HEAPS) + // doesn't need to be volatile for DAC. + SVAL_DECL(c_gc_state, current_c_gc_state); +#else + PER_HEAP_ISOLATED + VOLATILE(c_gc_state) current_c_gc_state; //tells the large object allocator to + //mark the object as new since the start of gc. +#endif //DACCESS_COMPILE && !MULTIPLE_HEAPS + + PER_HEAP_ISOLATED + gc_mechanisms saved_bgc_settings; + + PER_HEAP + gc_history_per_heap bgc_data_per_heap; + + PER_HEAP + BOOL bgc_thread_running; // gc thread is its main loop + + PER_HEAP_ISOLATED + BOOL keep_bgc_threads_p; + + // This event is used by BGC threads to do something on + // one specific thread while other BGC threads have to + // wait. This is different from a join 'cause you can't + // specify which thread should be doing some task + // while other threads have to wait. + // For example, to make the BGC threads managed threads + // we need to create them on the thread that called + // SuspendEE which is heap 0. 
+ PER_HEAP_ISOLATED + CLREvent bgc_threads_sync_event; + + PER_HEAP + Thread* bgc_thread; + + PER_HEAP + CLRCriticalSection bgc_threads_timeout_cs; + + PER_HEAP_ISOLATED + CLREvent background_gc_done_event; + + PER_HEAP_ISOLATED + CLREvent ee_proceed_event; + + PER_HEAP + CLREvent gc_lh_block_event; + + PER_HEAP_ISOLATED + bool gc_can_use_concurrent; + + PER_HEAP_ISOLATED + bool temp_disable_concurrent_p; + + PER_HEAP_ISOLATED + BOOL do_ephemeral_gc_p; + + PER_HEAP_ISOLATED + BOOL do_concurrent_p; + + PER_HEAP + VOLATILE(bgc_state) current_bgc_state; + + struct gc_history + { + size_t gc_index; + bgc_state current_bgc_state; + uint32_t gc_time_ms; + // This is in bytes per ms; consider breaking it + // into the efficiency per phase. + size_t gc_efficiency; + uint8_t* eph_low; + uint8_t* gen0_start; + uint8_t* eph_high; + uint8_t* bgc_highest; + uint8_t* bgc_lowest; + uint8_t* fgc_highest; + uint8_t* fgc_lowest; + uint8_t* g_highest; + uint8_t* g_lowest; + }; + +#define max_history_count 64 + + PER_HEAP + int gchist_index_per_heap; + + PER_HEAP + gc_history gchist_per_heap[max_history_count]; + + PER_HEAP_ISOLATED + int gchist_index; + + PER_HEAP_ISOLATED + gc_mechanisms_store gchist[max_history_count]; + + PER_HEAP + void add_to_history_per_heap(); + + PER_HEAP_ISOLATED + void add_to_history(); + + PER_HEAP + size_t total_promoted_bytes; + + PER_HEAP + size_t bgc_overflow_count; + + PER_HEAP + size_t bgc_begin_loh_size; + PER_HEAP + size_t end_loh_size; + + // We need to throttle the LOH allocations during BGC since we can't + // collect LOH when BGC is in progress. + // We allow the LOH heap size to double during a BGC. So for every + // 10% increase we will have the LOH allocating thread sleep for one more + // ms. So we are already 30% over the original heap size the thread will + // sleep for 3ms. + PER_HEAP + uint32_t bgc_alloc_spin_loh; + + // This includes what we allocate at the end of segment - allocating + // in free list doesn't increase the heap size. 
+ PER_HEAP + size_t bgc_loh_size_increased; + + PER_HEAP + size_t bgc_loh_allocated_in_free; + + PER_HEAP + size_t background_soh_alloc_count; + + PER_HEAP + size_t background_loh_alloc_count; + + PER_HEAP + uint8_t** background_mark_stack_tos; + + PER_HEAP + uint8_t** background_mark_stack_array; + + PER_HEAP + size_t background_mark_stack_array_length; + + PER_HEAP + uint8_t* background_min_overflow_address; + + PER_HEAP + uint8_t* background_max_overflow_address; + + // We can't process the soh range concurrently so we + // wait till final mark to process it. + PER_HEAP + BOOL processed_soh_overflow_p; + + PER_HEAP + uint8_t* background_min_soh_overflow_address; + + PER_HEAP + uint8_t* background_max_soh_overflow_address; + + PER_HEAP + heap_segment* saved_overflow_ephemeral_seg; + +#ifndef MULTIPLE_HEAPS + SPTR_DECL(heap_segment, saved_sweep_ephemeral_seg); + + SPTR_DECL(uint8_t, saved_sweep_ephemeral_start); + + SPTR_DECL(uint8_t, background_saved_lowest_address); + + SPTR_DECL(uint8_t, background_saved_highest_address); +#else + + PER_HEAP + heap_segment* saved_sweep_ephemeral_seg; + + PER_HEAP + uint8_t* saved_sweep_ephemeral_start; + + PER_HEAP + uint8_t* background_saved_lowest_address; + + PER_HEAP + uint8_t* background_saved_highest_address; +#endif //!MULTIPLE_HEAPS + + // This is used for synchronization between the bgc thread + // for this heap and the user threads allocating on this + // heap. 
+ PER_HEAP + exclusive_sync* bgc_alloc_lock; + +#ifdef SNOOP_STATS + PER_HEAP + snoop_stats_data snoop_stat; +#endif //SNOOP_STATS + + + PER_HEAP + uint8_t** c_mark_list; + + PER_HEAP + size_t c_mark_list_length; + + PER_HEAP + size_t c_mark_list_index; +#endif //BACKGROUND_GC + +#ifdef MARK_LIST + PER_HEAP + uint8_t** mark_list; + + PER_HEAP_ISOLATED + size_t mark_list_size; + + PER_HEAP + uint8_t** mark_list_end; + + PER_HEAP + uint8_t** mark_list_index; + + PER_HEAP_ISOLATED + uint8_t** g_mark_list; +#ifdef PARALLEL_MARK_LIST_SORT + PER_HEAP_ISOLATED + uint8_t** g_mark_list_copy; + PER_HEAP + uint8_t*** mark_list_piece_start; + uint8_t*** mark_list_piece_end; +#endif //PARALLEL_MARK_LIST_SORT +#endif //MARK_LIST + + PER_HEAP + uint8_t* min_overflow_address; + + PER_HEAP + uint8_t* max_overflow_address; + + PER_HEAP + uint8_t* shigh; //keeps track of the highest marked object + + PER_HEAP + uint8_t* slow; //keeps track of the lowest marked object + + PER_HEAP + size_t allocation_quantum; + + PER_HEAP + size_t alloc_contexts_used; + + PER_HEAP_ISOLATED + no_gc_region_info current_no_gc_region_info; + + PER_HEAP + size_t soh_allocation_no_gc; + + PER_HEAP + size_t loh_allocation_no_gc; + + PER_HEAP + heap_segment* saved_loh_segment_no_gc; + + PER_HEAP_ISOLATED + BOOL proceed_with_gc_p; + +#define youngest_generation (generation_of (0)) +#define large_object_generation (generation_of (max_generation+1)) + +#ifndef MULTIPLE_HEAPS + SPTR_DECL(uint8_t,alloc_allocated); +#else + PER_HEAP + uint8_t* alloc_allocated; //keeps track of the highest + //address allocated by alloc +#endif // !MULTIPLE_HEAPS + + // The more_space_lock and gc_lock is used for 3 purposes: + // + // 1) to coordinate threads that exceed their quantum (UP & MP) (more_space_lock) + // 2) to synchronize allocations of large objects (more_space_lock) + // 3) to synchronize the GC itself (gc_lock) + // + PER_HEAP_ISOLATED + GCSpinLock gc_lock; //lock while doing GC + + PER_HEAP + GCSpinLock 
more_space_lock; //lock while allocating more space + +#ifdef SYNCHRONIZATION_STATS + + PER_HEAP + unsigned int good_suspension; + + PER_HEAP + unsigned int bad_suspension; + + // Number of times when msl_acquire is > 200 cycles. + PER_HEAP + unsigned int num_high_msl_acquire; + + // Number of times when msl_acquire is < 200 cycles. + PER_HEAP + unsigned int num_low_msl_acquire; + + // Number of times the more_space_lock is acquired. + PER_HEAP + unsigned int num_msl_acquired; + + // Total cycles it takes to acquire the more_space_lock. + PER_HEAP + uint64_t total_msl_acquire; + + PER_HEAP + void init_heap_sync_stats() + { + good_suspension = 0; + bad_suspension = 0; + num_msl_acquired = 0; + total_msl_acquire = 0; + num_high_msl_acquire = 0; + num_low_msl_acquire = 0; + more_space_lock.init(); + gc_lock.init(); + } + + PER_HEAP + void print_heap_sync_stats(unsigned int heap_num, unsigned int gc_count_during_log) + { + printf("%2d%2d%10u%10u%12u%6u%4u%8u(%4u,%4u,%4u,%4u)\n", + heap_num, + alloc_contexts_used, + good_suspension, + bad_suspension, + (unsigned int)(total_msl_acquire / gc_count_during_log), + num_high_msl_acquire / gc_count_during_log, + num_low_msl_acquire / gc_count_during_log, + num_msl_acquired / gc_count_during_log, + more_space_lock.num_switch_thread / gc_count_during_log, + more_space_lock.num_wait_longer / gc_count_during_log, + more_space_lock.num_switch_thread_w / gc_count_during_log, + more_space_lock.num_disable_preemptive_w / gc_count_during_log); + } + +#endif //SYNCHRONIZATION_STATS + +#ifdef MULTIPLE_HEAPS + PER_HEAP + generation generation_table [NUMBERGENERATIONS+1]; +#endif + + +#define NUM_LOH_ALIST (7) +#define BASE_LOH_ALIST (64*1024) + PER_HEAP + alloc_list loh_alloc_list[NUM_LOH_ALIST-1]; + +#define NUM_GEN2_ALIST (12) +#ifdef BIT64 +#define BASE_GEN2_ALIST (1*256) +#else +#define BASE_GEN2_ALIST (1*128) +#endif // BIT64 + PER_HEAP + alloc_list gen2_alloc_list[NUM_GEN2_ALIST-1]; + +//------------------------------------------ + 
+ PER_HEAP + dynamic_data dynamic_data_table [NUMBERGENERATIONS+1]; + + PER_HEAP + gc_history_per_heap gc_data_per_heap; + + PER_HEAP + size_t maxgen_pinned_compact_before_advance; + + // dynamic tuning. + PER_HEAP + BOOL dt_low_ephemeral_space_p (gc_tuning_point tp); + // if elevate_p is FALSE, it means we are determining fragmentation for a generation + // to see if we should condemn this gen; otherwise it means we are determining if + // we should elevate to doing max_gen from an ephemeral gen. + PER_HEAP + BOOL dt_high_frag_p (gc_tuning_point tp, int gen_number, BOOL elevate_p=FALSE); + PER_HEAP + BOOL + dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number); + PER_HEAP + BOOL dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem); + PER_HEAP + BOOL dt_low_card_table_efficiency_p (gc_tuning_point tp); + + PER_HEAP + int generation_skip_ratio;//in % + + PER_HEAP + BOOL gen0_bricks_cleared; +#ifdef FFIND_OBJECT + PER_HEAP + int gen0_must_clear_bricks; +#endif //FFIND_OBJECT + + PER_HEAP_ISOLATED + size_t full_gc_counts[gc_type_max]; + + // the # of bytes allocates since the last full compacting GC. + PER_HEAP + uint64_t loh_alloc_since_cg; + + PER_HEAP + BOOL elevation_requested; + + // if this is TRUE, we should always guarantee that we do a + // full compacting GC before we OOM. 
+ PER_HEAP + BOOL last_gc_before_oom; + + PER_HEAP_ISOLATED + BOOL should_expand_in_full_gc; + +#ifdef BACKGROUND_GC + PER_HEAP_ISOLATED + size_t ephemeral_fgc_counts[max_generation]; + + PER_HEAP_ISOLATED + BOOL alloc_wait_event_p; + +#ifndef MULTIPLE_HEAPS + SPTR_DECL(uint8_t, next_sweep_obj); +#else + PER_HEAP + uint8_t* next_sweep_obj; +#endif //MULTIPLE_HEAPS + + PER_HEAP + uint8_t* current_sweep_pos; + +#endif //BACKGROUND_GC + +#ifndef MULTIPLE_HEAPS + SVAL_DECL(oom_history, oom_info); +#ifdef FEATURE_PREMORTEM_FINALIZATION + SPTR_DECL(CFinalize,finalize_queue); +#endif //FEATURE_PREMORTEM_FINALIZATION +#else + + PER_HEAP + oom_history oom_info; + +#ifdef FEATURE_PREMORTEM_FINALIZATION + PER_HEAP + PTR_CFinalize finalize_queue; +#endif //FEATURE_PREMORTEM_FINALIZATION +#endif // !MULTIPLE_HEAPS + + PER_HEAP + fgm_history fgm_result; + + PER_HEAP_ISOLATED + size_t eph_gen_starts_size; + +#ifdef GC_CONFIG_DRIVEN + PER_HEAP_ISOLATED + size_t time_init; + + PER_HEAP_ISOLATED + size_t time_since_init; + + // 0 stores compacting GCs; + // 1 stores sweeping GCs; + PER_HEAP_ISOLATED + size_t compact_or_sweep_gcs[2]; + + PER_HEAP + size_t interesting_data_per_gc[max_idp_count]; + +#ifdef MULTIPLE_HEAPS + PER_HEAP + size_t interesting_data_per_heap[max_idp_count]; + + PER_HEAP + size_t compact_reasons_per_heap[max_compact_reasons_count]; + + PER_HEAP + size_t expand_mechanisms_per_heap[max_expand_mechanisms_count]; + + PER_HEAP + size_t interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; +#endif //MULTIPLE_HEAPS +#endif //GC_CONFIG_DRIVEN + + PER_HEAP + BOOL ro_segments_in_range; + +#ifdef BACKGROUND_GC + PER_HEAP + heap_segment* freeable_small_heap_segment; +#endif //BACKGROUND_GC + + PER_HEAP + heap_segment* freeable_large_heap_segment; + + PER_HEAP_ISOLATED + heap_segment* segment_standby_list; + + PER_HEAP + size_t ordered_free_space_indices[MAX_NUM_BUCKETS]; + + PER_HEAP + size_t saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; + + PER_HEAP + 
size_t ordered_plug_indices[MAX_NUM_BUCKETS]; + + PER_HEAP + size_t saved_ordered_plug_indices[MAX_NUM_BUCKETS]; + + PER_HEAP + BOOL ordered_plug_indices_init; + + PER_HEAP + BOOL use_bestfit; + + PER_HEAP + uint8_t* bestfit_first_pin; + + PER_HEAP + BOOL commit_end_of_seg; + + PER_HEAP + size_t max_free_space_items; // dynamically adjusted. + + PER_HEAP + size_t free_space_buckets; + + PER_HEAP + size_t free_space_items; + + // -1 means we are using all the free + // spaces we have (not including + // end of seg space). + PER_HEAP + int trimmed_free_space_index; + + PER_HEAP + size_t total_ephemeral_plugs; + + PER_HEAP + seg_free_spaces* bestfit_seg; + + // Note: we know this from the plan phase. + // total_ephemeral_plugs actually has the same value + // but while we are calculating its value we also store + // info on how big the plugs are for best fit which we + // don't do in plan phase. + // TODO: get rid of total_ephemeral_plugs. + PER_HEAP + size_t total_ephemeral_size; + +public: + +#ifdef HEAP_ANALYZE + + PER_HEAP_ISOLATED + BOOL heap_analyze_enabled; + + PER_HEAP + size_t internal_root_array_length; + +#ifndef MULTIPLE_HEAPS + SPTR_DECL(PTR_uint8_t, internal_root_array); + SVAL_DECL(size_t, internal_root_array_index); + SVAL_DECL(BOOL, heap_analyze_success); +#else + PER_HEAP + uint8_t** internal_root_array; + + PER_HEAP + size_t internal_root_array_index; + + PER_HEAP + BOOL heap_analyze_success; +#endif // !MULTIPLE_HEAPS + + // next two fields are used to optimize the search for the object + // enclosing the current reference handled by ha_mark_object_simple. 
+ PER_HEAP + uint8_t* current_obj; + + PER_HEAP + size_t current_obj_size; + +#endif //HEAP_ANALYZE + + /* ----------------------- global members ----------------------- */ +public: + + PER_HEAP + int condemned_generation_num; + + PER_HEAP + BOOL blocking_collection; + +#ifdef MULTIPLE_HEAPS + SVAL_DECL(int, n_heaps); + SPTR_DECL(PTR_gc_heap, g_heaps); + + static + size_t* g_promoted; +#ifdef BACKGROUND_GC + static + size_t* g_bpromoted; +#endif //BACKGROUND_GC +#ifdef MH_SC_MARK + PER_HEAP_ISOLATED + int* g_mark_stack_busy; +#endif //MH_SC_MARK +#else + static + size_t g_promoted; +#ifdef BACKGROUND_GC + static + size_t g_bpromoted; +#endif //BACKGROUND_GC +#endif //MULTIPLE_HEAPS + + static + size_t reserved_memory; + static + size_t reserved_memory_limit; + static + BOOL g_low_memory_status; + +protected: + PER_HEAP + void update_collection_counts (); + +}; // class gc_heap + + +#ifdef FEATURE_PREMORTEM_FINALIZATION +class CFinalize +{ +#ifdef DACCESS_COMPILE + friend class ::ClrDataAccess; +#endif // DACCESS_COMPILE +private: + + //adjust the count and add a constant to add a segment + static const int ExtraSegCount = 2; + static const int FinalizerListSeg = NUMBERGENERATIONS+1; + static const int CriticalFinalizerListSeg = NUMBERGENERATIONS; + //Does not correspond to a segment + static const int FreeList = NUMBERGENERATIONS+ExtraSegCount; + + PTR_PTR_Object m_Array; + PTR_PTR_Object m_FillPointers[NUMBERGENERATIONS+ExtraSegCount]; + PTR_PTR_Object m_EndArray; + size_t m_PromotedCount; + + VOLATILE(int32_t) lock; +#ifdef _DEBUG + EEThreadId lockowner_threadid; +#endif // _DEBUG + + BOOL GrowArray(); + void MoveItem (Object** fromIndex, + unsigned int fromSeg, + unsigned int toSeg); + + inline PTR_PTR_Object& SegQueue (unsigned int Seg) + { + return (Seg ? 
m_FillPointers [Seg-1] : m_Array); + } + inline PTR_PTR_Object& SegQueueLimit (unsigned int Seg) + { + return m_FillPointers [Seg]; + } + + BOOL IsSegEmpty ( unsigned int i) + { + ASSERT ( (int)i < FreeList); + return (SegQueueLimit(i) == SegQueue (i)); + + } + + BOOL FinalizeSegForAppDomain (AppDomain *pDomain, + BOOL fRunFinalizers, + unsigned int Seg); + +public: + ~CFinalize(); + bool Initialize(); + void EnterFinalizeLock(); + void LeaveFinalizeLock(); + bool RegisterForFinalization (int gen, Object* obj, size_t size=0); + Object* GetNextFinalizableObject (BOOL only_non_critical=FALSE); + BOOL ScanForFinalization (promote_func* fn, int gen,BOOL mark_only_p, gc_heap* hp); + void RelocateFinalizationData (int gen, gc_heap* hp); +#ifdef GC_PROFILING + void WalkFReachableObjects (gc_heap* hp); +#endif //GC_PROFILING + void GcScanRoots (promote_func* fn, int hn, ScanContext *pSC); + void UpdatePromotedGenerations (int gen, BOOL gen_0_empty_p); + size_t GetPromotedCount(); + + //Methods used by the shutdown code to call every finalizer + void SetSegForShutDown(BOOL fHasLock); + size_t GetNumberFinalizableObjects(); + void DiscardNonCriticalObjects(); + + //Methods used by the app domain unloading call to finalize objects in an app domain + BOOL FinalizeAppDomain (AppDomain *pDomain, BOOL fRunFinalizers); + + void CheckFinalizerObjects(); +}; +#endif // FEATURE_PREMORTEM_FINALIZATION + +inline + size_t& dd_begin_data_size (dynamic_data* inst) +{ + return inst->begin_data_size; +} +inline + size_t& dd_survived_size (dynamic_data* inst) +{ + return inst->survived_size; +} +#if defined (RESPECT_LARGE_ALIGNMENT) || defined (FEATURE_STRUCTALIGN) +inline + size_t& dd_num_npinned_plugs(dynamic_data* inst) +{ + return inst->num_npinned_plugs; +} +#endif //RESPECT_LARGE_ALIGNMENT || FEATURE_STRUCTALIGN +inline +size_t& dd_pinned_survived_size (dynamic_data* inst) +{ + return inst->pinned_survived_size; +} +inline +size_t& dd_added_pinned_size (dynamic_data* inst) +{ + return 
inst->added_pinned_size; +} +inline +size_t& dd_artificial_pinned_survived_size (dynamic_data* inst) +{ + return inst->artificial_pinned_survived_size; +} +#ifdef SHORT_PLUGS +inline +size_t& dd_padding_size (dynamic_data* inst) +{ + return inst->padding_size; +} +#endif //SHORT_PLUGS +inline + size_t& dd_current_size (dynamic_data* inst) +{ + return inst->current_size; +} +inline +float& dd_surv (dynamic_data* inst) +{ + return inst->surv; +} +inline +size_t& dd_freach_previous_promotion (dynamic_data* inst) +{ + return inst->freach_previous_promotion; +} +inline +size_t& dd_desired_allocation (dynamic_data* inst) +{ + return inst->desired_allocation; +} +inline +size_t& dd_collection_count (dynamic_data* inst) +{ + return inst->collection_count; +} +inline +size_t& dd_promoted_size (dynamic_data* inst) +{ + return inst->promoted_size; +} +inline +float& dd_limit (dynamic_data* inst) +{ + return inst->limit; +} +inline +float& dd_max_limit (dynamic_data* inst) +{ + return inst->max_limit; +} +inline +size_t& dd_min_gc_size (dynamic_data* inst) +{ + return inst->min_gc_size; +} +inline +size_t& dd_max_size (dynamic_data* inst) +{ + return inst->max_size; +} +inline +size_t& dd_min_size (dynamic_data* inst) +{ + return inst->min_size; +} +inline +ptrdiff_t& dd_new_allocation (dynamic_data* inst) +{ + return inst->new_allocation; +} +inline +ptrdiff_t& dd_gc_new_allocation (dynamic_data* inst) +{ + return inst->gc_new_allocation; +} +inline +size_t& dd_default_new_allocation (dynamic_data* inst) +{ + return inst->default_new_allocation; +} +inline +size_t& dd_fragmentation_limit (dynamic_data* inst) +{ + return inst->fragmentation_limit; +} +inline +float& dd_fragmentation_burden_limit (dynamic_data* inst) +{ + return inst->fragmentation_burden_limit; +} +inline +float dd_v_fragmentation_burden_limit (dynamic_data* inst) +{ + return (min (2*dd_fragmentation_burden_limit (inst), 0.75f)); +} +inline +size_t& dd_fragmentation (dynamic_data* inst) +{ + return 
inst->fragmentation; +} + +inline +size_t& dd_gc_clock (dynamic_data* inst) +{ + return inst->gc_clock; +} +inline +size_t& dd_time_clock (dynamic_data* inst) +{ + return inst->time_clock; +} + +inline +size_t& dd_gc_elapsed_time (dynamic_data* inst) +{ + return inst->gc_elapsed_time; +} + +inline +float& dd_gc_speed (dynamic_data* inst) +{ + return inst->gc_speed; +} + +inline +alloc_context* generation_alloc_context (generation* inst) +{ + return &(inst->allocation_context); +} + +inline +uint8_t*& generation_allocation_start (generation* inst) +{ + return inst->allocation_start; +} +inline +uint8_t*& generation_allocation_pointer (generation* inst) +{ + return inst->allocation_context.alloc_ptr; +} +inline +uint8_t*& generation_allocation_limit (generation* inst) +{ + return inst->allocation_context.alloc_limit; +} +inline +allocator* generation_allocator (generation* inst) +{ + return &inst->free_list_allocator; +} + +inline +PTR_heap_segment& generation_start_segment (generation* inst) +{ + return inst->start_segment; +} +inline +heap_segment*& generation_allocation_segment (generation* inst) +{ + return inst->allocation_segment; +} +inline +uint8_t*& generation_plan_allocation_start (generation* inst) +{ + return inst->plan_allocation_start; +} +inline +size_t& generation_plan_allocation_start_size (generation* inst) +{ + return inst->plan_allocation_start_size; +} +inline +uint8_t*& generation_allocation_context_start_region (generation* inst) +{ + return inst->allocation_context_start_region; +} +inline +size_t& generation_free_list_space (generation* inst) +{ + return inst->free_list_space; +} +inline +size_t& generation_free_obj_space (generation* inst) +{ + return inst->free_obj_space; +} +inline +size_t& generation_allocation_size (generation* inst) +{ + return inst->allocation_size; +} + +inline +size_t& generation_pinned_allocated (generation* inst) +{ + return inst->pinned_allocated; +} +inline +size_t& generation_pinned_allocation_sweep_size 
(generation* inst) +{ + return inst->pinned_allocation_sweep_size; +} +inline +size_t& generation_pinned_allocation_compact_size (generation* inst) +{ + return inst->pinned_allocation_compact_size; +} +inline +size_t& generation_free_list_allocated (generation* inst) +{ + return inst->free_list_allocated; +} +inline +size_t& generation_end_seg_allocated (generation* inst) +{ + return inst->end_seg_allocated; +} +inline +BOOL& generation_allocate_end_seg_p (generation* inst) +{ + return inst->allocate_end_seg_p; +} +inline +size_t& generation_condemned_allocated (generation* inst) +{ + return inst->condemned_allocated; +} +#ifdef FREE_USAGE_STATS +inline +size_t& generation_pinned_free_obj_space (generation* inst) +{ + return inst->pinned_free_obj_space; +} +inline +size_t& generation_allocated_in_pinned_free (generation* inst) +{ + return inst->allocated_in_pinned_free; +} +inline +size_t& generation_allocated_since_last_pin (generation* inst) +{ + return inst->allocated_since_last_pin; +} +#endif //FREE_USAGE_STATS +inline +float generation_allocator_efficiency (generation* inst) +{ + if ((generation_free_list_allocated (inst) + generation_free_obj_space (inst)) != 0) + { + return ((float) (generation_free_list_allocated (inst)) / (float)(generation_free_list_allocated (inst) + generation_free_obj_space (inst))); + } + else + return 0; +} +inline +size_t generation_unusable_fragmentation (generation* inst) +{ + return (size_t)(generation_free_obj_space (inst) + + (1.0f-generation_allocator_efficiency(inst))*generation_free_list_space (inst)); +} + +#define plug_skew sizeof(ObjHeader) +#define min_obj_size (sizeof(uint8_t*)+plug_skew+sizeof(size_t))//syncblock + vtable+ first field +//Note that this encodes the fact that plug_skew is a multiple of uint8_t*. +// We always use USE_PADDING_TAIL when fitting so items on the free list should be +// twice the min_obj_size. 
// Free-list items must be able to hold twice the minimum object size
// (see the min_obj_size / USE_PADDING_TAIL note above).
#define min_free_list (2*min_obj_size)

// Reserves plug_skew bytes of object data; plug_skew is a multiple of
// sizeof(uint8_t*), so the byte count is expressed as a pointer array.
struct plug
{
    uint8_t * skew[plug_skew / sizeof(uint8_t *)];
};

// A pair of short offsets. NOTE(review): presumably the left/right links of
// a plug tree built during the plan phase - confirm against gc.cpp.
class pair
{
public:
    short left;
    short right;
};

//Note that these encode the fact that plug_skew is a multiple of uint8_t*.
// Each new field is prepended to the prior struct.

// pair stored immediately before a plug.
struct plug_and_pair
{
    pair        m_pair;
    plug        m_plug;
};

// relocation distance + pair stored immediately before a plug.
struct plug_and_reloc
{
    ptrdiff_t   reloc;
    pair        m_pair;
    plug        m_plug;
};

// gap size + relocation + pair stored immediately before a plug.
struct plug_and_gap
{
    ptrdiff_t   gap;
    ptrdiff_t   reloc;
    union
    {
        pair    m_pair;
        int     lr; //for clearing the entire pair in one instruction
    };
    plug        m_plug;
};

// Same header data as plug_and_gap without the plug bytes; gap and reloc
// are stored unsigned here.
struct gap_reloc_pair
{
    size_t gap;
    size_t reloc;
    pair   m_pair;
};

// Smallest object that can carry a gap_reloc_pair in front of it
// (used for pre-pinned plugs, per the name - TODO confirm at use sites).
#define min_pre_pin_obj_size (sizeof (gap_reloc_pair) + min_obj_size)

// plug_and_gap padded out to an 8-byte alignment boundary.
struct DECLSPEC_ALIGN(8) aligned_plug_and_gap
{
    plug_and_gap plugandgap;
};

// Large-object variant: only a relocation precedes the plug bytes.
struct loh_obj_and_pad
{
    ptrdiff_t   reloc;
    plug        m_plug;
};

// Padding object on the large object heap, shaped like a real object:
// method-table pointer + length, followed by reloc + plug bytes.
struct loh_padding_obj
{
    uint8_t*    mt;
    size_t      len;
    ptrdiff_t   reloc;
    plug        m_plug;
};
#define loh_padding_obj_size (sizeof(loh_padding_obj))

// Bit values for heap_segment's flags field.
#define heap_segment_flags_readonly     1
#define heap_segment_flags_inrange      2
#define heap_segment_flags_unmappable   4
#define heap_segment_flags_loh          8
#ifdef BACKGROUND_GC
#define heap_segment_flags_swept        16
#define heap_segment_flags_decommitted  32
#define heap_segment_flags_ma_committed 64
// for segments whose mark array is only partially committed.
+#define heap_segment_flags_ma_pcommitted 128 +#endif //BACKGROUND_GC + +//need to be careful to keep enough pad items to fit a relocation node +//padded to QuadWord before the plug_skew + +class heap_segment +{ +public: + uint8_t* allocated; + uint8_t* committed; + uint8_t* reserved; + uint8_t* used; + uint8_t* mem; + size_t flags; + PTR_heap_segment next; + uint8_t* plan_allocated; +#ifdef BACKGROUND_GC + uint8_t* background_allocated; + uint8_t* saved_bg_allocated; +#endif //BACKGROUND_GC + +#ifdef MULTIPLE_HEAPS + gc_heap* heap; +#endif //MULTIPLE_HEAPS + +#ifdef _MSC_VER +// Disable this warning - we intentionally want __declspec(align()) to insert padding for us +#pragma warning(disable:4324) // structure was padded due to __declspec(align()) +#endif + aligned_plug_and_gap padandplug; +#ifdef _MSC_VER +#pragma warning(default:4324) // structure was padded due to __declspec(align()) +#endif +}; + +inline +uint8_t*& heap_segment_reserved (heap_segment* inst) +{ + return inst->reserved; +} +inline +uint8_t*& heap_segment_committed (heap_segment* inst) +{ + return inst->committed; +} +inline +uint8_t*& heap_segment_used (heap_segment* inst) +{ + return inst->used; +} +inline +uint8_t*& heap_segment_allocated (heap_segment* inst) +{ + return inst->allocated; +} + +inline +BOOL heap_segment_read_only_p (heap_segment* inst) +{ + return ((inst->flags & heap_segment_flags_readonly) != 0); +} + +inline +BOOL heap_segment_in_range_p (heap_segment* inst) +{ + return (!(inst->flags & heap_segment_flags_readonly) || + ((inst->flags & heap_segment_flags_inrange) != 0)); +} + +inline +BOOL heap_segment_unmappable_p (heap_segment* inst) +{ + return (!(inst->flags & heap_segment_flags_readonly) || + ((inst->flags & heap_segment_flags_unmappable) != 0)); +} + +inline +BOOL heap_segment_loh_p (heap_segment * inst) +{ + return !!(inst->flags & heap_segment_flags_loh); +} + +#ifdef BACKGROUND_GC +inline +BOOL heap_segment_decommitted_p (heap_segment * inst) +{ + return 
!!(inst->flags & heap_segment_flags_decommitted); +} +#endif //BACKGROUND_GC + +inline +PTR_heap_segment & heap_segment_next (heap_segment* inst) +{ + return inst->next; +} +inline +uint8_t*& heap_segment_mem (heap_segment* inst) +{ + return inst->mem; +} +inline +uint8_t*& heap_segment_plan_allocated (heap_segment* inst) +{ + return inst->plan_allocated; +} + +#ifdef BACKGROUND_GC +inline +uint8_t*& heap_segment_background_allocated (heap_segment* inst) +{ + return inst->background_allocated; +} +inline +uint8_t*& heap_segment_saved_bg_allocated (heap_segment* inst) +{ + return inst->saved_bg_allocated; +} +#endif //BACKGROUND_GC + +#ifdef MULTIPLE_HEAPS +inline +gc_heap*& heap_segment_heap (heap_segment* inst) +{ + return inst->heap; +} +#endif //MULTIPLE_HEAPS + +#ifndef MULTIPLE_HEAPS + +#ifndef DACCESS_COMPILE +extern "C" { +#endif //!DACCESS_COMPILE + +GARY_DECL(generation,generation_table,NUMBERGENERATIONS+1); + +#ifdef GC_CONFIG_DRIVEN +GARY_DECL(size_t, interesting_data_per_heap, max_idp_count); +GARY_DECL(size_t, compact_reasons_per_heap, max_compact_reasons_count); +GARY_DECL(size_t, expand_mechanisms_per_heap, max_expand_mechanisms_count); +GARY_DECL(size_t, interesting_mechanism_bits_per_heap, max_gc_mechanism_bits_count); +#endif //GC_CONFIG_DRIVEN + +#ifndef DACCESS_COMPILE +} +#endif //!DACCESS_COMPILE + +#endif //MULTIPLE_HEAPS + +inline +generation* gc_heap::generation_of (int n) +{ + assert (((n <= max_generation+1) && (n >= 0))); + return &generation_table [ n ]; +} + +inline +dynamic_data* gc_heap::dynamic_data_of (int gen_number) +{ + return &dynamic_data_table [ gen_number ]; +} + +extern "C" uint8_t* g_ephemeral_low; +extern "C" uint8_t* g_ephemeral_high; + +#define card_word_width ((size_t)32) + +// +// The value of card_size is determined empirically according to the average size of an object +// In the code we also rely on the assumption that one card_table entry (uint32_t) covers an entire os page +// +#if defined (BIT64) +#define 
card_size ((size_t)(2*OS_PAGE_SIZE/card_word_width)) +#else +#define card_size ((size_t)(OS_PAGE_SIZE/card_word_width)) +#endif // BIT64 + +inline +size_t card_word (size_t card) +{ + return card / card_word_width; +} + +inline +unsigned card_bit (size_t card) +{ + return (unsigned)(card % card_word_width); +} + +inline +size_t gcard_of (uint8_t* object) +{ + return (size_t)(object) / card_size; +} + diff --git a/src/gc/gcrecord.h b/src/gc/gcrecord.h new file mode 100644 index 0000000000..8c95ad04d3 --- /dev/null +++ b/src/gc/gcrecord.h @@ -0,0 +1,425 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*++ + +Module Name: + + gcrecord.h + +--*/ + +#ifndef __gc_record_h__ +#define __gc_record_h__ + +#define max_generation 2 + +// We pack the dynamic tuning for deciding which gen to condemn in a uint32_t. +// We assume that 2 bits are enough to represent the generation. +#define bits_generation 2 +#define generation_mask (~(~0u << bits_generation)) +//=======================note !!!===================================// +// If you add stuff to this enum, remember to update total_gen_reasons +// and record_condemn_gen_reasons below. +//=======================note !!!===================================// + +// These are condemned reasons related to generations. +// Each reason takes up 2 bits as we have 3 generations. +// So we can store up to 16 reasons in this uint32_t. +// They need processing before being used. +// See the set and the get method for details. +enum gc_condemn_reason_gen +{ + gen_initial = 0, // indicates the initial gen to condemn. + gen_final_per_heap = 1, // indicates the final gen to condemn per heap. + gen_alloc_budget = 2, // indicates which gen's budget is exceeded. + gen_time_tuning = 3, // indicates the gen number that time based tuning decided. 
+ gcrg_max = 4 +}; + +// These are condemned reasons related to conditions we are in. +// For example, we are in very high memory load which is a condition. +// Each condition takes up a single bit indicates TRUE or FALSE. +// We can store 32 of these. +enum gc_condemn_reason_condition +{ + gen_induced_fullgc_p = 0, + gen_expand_fullgc_p = 1, + gen_high_mem_p = 2, + gen_very_high_mem_p = 3, + gen_low_ephemeral_p = 4, + gen_low_card_p = 5, + gen_eph_high_frag_p = 6, + gen_max_high_frag_p = 7, + gen_max_high_frag_e_p = 8, + gen_max_high_frag_m_p = 9, + gen_max_high_frag_vm_p = 10, + gen_max_gen1 = 11, + gen_before_oom = 12, + gen_gen2_too_small = 13, + gen_induced_noforce_p = 14, + gen_before_bgc = 15, + gen_almost_max_alloc = 16, + gcrc_max = 17 +}; + +#ifdef DT_LOG +static char* record_condemn_reasons_gen_header = "[cg]i|f|a|t|"; +static char* record_condemn_reasons_condition_header = "[cc]i|e|h|v|l|l|e|m|m|m|m|g|o|s|n|b|a|"; +static char char_gen_number[4] = {'0', '1', '2', '3'}; +#endif //DT_LOG + +class gen_to_condemn_tuning +{ + uint32_t condemn_reasons_gen; + uint32_t condemn_reasons_condition; + +#ifdef DT_LOG + char str_reasons_gen[64]; + char str_reasons_condition[64]; +#endif //DT_LOG + + void init_str() + { +#ifdef DT_LOG + memset (str_reasons_gen, '|', sizeof (char) * 64); + str_reasons_gen[gcrg_max*2] = 0; + memset (str_reasons_condition, '|', sizeof (char) * 64); + str_reasons_condition[gcrc_max*2] = 0; +#endif //DT_LOG + } + +public: + void init() + { + condemn_reasons_gen = 0; + condemn_reasons_condition = 0; + init_str(); + } + + void init (gen_to_condemn_tuning* reasons) + { + condemn_reasons_gen = reasons->condemn_reasons_gen; + condemn_reasons_condition = reasons->condemn_reasons_condition; + init_str(); + } + + void set_gen (gc_condemn_reason_gen condemn_gen_reason, uint32_t value) + { + assert ((value & (~generation_mask)) == 0); + condemn_reasons_gen |= (value << (condemn_gen_reason * 2)); + } + + void set_condition 
(gc_condemn_reason_condition condemn_gen_reason) + { + condemn_reasons_condition |= (1 << condemn_gen_reason); + } + + // This checks if condition_to_check is the only condition set. + BOOL is_only_condition (gc_condemn_reason_condition condition_to_check) + { + uint32_t temp_conditions = 1 << condition_to_check; + return !(condemn_reasons_condition ^ temp_conditions); + } + + uint32_t get_gen (gc_condemn_reason_gen condemn_gen_reason) + { + uint32_t value = ((condemn_reasons_gen >> (condemn_gen_reason * 2)) & generation_mask); + return value; + } + + uint32_t get_condition (gc_condemn_reason_condition condemn_gen_reason) + { + uint32_t value = (condemn_reasons_condition & (1 << condemn_gen_reason)); + return value; + } + + uint32_t get_reasons0() + { + return condemn_reasons_gen; + } + + uint32_t get_reasons1() + { + return condemn_reasons_condition; + } + +#ifdef DT_LOG + char get_gen_char (uint32_t value) + { + return char_gen_number[value]; + } + char get_condition_char (uint32_t value) + { + return (value ? 'Y' : 'N'); + } +#endif //DT_LOG + + void print (int heap_num); +}; + +// Right now these are all size_t's but if you add a type that requires +// padding you should add a pragma pack here since I am firing this as +// a struct in an ETW event. +struct gc_generation_data +{ + // data recorded at the beginning of a GC + size_t size_before; // including fragmentation. + size_t free_list_space_before; + size_t free_obj_space_before; + + // data recorded at the end of a GC + size_t size_after; // including fragmentation. 
+ size_t free_list_space_after; + size_t free_obj_space_after; + size_t in; + size_t pinned_surv; + size_t npinned_surv; + size_t new_allocation; + + void print (int heap_num, int gen_num); +}; + +struct maxgen_size_increase +{ + size_t free_list_allocated; + size_t free_list_rejected; + size_t end_seg_allocated; + size_t condemned_allocated; + size_t pinned_allocated; + size_t pinned_allocated_advance; + uint32_t running_free_list_efficiency; +}; + +// The following indicates various mechanisms and one value +// related to each one. Each value has its corresponding string +// representation so if you change the enum's, make sure you +// also add its string form. + +// Note that if we are doing a gen1 GC, we won't +// really expand the heap if we are reusing, but +// we'll record the can_expand_into_p result here. +enum gc_heap_expand_mechanism +{ + expand_reuse_normal = 0, + expand_reuse_bestfit = 1, + expand_new_seg_ep = 2, // new seg with ephemeral promotion + expand_new_seg = 3, + expand_no_memory = 4, // we can't get a new seg. 
+ expand_next_full_gc = 5, + max_expand_mechanisms_count = 6 +}; + +#ifdef DT_LOG +static char* str_heap_expand_mechanisms[] = +{ + "reused seg with normal fit", + "reused seg with best fit", + "expand promoting eph", + "expand with a new seg", + "no memory for a new seg", + "expand in next full GC" +}; +#endif //DT_LOG + +enum gc_heap_compact_reason +{ + compact_low_ephemeral = 0, + compact_high_frag = 1, + compact_no_gaps = 2, + compact_loh_forced = 3, + compact_last_gc = 4, + compact_induced_compacting = 5, + compact_fragmented_gen0 = 6, + compact_high_mem_load = 7, + compact_high_mem_frag = 8, + compact_vhigh_mem_frag = 9, + compact_no_gc_mode = 10, + max_compact_reasons_count = 11 +}; + +#ifndef DACCESS_COMPILE +static BOOL gc_heap_compact_reason_mandatory_p[] = +{ + TRUE, //compact_low_ephemeral = 0, + FALSE, //compact_high_frag = 1, + TRUE, //compact_no_gaps = 2, + TRUE, //compact_loh_forced = 3, + TRUE, //compact_last_gc = 4 + TRUE, //compact_induced_compacting = 5, + FALSE, //compact_fragmented_gen0 = 6, + FALSE, //compact_high_mem_load = 7, + TRUE, //compact_high_mem_frag = 8, + TRUE, //compact_vhigh_mem_frag = 9, + TRUE //compact_no_gc_mode = 10 +}; + +static BOOL gc_expand_mechanism_mandatory_p[] = +{ + FALSE, //expand_reuse_normal = 0, + TRUE, //expand_reuse_bestfit = 1, + FALSE, //expand_new_seg_ep = 2, // new seg with ephemeral promotion + TRUE, //expand_new_seg = 3, + FALSE, //expand_no_memory = 4, // we can't get a new seg. 
+ TRUE //expand_next_full_gc = 5 +}; +#endif //!DACCESS_COMPILE + +#ifdef DT_LOG +static char* str_heap_compact_reasons[] = +{ + "low on ephemeral space", + "high fragmetation", + "couldn't allocate gaps", + "user specfied compact LOH", + "last GC before OOM", + "induced compacting GC", + "fragmented gen0 (ephemeral GC)", + "high memory load (ephemeral GC)", + "high memory load and frag", + "very high memory load and frag", + "no gc mode" +}; +#endif //DT_LOG + +enum gc_mechanism_per_heap +{ + gc_heap_expand, + gc_heap_compact, + max_mechanism_per_heap +}; + +enum gc_mechanism_bit_per_heap +{ + gc_mark_list_bit = 0, + gc_demotion_bit = 1, + max_gc_mechanism_bits_count = 2 +}; + +#ifdef DT_LOG +struct gc_mechanism_descr +{ + char* name; + char** descr; +}; + +static gc_mechanism_descr gc_mechanisms_descr[max_mechanism_per_heap] = +{ + {"expanded heap ", str_heap_expand_mechanisms}, + {"compacted because of ", str_heap_compact_reasons} +}; +#endif //DT_LOG + +int index_of_set_bit (size_t power2); + +#define mechanism_mask (1 << (sizeof (uint32_t) * 8 - 1)) +// interesting per heap data we want to record for each GC. +class gc_history_per_heap +{ +public: + gc_generation_data gen_data[max_generation+2]; + maxgen_size_increase maxgen_size_info; + gen_to_condemn_tuning gen_to_condemn_reasons; + + // The mechanisms data is compacted in the following way: + // most significant bit indicates if we did the operation. + // the rest of the bits indicate the reason/mechanism + // why we chose to do the operation. For example: + // if we did a heap expansion using best fit we'd have + // 0x80000002 for the gc_heap_expand mechanism. + // Only one value is possible for each mechanism - meaning the + // values are all exclusive + // TODO: for the config stuff I need to think more about how to represent this + // because we might want to know all reasons (at least all mandatory ones) for + // compact. 
+ // TODO: no need to the MSB for this + uint32_t mechanisms[max_mechanism_per_heap]; + + // Each bit in this uint32_t represent if a mechanism was used or not. + uint32_t machanism_bits; + + uint32_t heap_index; + + size_t extra_gen0_committed; + + void set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value); + + void set_mechanism_bit (gc_mechanism_bit_per_heap mech_bit) + { + machanism_bits |= 1 << mech_bit; + } + + void clear_mechanism_bit (gc_mechanism_bit_per_heap mech_bit) + { + machanism_bits &= ~(1 << mech_bit); + } + + BOOL is_mechanism_bit_set (gc_mechanism_bit_per_heap mech_bit) + { + return (machanism_bits & (1 << mech_bit)); + } + + void clear_mechanism(gc_mechanism_per_heap mechanism_per_heap) + { + uint32_t* mechanism = &mechanisms[mechanism_per_heap]; + *mechanism = 0; + } + + int get_mechanism (gc_mechanism_per_heap mechanism_per_heap) + { + uint32_t mechanism = mechanisms[mechanism_per_heap]; + + if (mechanism & mechanism_mask) + { + int index = index_of_set_bit ((size_t)(mechanism & (~mechanism_mask))); + assert (index != -1); + return index; + } + + return -1; + } + + void print(); +}; + +// we store up to 32 boolean settings. +enum gc_global_mechanism_p +{ + global_concurrent = 0, + global_compaction = 1, + global_promotion = 2, + global_demotion = 3, + global_card_bundles = 4, + global_elevation = 5, + max_global_mechanisms_count +}; + +struct gc_history_global +{ + // We may apply other factors after we calculated gen0 budget in + // desired_new_allocation such as equalization or smoothing so + // record the final budget here. 
+    size_t final_youngest_desired;
+    uint32_t num_heaps;
+    int condemned_generation;
+    int gen0_reduction_count;
+    gc_reason reason;
+    int pause_mode;
+    uint32_t mem_pressure;
+    uint32_t global_mechanims_p;
+
+    void set_mechanism_p (gc_global_mechanism_p mechanism)
+    {
+        global_mechanims_p |= (1 << mechanism);
+    }
+
+    BOOL get_mechanism_p (gc_global_mechanism_p mechanism)
+    {
+        return (global_mechanims_p & (1 << mechanism));
+    }
+
+    void print();
+};
+
+#endif //__gc_record_h__
diff --git a/src/gc/gcscan.cpp b/src/gc/gcscan.cpp
new file mode 100644
index 0000000000..42989e0414
--- /dev/null
+++ b/src/gc/gcscan.cpp
@@ -0,0 +1,282 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*
+ * GCSCAN.CPP
+ *
+ * GC Root Scanning
+ *
+
+ *
+ */
+
+#include "common.h"
+
+#include "gcenv.h"
+
+#include "gcscan.h"
+#include "gc.h"
+#include "objecthandle.h"
+
+#ifdef DACCESS_COMPILE
+SVAL_IMPL_INIT(int32_t, GCScan, m_GcStructuresInvalidCnt, 1);
+#else //DACCESS_COMPILE
+VOLATILE(int32_t) GCScan::m_GcStructuresInvalidCnt = 1;
+#endif //DACCESS_COMPILE
+
+bool GCScan::GetGcRuntimeStructuresValid ()
+{
+    LIMITED_METHOD_CONTRACT;
+    SUPPORTS_DAC;
+    _ASSERTE ((int32_t)m_GcStructuresInvalidCnt >= 0);
+    return (int32_t)m_GcStructuresInvalidCnt == 0;
+}
+
+#ifdef DACCESS_COMPILE
+
+#ifndef FEATURE_REDHAWK
+void
+GCScan::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
+{
+    UNREFERENCED_PARAMETER(flags);
+    m_GcStructuresInvalidCnt.EnumMem();
+}
+#endif
+
+#else
+
+//
+// Dependent handle promotion scan support
+//
+
+// This method is called first during the mark phase. Its job is to set up the context for further scanning
+// (remembering the scan parameters the GC gives us and initializing some state variables we use to determine
+// whether further scans will be required or not).
+// +// This scan is not guaranteed to return complete results due to the GC context in which we are called. In +// particular it is possible, due to either a mark stack overflow or unsynchronized operation in server GC +// mode, that not all reachable objects will be reported as promoted yet. However, the operations we perform +// will still be correct and this scan allows us to spot a common optimization where no dependent handles are +// due for retirement in this particular GC. This is an important optimization to take advantage of since +// synchronizing the GC to calculate complete results is a costly operation. +void GCScan::GcDhInitialScan(promote_func* fn, int condemned, int max_gen, ScanContext* sc) +{ + // We allocate space for dependent handle scanning context during Ref_Initialize. Under server GC there + // are actually as many contexts as heaps (and CPUs). Ref_GetDependentHandleContext() retrieves the + // correct context for the current GC thread based on the ScanContext passed to us by the GC. + DhContext *pDhContext = Ref_GetDependentHandleContext(sc); + + // Record GC callback parameters in the DH context so that the GC doesn't continually have to pass the + // same data to each call. + pDhContext->m_pfnPromoteFunction = fn; + pDhContext->m_iCondemned = condemned; + pDhContext->m_iMaxGen = max_gen; + pDhContext->m_pScanContext = sc; + + // Look for dependent handle whose primary has been promoted but whose secondary has not. Promote the + // secondary in those cases. Additionally this scan sets the m_fUnpromotedPrimaries and m_fPromoted state + // flags in the DH context. The m_fUnpromotedPrimaries flag is the most interesting here: if this flag is + // false after the scan then it doesn't matter how many object promotions might currently be missing since + // there are no secondary objects that are currently unpromoted anyway. This is the (hopefully common) + // circumstance under which we don't have to perform any costly additional re-scans. 
+    Ref_ScanDependentHandlesForPromotion(pDhContext);
+}
+
+// This method is called after GcDhInitialScan and before each subsequent scan (GcDhReScan below). It
+// determines whether any handles are left that have unpromoted secondaries.
+bool GCScan::GcDhUnpromotedHandlesExist(ScanContext* sc)
+{
+    WRAPPER_NO_CONTRACT;
+    // Locate our dependent handle context based on the GC context.
+    DhContext *pDhContext = Ref_GetDependentHandleContext(sc);
+
+    return pDhContext->m_fUnpromotedPrimaries;
+}
+
+// Perform a re-scan of dependent handles, promoting secondaries associated with newly promoted primaries as
+// above. We may still need to call this multiple times since promotion of a secondary late in the table could
+// promote a primary earlier in the table. Also, GC graph promotions are not guaranteed to be complete by the
+// time the promotion callback returns (the mark stack can overflow). As a result the GC might have to call
+// this method in a loop. The scan records state that lets us know when to terminate (no further handles to
+// be promoted or no promotions in the last scan). Returns true if at least one object was promoted as a
+// result of the scan.
+bool GCScan::GcDhReScan(ScanContext* sc)
+{
+    // Locate our dependent handle context based on the GC context.
+    DhContext *pDhContext = Ref_GetDependentHandleContext(sc);
+
+    return Ref_ScanDependentHandlesForPromotion(pDhContext);
+}
+
+/*
+ * Scan for dead weak pointers
+ */
+
+void GCScan::GcWeakPtrScan( promote_func* fn, int condemned, int max_gen, ScanContext* sc )
+{
+    // Clear out weak pointers that are no longer live.
+    Ref_CheckReachable(condemned, max_gen, (uintptr_t)sc);
+
+    // Clear any secondary objects whose primary object is now definitely dead.
+ Ref_ScanDependentHandlesForClearing(condemned, max_gen, sc, fn); +} + +static void CALLBACK CheckPromoted(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t * /*pExtraInfo*/, uintptr_t /*lp1*/, uintptr_t /*lp2*/) +{ + LIMITED_METHOD_CONTRACT; + + LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Checking referent of Weak-", pObjRef, "to ", *pObjRef))); + + Object **pRef = (Object **)pObjRef; + if (!GCHeap::GetGCHeap()->IsPromoted(*pRef)) + { + LOG((LF_GC, LL_INFO100, LOG_HANDLE_OBJECT_CLASS("Severing Weak-", pObjRef, "to unreachable ", *pObjRef))); + + *pRef = NULL; + } + else + { + LOG((LF_GC, LL_INFO1000000, "reachable " LOG_OBJECT_CLASS(*pObjRef))); + } +} + +void GCScan::GcWeakPtrScanBySingleThread( int condemned, int max_gen, ScanContext* sc ) +{ + UNREFERENCED_PARAMETER(condemned); + UNREFERENCED_PARAMETER(max_gen); + GCToEEInterface::SyncBlockCacheWeakPtrScan(&CheckPromoted, (uintptr_t)sc, 0); +} + +void GCScan::GcScanSizedRefs(promote_func* fn, int condemned, int max_gen, ScanContext* sc) +{ + Ref_ScanSizedRefHandles(condemned, max_gen, sc, fn); +} + +void GCScan::GcShortWeakPtrScan(promote_func* fn, int condemned, int max_gen, + ScanContext* sc) +{ + UNREFERENCED_PARAMETER(fn); + Ref_CheckAlive(condemned, max_gen, (uintptr_t)sc); +} + +/* + * Scan all stack roots in this 'namespace' + */ + +void GCScan::GcScanRoots(promote_func* fn, int condemned, int max_gen, + ScanContext* sc) +{ + GCToEEInterface::GcScanRoots(fn, condemned, max_gen, sc); +} + +/* + * Scan all handle roots in this 'namespace' + */ + + +void GCScan::GcScanHandles (promote_func* fn, int condemned, int max_gen, + ScanContext* sc) +{ + STRESS_LOG1(LF_GC|LF_GCROOTS, LL_INFO10, "GcScanHandles (Promotion Phase = %d)\n", sc->promotion); + if (sc->promotion) + { + Ref_TracePinningRoots(condemned, max_gen, sc, fn); + Ref_TraceNormalRoots(condemned, max_gen, sc, fn); + } + else + { + Ref_UpdatePointers(condemned, max_gen, sc, fn); + Ref_UpdatePinnedPointers(condemned, max_gen, sc, fn); + 
Ref_ScanDependentHandlesForRelocation(condemned, max_gen, sc, fn); + } +} + + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +/* + * Scan all handle roots in this 'namespace' for profiling + */ + +void GCScan::GcScanHandlesForProfilerAndETW (int max_gen, ScanContext* sc) +{ + LIMITED_METHOD_CONTRACT; + + LOG((LF_GC|LF_GCROOTS, LL_INFO10, "Profiler Root Scan Phase, Handles\n")); + Ref_ScanPointersForProfilerAndETW(max_gen, (uintptr_t)sc); +} + +/* + * Scan dependent handles in this 'namespace' for profiling + */ +void GCScan::GcScanDependentHandlesForProfilerAndETW (int max_gen, ProfilingScanContext* sc) +{ + LIMITED_METHOD_CONTRACT; + + LOG((LF_GC|LF_GCROOTS, LL_INFO10, "Profiler Root Scan Phase, DependentHandles\n")); + Ref_ScanDependentHandlesForProfilerAndETW(max_gen, sc); +} + +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +void GCScan::GcRuntimeStructuresValid (BOOL bValid) +{ + WRAPPER_NO_CONTRACT; + if (!bValid) + { + int32_t result; + result = Interlocked::Increment (&m_GcStructuresInvalidCnt); + _ASSERTE (result > 0); + } + else + { + int32_t result; + result = Interlocked::Decrement (&m_GcStructuresInvalidCnt); + _ASSERTE (result >= 0); + } +} + +void GCScan::GcDemote (int condemned, int max_gen, ScanContext* sc) +{ + Ref_RejuvenateHandles (condemned, max_gen, (uintptr_t)sc); + if (!GCHeap::IsServerHeap() || sc->thread_number == 0) + GCToEEInterface::SyncBlockCacheDemote(max_gen); +} + +void GCScan::GcPromotionsGranted (int condemned, int max_gen, ScanContext* sc) +{ + Ref_AgeHandles(condemned, max_gen, (uintptr_t)sc); + if (!GCHeap::IsServerHeap() || sc->thread_number == 0) + GCToEEInterface::SyncBlockCachePromotionsGranted(max_gen); +} + + +size_t GCScan::AskForMoreReservedMemory (size_t old_size, size_t need_size) +{ + LIMITED_METHOD_CONTRACT; + +#if !defined(FEATURE_CORECLR) && !defined(FEATURE_REDHAWK) + // call the host.... 
+ + IGCHostControl *pGCHostControl = CorHost::GetGCHostControl(); + + if (pGCHostControl) + { + size_t new_max_limit_size = need_size; + pGCHostControl->RequestVirtualMemLimit (old_size, + (SIZE_T*)&new_max_limit_size); + return new_max_limit_size; + } +#endif + + return old_size + need_size; +} + +void GCScan::VerifyHandleTable(int condemned, int max_gen, ScanContext* sc) +{ + LIMITED_METHOD_CONTRACT; + Ref_VerifyHandleTable(condemned, max_gen, sc); +} + +#endif // !DACCESS_COMPILE diff --git a/src/gc/gcscan.h b/src/gc/gcscan.h new file mode 100644 index 0000000000..3515b8e1b6 --- /dev/null +++ b/src/gc/gcscan.h @@ -0,0 +1,109 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + * GCSCAN.H + * + * GC Root Scanning + * + + * + */ + +#ifndef _GCSCAN_H_ +#define _GCSCAN_H_ + +#include "gc.h" + +// Scanning dependent handles for promotion can become a complex operation due to cascaded dependencies and +// other issues (see the comments for GcDhInitialScan and friends in gcscan.cpp for further details). As a +// result we need to maintain a context between all the DH scanning methods called during a single mark phase. +// The structure below describes this context. We allocate one of these per GC heap at Ref_Initialize time and +// select between them based on the ScanContext passed to us by the GC during the mark phase. +struct DhContext +{ + bool m_fUnpromotedPrimaries; // Did last scan find at least one non-null unpromoted primary? + bool m_fPromoted; // Did last scan promote at least one secondary? 
+    promote_func   *m_pfnPromoteFunction;   // GC promote callback to be used for all secondary promotions
+    int             m_iCondemned;           // The condemned generation
+    int             m_iMaxGen;              // The maximum generation
+    ScanContext    *m_pScanContext;         // The GC's scan context for this phase
+};
+
+class GCScan
+{
+    friend struct ::_DacGlobals;
+
+  public:
+
+    static void GcScanSizedRefs(promote_func* fn, int condemned, int max_gen, ScanContext* sc);
+
+    // Regular stack Roots
+    static void GcScanRoots (promote_func* fn, int condemned, int max_gen, ScanContext* sc);
+
+    //
+    static void GcScanHandles (promote_func* fn, int condemned, int max_gen, ScanContext* sc);
+
+    static void GcRuntimeStructuresValid (BOOL bValid);
+
+    static bool GetGcRuntimeStructuresValid ();
+#ifdef DACCESS_COMPILE
+    static void EnumMemoryRegions(CLRDataEnumMemoryFlags flags);
+#endif // DACCESS_COMPILE
+
+#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
+    static void GcScanHandlesForProfilerAndETW (int max_gen, ScanContext* sc);
+    static void GcScanDependentHandlesForProfilerAndETW (int max_gen, ProfilingScanContext* sc);
+#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
+
+    // scan for dead weak pointers
+    static void GcWeakPtrScan (promote_func* fn, int condemned, int max_gen, ScanContext*sc );
+    static void GcWeakPtrScanBySingleThread (int condemned, int max_gen, ScanContext*sc );
+
+    // scan for dead weak pointers
+    static void GcShortWeakPtrScan (promote_func* fn, int condemned, int max_gen,
+                                    ScanContext* sc);
+
+    //
+    // Dependent handle promotion scan support
+    //
+
+    // Perform initial (incomplete) scan which will determine if there's any further work required.
+    static void GcDhInitialScan(promote_func* fn, int condemned, int max_gen, ScanContext* sc);
+
+    // Called between scans to ask if any handles with an unpromoted secondary existed at the end of the last
+    // scan.
+    static bool GcDhUnpromotedHandlesExist(ScanContext* sc);
+
+    // Rescan the handles for additional primaries that have been promoted since the last scan. Return true if
+    // any objects were promoted as a result.
+    static bool GcDhReScan(ScanContext* sc);
+
+    // post-promotions callback
+    static void GcPromotionsGranted (int condemned, int max_gen,
+                                     ScanContext* sc);
+
+    // post-promotions callback some roots were demoted
+    static void GcDemote (int condemned, int max_gen, ScanContext* sc);
+
+    static size_t AskForMoreReservedMemory (size_t old_size, size_t need_size);
+
+    static void VerifyHandleTable(int condemned, int max_gen, ScanContext* sc);
+
+private:
+#ifdef DACCESS_COMPILE
+    SVAL_DECL(int32_t, m_GcStructuresInvalidCnt);
+#else
+    static VOLATILE(int32_t) m_GcStructuresInvalidCnt;
+#endif //DACCESS_COMPILE
+};
+
+// These two functions are utilized to scan the heap if requested by ETW
+// or a profiler. The implementations of these two functions are in profheapwalkhelper.cpp.
+#if defined(FEATURE_EVENT_TRACE) | defined(GC_PROFILING)
+void ScanRootsHelper(Object* pObj, Object** ppRoot, ScanContext * pSC, DWORD dwFlags);
+BOOL HeapWalkHelper(Object * pBO, void * pvContext);
+#endif
+
+#endif // _GCSCAN_H_
diff --git a/src/gc/gcsvr.cpp b/src/gc/gcsvr.cpp
new file mode 100644
index 0000000000..cf5fc9335f
--- /dev/null
+++ b/src/gc/gcsvr.cpp
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+ + + +#include "common.h" + +#if defined(FEATURE_SVR_GC) + +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" +#include "gcdesc.h" + +#define SERVER_GC 1 + +namespace SVR { +#include "gcimpl.h" +#include "gc.cpp" +} + +#endif // defined(FEATURE_SVR_GC) diff --git a/src/gc/gcwks.cpp b/src/gc/gcwks.cpp new file mode 100644 index 0000000000..574df8215a --- /dev/null +++ b/src/gc/gcwks.cpp @@ -0,0 +1,23 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + + +#include "common.h" + +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" +#include "gcdesc.h" + +#ifdef SERVER_GC +#undef SERVER_GC +#endif + +namespace WKS { +#include "gcimpl.h" +#include "gc.cpp" +} + diff --git a/src/gc/handletable.cpp b/src/gc/handletable.cpp new file mode 100644 index 0000000000..43b43ffcea --- /dev/null +++ b/src/gc/handletable.cpp @@ -0,0 +1,1474 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + * Generational GC handle manager. Main Entrypoint Layer. + * + * Implements generic support for external roots into a GC heap. 
+ *
+
+ *
+ */
+
+#include "common.h"
+
+#include "gcenv.h"
+
+#include "gc.h"
+
+#include "objecthandle.h"
+#include "handletablepriv.h"
+
+#ifndef FEATURE_REDHAWK
+#include "nativeoverlapped.h"
+#endif
+
+/****************************************************************************
+ *
+ * FORWARD DECLARATIONS
+ *
+ ****************************************************************************/
+
+#ifdef _DEBUG
+void DEBUG_PostGCScanHandler(HandleTable *pTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, ScanCallbackInfo *info);
+void DEBUG_LogScanningStatistics(HandleTable *pTable, uint32_t level);
+#endif
+
+/*--------------------------------------------------------------------------*/
+
+
+
+/****************************************************************************
+ *
+ * HELPER ROUTINES
+ *
+ ****************************************************************************/
+
+/*
+ * Table
+ *
+ * Gets and validates the table pointer from a table handle.
+ *
+ */
+__inline PTR_HandleTable Table(HHANDLETABLE hTable)
+{
+    WRAPPER_NO_CONTRACT;
+    SUPPORTS_DAC;
+
+    // convert the handle to a pointer
+    PTR_HandleTable pTable = (PTR_HandleTable)hTable;
+
+    // sanity
+    _ASSERTE(pTable);
+
+    // return the table pointer
+    return pTable;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+
+/****************************************************************************
+ *
+ * MAIN ENTRYPOINTS
+ *
+ ****************************************************************************/
+#ifndef DACCESS_COMPILE
+/*
+ * HndCreateHandleTable
+ *
+ * Allocates and initializes a handle table.
+ * + */ +HHANDLETABLE HndCreateHandleTable(const uint32_t *pTypeFlags, uint32_t uTypeCount, ADIndex uADIndex) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + INJECT_FAULT(return NULL); + } + CONTRACTL_END; + + // sanity + _ASSERTE(uTypeCount); + + // verify that we can handle the specified number of types + // may need to increase HANDLE_MAX_INTERNAL_TYPES (by 4) + _ASSERTE(uTypeCount <= HANDLE_MAX_PUBLIC_TYPES); + + // verify that segment header layout we're using fits expected size + _ASSERTE(sizeof(_TableSegmentHeader) <= HANDLE_HEADER_SIZE); + // if you hit this then TABLE LAYOUT IS BROKEN + + // compute the size of the handle table allocation + uint32_t dwSize = sizeof(HandleTable) + (uTypeCount * sizeof(HandleTypeCache)); + + // allocate the table + HandleTable *pTable = (HandleTable *) new (nothrow) uint8_t[dwSize]; + if (pTable == NULL) + return NULL; + + memset (pTable, 0, dwSize); + + // allocate the initial handle segment + pTable->pSegmentList = SegmentAlloc(pTable); + + // if that failed then we are also out of business + if (!pTable->pSegmentList) + { + // free the table's memory and get out + delete [] (uint8_t*)pTable; + return NULL; + } + + // initialize the table's lock + // We need to allow CRST_UNSAFE_SAMELEVEL, because + // during AD unload, we need to move some TableSegment from unloaded domain to default domain. + // We need to take both locks for the two HandleTable's to avoid racing with concurrent gc thread. 
+    if (!pTable->Lock.InitNoThrow(CrstHandleTable, CrstFlags(CRST_REENTRANCY | CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD | CRST_UNSAFE_SAMELEVEL)))
+    {
+        SegmentFree(pTable->pSegmentList);
+        delete [] (uint8_t*)pTable;
+        return NULL;
+    }
+
+    // remember how many types we are supporting
+    pTable->uTypeCount = uTypeCount;
+
+    // Store user data
+    pTable->uTableIndex = (uint32_t) -1;
+    pTable->uADIndex = uADIndex;
+
+    // loop over various arrays and initialize them
+    uint32_t u;
+
+    // initialize the type flags for the types we were passed
+    for (u = 0; u < uTypeCount; u++)
+        pTable->rgTypeFlags[u] = pTypeFlags[u];
+
+    // preinit the rest to HNDF_NORMAL
+    while (u < HANDLE_MAX_INTERNAL_TYPES)
+        pTable->rgTypeFlags[u++] = HNDF_NORMAL;
+
+    // initialize the main cache
+    for (u = 0; u < uTypeCount; u++)
+    {
+        // at init time, the only non-zero field in a type cache is the free index
+        pTable->rgMainCache[u].lFreeIndex = HANDLES_PER_CACHE_BANK;
+    }
+
+#ifdef _DEBUG
+    // set up scanning stats
+    pTable->_DEBUG_iMaxGen = -1;
+#endif
+
+    // all done - return the newly created table
+    return (HHANDLETABLE)pTable;
+}
+
+
+/*
+ * HndDestroyHandleTable
+ *
+ * Cleans up and frees the specified handle table.
+ *
+ */
+void HndDestroyHandleTable(HHANDLETABLE hTable)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // fetch the handle table pointer
+    HandleTable *pTable = Table(hTable);
+
+    // decrement handle count by number of handles in this table
+    COUNTER_ONLY(GetPerfCounters().m_GC.cHandles -= HndCountHandles(hTable));
+
+    // We are going to free the memory for this HandleTable.
+    // Let us reset the copy in g_pHandleTableArray to NULL.
+    // Otherwise, GC will think this HandleTable is still available.
+ + // free the lock + pTable->Lock.Destroy(); + + // fetch the segment list and null out the list pointer + TableSegment *pSegment = pTable->pSegmentList; + pTable->pSegmentList = NULL; + + // walk the segment list, freeing the segments as we go + while (pSegment) + { + // fetch the next segment + TableSegment *pNextSegment = pSegment->pNextSegment; + + // free the current one and advance to the next + SegmentFree(pSegment); + pSegment = pNextSegment; + } + + // free the table's memory + delete [] (uint8_t*) pTable; +} +/* + * HndSetHandleTableIndex + * + * Sets the index associated with a handle table at creation + */ +void HndSetHandleTableIndex(HHANDLETABLE hTable, uint32_t uTableIndex) +{ + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + pTable->uTableIndex = uTableIndex; +} +#endif // !DACCESS_COMPILE + +/* + * HndGetHandleTableIndex + * + * Retrieves the index associated with a handle table at creation + */ +uint32_t HndGetHandleTableIndex(HHANDLETABLE hTable) +{ + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + _ASSERTE (pTable->uTableIndex != (uint32_t) -1); // We have not set uTableIndex yet. 
+ return pTable->uTableIndex; +} + +/* + * HndGetHandleTableIndex + * + * Retrieves the AppDomain index associated with a handle table at creation + */ +ADIndex HndGetHandleTableADIndex(HHANDLETABLE hTable) +{ + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + return pTable->uADIndex; +} + +/* + * HndGetHandleTableIndex + * + * Retrieves the AppDomain index associated with a handle table at creation + */ +ADIndex HndGetHandleADIndex(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + // fetch the handle table pointer + HandleTable *pTable = Table(HndGetHandleTable(handle)); + + return pTable->uADIndex; +} + +#ifndef DACCESS_COMPILE +/* + * HndCreateHandle + * + * Entrypoint for allocating an individual handle. + * + */ +OBJECTHANDLE HndCreateHandle(HHANDLETABLE hTable, uint32_t uType, OBJECTREF object, uintptr_t lExtraInfo) +{ + CONTRACTL + { +#ifdef FEATURE_REDHAWK + // Redhawk returns NULL on failure. + NOTHROW; +#else + THROWS; +#endif + GC_NOTRIGGER; + if (object != NULL) + { + MODE_COOPERATIVE; + } + else + { + MODE_ANY; + } + SO_INTOLERANT; + } + CONTRACTL_END; + +#if defined( _DEBUG) && !defined(FEATURE_REDHAWK) + if (g_pConfig->ShouldInjectFault(INJECTFAULT_HANDLETABLE)) + { + FAULT_NOT_FATAL(); + char *a = new char; + delete a; + } +#endif // _DEBUG && !FEATURE_REDHAWK + + VALIDATEOBJECTREF(object); + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + // sanity check the type index + _ASSERTE(uType < pTable->uTypeCount); + + // get a handle from the table's cache + OBJECTHANDLE handle = TableAllocSingleHandleFromCache(pTable, uType); + + // did the allocation succeed? 
+ if (!handle) + { +#ifdef FEATURE_REDHAWK + return NULL; +#else + ThrowOutOfMemory(); +#endif + } + +#ifdef DEBUG_DestroyedHandleValue + if (*(_UNCHECKED_OBJECTREF *)handle == DEBUG_DestroyedHandleValue) + *(_UNCHECKED_OBJECTREF *)handle = NULL; +#endif + + // yep - the handle better not point at anything yet + _ASSERTE(*(_UNCHECKED_OBJECTREF *)handle == NULL); + + // we are not holding the lock - check to see if there is nonzero extra info + if (lExtraInfo) + { + // initialize the user data BEFORE assigning the referent + // this ensures proper behavior if we are currently scanning + HandleQuickSetUserData(handle, lExtraInfo); + } + + // store the reference + HndAssignHandle(handle, object); + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + g_dwHandles++; +#endif // ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef GC_PROFILING + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->HandleCreated((uintptr_t)handle, (ObjectID)OBJECTREF_TO_UNCHECKED_OBJECTREF(object)); + END_PIN_PROFILER(); + } +#endif //GC_PROFILING + + STRESS_LOG2(LF_GC, LL_INFO1000, "CreateHandle: %p, type=%d\n", handle, uType); + + // return the result + return handle; +} +#endif // !DACCESS_COMPILE + +#ifdef _DEBUG +void ValidateFetchObjrefForHandle(OBJECTREF objref, ADIndex appDomainIndex) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_DEBUG_ONLY; + + BEGIN_DEBUG_ONLY_CODE; + VALIDATEOBJECTREF (objref); + + AppDomain *pDomain = SystemDomain::GetAppDomainAtIndex(appDomainIndex); + + // Access to a handle in unloaded domain is not allowed + _ASSERTE(pDomain != NULL); + _ASSERTE(!pDomain->NoAccessToHandleTable()); + +#if CHECK_APP_DOMAIN_LEAKS + if (g_pConfig->AppDomainLeaks() && objref != NULL) + { + if (appDomainIndex.m_dwIndex) + objref->TryAssignAppDomain(pDomain); + else + objref->TrySetAppDomainAgile(); + } +#endif + END_DEBUG_ONLY_CODE; 
+} + +void ValidateAssignObjrefForHandle(OBJECTREF objref, ADIndex appDomainIndex) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_MODE_COOPERATIVE; + STATIC_CONTRACT_DEBUG_ONLY; + + BEGIN_DEBUG_ONLY_CODE; + + VALIDATEOBJECTREF (objref); + + AppDomain *pDomain = SystemDomain::GetAppDomainAtIndex(appDomainIndex); + + // Access to a handle in unloaded domain is not allowed + _ASSERTE(pDomain != NULL); + _ASSERTE(!pDomain->NoAccessToHandleTable()); + +#if CHECK_APP_DOMAIN_LEAKS + if (g_pConfig->AppDomainLeaks() && objref != NULL) + { + if (appDomainIndex.m_dwIndex) + objref->TryAssignAppDomain(pDomain); + else + objref->TrySetAppDomainAgile(); + } +#endif + END_DEBUG_ONLY_CODE; +} + +void ValidateAppDomainForHandle(OBJECTHANDLE handle) +{ + STATIC_CONTRACT_DEBUG_ONLY; + STATIC_CONTRACT_NOTHROW; + +#ifdef DEBUG_DestroyedHandleValue + // Verify that we are not trying to access freed handle. + _ASSERTE("Attempt to access destroyed handle." && *(_UNCHECKED_OBJECTREF *)handle != DEBUG_DestroyedHandleValue); +#endif +#ifdef DACCESS_COMPILE + UNREFERENCED_PARAMETER(handle); +#else + BEGIN_DEBUG_ONLY_CODE; + ADIndex id = HndGetHandleADIndex(handle); + AppDomain *pUnloadingDomain = SystemDomain::AppDomainBeingUnloaded(); + if (!pUnloadingDomain || pUnloadingDomain->GetIndex() != id) + { + return; + } + if (!pUnloadingDomain->NoAccessToHandleTable()) + { + return; + } + _ASSERTE (!"Access to a handle in unloaded domain is not allowed"); + END_DEBUG_ONLY_CODE; +#endif // !DACCESS_COMPILE +} +#endif + + +#ifndef DACCESS_COMPILE +/* + * HndDestroyHandle + * + * Entrypoint for freeing an individual handle. 
+ * + */ +void HndDestroyHandle(HHANDLETABLE hTable, uint32_t uType, OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + SO_TOLERANT; + CAN_TAKE_LOCK; // because of TableFreeSingleHandleToCache + } + CONTRACTL_END; + + STRESS_LOG2(LF_GC, LL_INFO1000, "DestroyHandle: *%p->%p\n", handle, *(_UNCHECKED_OBJECTREF *)handle); + + FireEtwDestroyGCHandle((void*) handle, GetClrInstanceId()); + FireEtwPrvDestroyGCHandle((void*) handle, GetClrInstanceId()); + + // sanity check handle we are being asked to free + _ASSERTE(handle); + +#ifdef _DEBUG + ValidateAppDomainForHandle(handle); +#endif + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + +#ifdef GC_PROFILING + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->HandleDestroyed((uintptr_t)handle); + END_PIN_PROFILER(); + } +#endif //GC_PROFILING + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + g_dwHandles--; +#endif // ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + + // sanity check the type index + _ASSERTE(uType < pTable->uTypeCount); + + _ASSERTE(HandleFetchType(handle) == uType); + + // return the handle to the table's cache + TableFreeSingleHandleToCache(pTable, uType, handle); +} + + +/* + * HndDestroyHandleOfUnknownType + * + * Entrypoint for freeing an individual handle whose type is unknown. + * + */ +void HndDestroyHandleOfUnknownType(HHANDLETABLE hTable, OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + MODE_ANY; + } + CONTRACTL_END; + + // sanity check handle we are being asked to free + _ASSERTE(handle); + +#ifdef FEATURE_COMINTEROP + // If we're being asked to destroy a WinRT weak handle, that will cause a leak + // of the IWeakReference* that it holds in its extra data. Instead of using this + // API use DestroyWinRTWeakHandle instead. 
+ _ASSERTE(HandleFetchType(handle) != HNDTYPE_WEAK_WINRT); +#endif // FEATURE_COMINTEROP + + // fetch the type and then free normally + HndDestroyHandle(hTable, HandleFetchType(handle), handle); +} + + +/* + * HndCreateHandles + * + * Entrypoint for allocating handles in bulk. + * + */ +uint32_t HndCreateHandles(HHANDLETABLE hTable, uint32_t uType, OBJECTHANDLE *pHandles, uint32_t uCount) +{ + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + // sanity check the type index + _ASSERTE(uType < pTable->uTypeCount); + + // keep track of the number of handles we've allocated + uint32_t uSatisfied = 0; + + // if this is a large number of handles then bypass the cache + if (uCount > SMALL_ALLOC_COUNT) + { + CrstHolder ch(&pTable->Lock); + + // allocate handles in bulk from the main handle table + uSatisfied = TableAllocBulkHandles(pTable, uType, pHandles, uCount); + } + + // do we still need to get some handles? + if (uSatisfied < uCount) + { + // get some handles from the cache + uSatisfied += TableAllocHandlesFromCache(pTable, uType, pHandles + uSatisfied, uCount - uSatisfied); + } + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + g_dwHandles += uSatisfied; +#endif // ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + +#ifdef GC_PROFILING + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + for (uint32_t i = 0; i < uSatisfied; i++) + g_profControlBlock.pProfInterface->HandleCreated((uintptr_t)pHandles[i], 0); + END_PIN_PROFILER(); + } +#endif //GC_PROFILING + + // return the number of handles we allocated + return uSatisfied; +} + + +/* + * HndDestroyHandles + * + * Entrypoint for freeing handles in bulk. 
+ * + */ +void HndDestroyHandles(HHANDLETABLE hTable, uint32_t uType, const OBJECTHANDLE *pHandles, uint32_t uCount) +{ + WRAPPER_NO_CONTRACT; + +#ifdef _DEBUG + ValidateAppDomainForHandle(pHandles[0]); +#endif + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + // sanity check the type index + _ASSERTE(uType < pTable->uTypeCount); + +#ifdef GC_PROFILING + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + for (uint32_t i = 0; i < uCount; i++) + g_profControlBlock.pProfInterface->HandleDestroyed((uintptr_t)pHandles[i]); + END_PIN_PROFILER(); + } +#endif + +#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) + g_dwHandles -= uCount; +#endif // ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE + + // is this a small number of handles? + if (uCount <= SMALL_ALLOC_COUNT) + { + // yes - free them via the handle cache + TableFreeHandlesToCache(pTable, uType, pHandles, uCount); + return; + } + + // acquire the handle manager lock + { + CrstHolder ch(&pTable->Lock); + + // free the unsorted handles in bulk to the main handle table + TableFreeBulkUnpreparedHandles(pTable, uType, pHandles, uCount); + } +} + +/* + * HndSetHandleExtraInfo + * + * Stores owner data with handle. + * + */ +void HndSetHandleExtraInfo(OBJECTHANDLE handle, uint32_t uType, uintptr_t lExtraInfo) +{ + WRAPPER_NO_CONTRACT; + + // fetch the user data slot for this handle if we have the right type + uintptr_t *pUserData = HandleValidateAndFetchUserDataPointer(handle, uType); + + // is there a slot? + if (pUserData) + { + // yes - store the info + *pUserData = lExtraInfo; + } +} + +/* +* HndCompareExchangeHandleExtraInfo +* +* Stores owner data with handle. 
+* +*/ +uintptr_t HndCompareExchangeHandleExtraInfo(OBJECTHANDLE handle, uint32_t uType, uintptr_t lOldExtraInfo, uintptr_t lNewExtraInfo) +{ + WRAPPER_NO_CONTRACT; + + // fetch the user data slot for this handle if we have the right type + uintptr_t *pUserData = HandleValidateAndFetchUserDataPointer(handle, uType); + + // is there a slot? + if (pUserData) + { + // yes - attempt to store the info + return (uintptr_t)Interlocked::CompareExchangePointer((void**)pUserData, (void*)lNewExtraInfo, (void*)lOldExtraInfo); + } + + _ASSERTE(!"Shouldn't be trying to call HndCompareExchangeHandleExtraInfo on handle types without extra info"); + return NULL; +} +#endif // !DACCESS_COMPILE + +/* + * HndGetHandleExtraInfo + * + * Retrieves owner data from handle. + * + */ +uintptr_t HndGetHandleExtraInfo(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + // assume zero until we actually get it + uintptr_t lExtraInfo = 0L; + + // fetch the user data slot for this handle + PTR_uintptr_t pUserData = HandleQuickFetchUserDataPointer(handle); + + // if we did then copy the value + if (pUserData) + { + lExtraInfo = *(pUserData); + } + + // return the value to our caller + return lExtraInfo; +} + +/* + * HndGetHandleTable + * + * Returns the containing table of a handle. 
+ * + */ +HHANDLETABLE HndGetHandleTable(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + SUPPORTS_DAC; + + PTR_HandleTable pTable = HandleFetchHandleTable(handle); + + return (HHANDLETABLE)pTable; +} + +void HndLogSetEvent(OBJECTHANDLE handle, _UNCHECKED_OBJECTREF value) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_MODE_COOPERATIVE; + +#if !defined(DACCESS_COMPILE) && defined(FEATURE_EVENT_TRACE) + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, SetGCHandle) || + ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_Context, SetGCHandle)) + { + uint32_t hndType = HandleFetchType(handle); + ADIndex appDomainIndex = HndGetHandleADIndex(handle); + AppDomain* pAppDomain = SystemDomain::GetAppDomainAtIndex(appDomainIndex); + uint32_t generation = value != 0 ? GCHeap::GetGCHeap()->WhichGeneration(value) : 0; + FireEtwSetGCHandle((void*) handle, value, hndType, generation, (int64_t) pAppDomain, GetClrInstanceId()); + FireEtwPrvSetGCHandle((void*) handle, value, hndType, generation, (int64_t) pAppDomain, GetClrInstanceId()); + +#ifndef FEATURE_REDHAWK + // Also fire the things pinned by Async pinned handles + if (hndType == HNDTYPE_ASYNCPINNED) + { + if (value->GetMethodTable() == g_pOverlappedDataClass) + { + OverlappedDataObject* overlapped = (OverlappedDataObject*) value; + if (overlapped->m_isArray) + { + ArrayBase* pUserObject = (ArrayBase*)OBJECTREFToObject(overlapped->m_userObject); + Object **ppObj = (Object**)pUserObject->GetDataPtr(TRUE); + size_t num = pUserObject->GetNumComponents(); + for (size_t i = 0; i < num; i ++) + { + value = ppObj[i]; + uint32_t generation = value != 0 ? GCHeap::GetGCHeap()->WhichGeneration(value) : 0; + FireEtwSetGCHandle(overlapped, value, HNDTYPE_PINNED, generation, (int64_t) pAppDomain, GetClrInstanceId()); + } + } + else + { + value = OBJECTREF_TO_UNCHECKED_OBJECTREF(overlapped->m_userObject); + uint32_t generation = value != 0 ? 
GCHeap::GetGCHeap()->WhichGeneration(value) : 0; + FireEtwSetGCHandle(overlapped, value, HNDTYPE_PINNED, generation, (int64_t) pAppDomain, GetClrInstanceId()); + } + } + } +#endif // FEATURE_REDHAWK + } +#else + UNREFERENCED_PARAMETER(handle); + UNREFERENCED_PARAMETER(value); +#endif +} + +/* + * HndWriteBarrier + * + * Resets the generation number for the handle's clump to zero. + * + */ +void HndWriteBarrier(OBJECTHANDLE handle, OBJECTREF objref) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_MODE_COOPERATIVE; + + // unwrap the objectref we were given + _UNCHECKED_OBJECTREF value = OBJECTREF_TO_UNCHECKED_OBJECTREF(objref); + + _ASSERTE (objref != NULL); + + // find the write barrier for this handle + uint8_t *barrier = (uint8_t *)((uintptr_t)handle & HANDLE_SEGMENT_ALIGN_MASK); + + // sanity + _ASSERTE(barrier); + + // find the offset of this handle into the segment + uintptr_t offset = (uintptr_t)handle & HANDLE_SEGMENT_CONTENT_MASK; + + // make sure it is in the handle area and not the header + _ASSERTE(offset >= HANDLE_HEADER_SIZE); + + // compute the clump index for this handle + offset = (offset - HANDLE_HEADER_SIZE) / (HANDLE_SIZE * HANDLE_HANDLES_PER_CLUMP); + + // Be careful to read and write the age byte via volatile operations. Otherwise the compiler has been + // observed to translate the read + conditional write sequence below into an unconditional read/write + // (utilizing a conditional register move to determine whether the write is an update or simply writes + // back what was read). This is a legal transformation for non-volatile accesses but obviously leads to a + // race condition where we can lose an update (see the comment below for the race condition). 
+ volatile uint8_t * pClumpAge = barrier + offset; + + // if this age is smaller than age of the clump, update the clump age + if (*pClumpAge != 0) // Perf optimization: if clumpAge is 0, nothing more to do + { + // find out generation + int generation = GCHeap::GetGCHeap()->WhichGeneration(value); + uint32_t uType = HandleFetchType(handle); + +#ifndef FEATURE_REDHAWK + //OverlappedData need special treatment: because all user data pointed by it needs to be reported by this handle, + //its age is consider to be min age of the user data, to be simple, we just make it 0 + if (uType == HNDTYPE_ASYNCPINNED && objref->GetGCSafeMethodTable () == g_pOverlappedDataClass) + { + generation = 0; + } +#endif // !FEATURE_REDHAWK + + if (uType == HNDTYPE_DEPENDENT) + { + generation = 0; + } + + if (*pClumpAge > (uint8_t) generation) + { + // We have to be careful here. HndWriteBarrier is not under any synchronization + // Consider the scenario where 2 threads are hitting the line below at the same + // time. Only one will win. If the winner has an older age than the loser, we + // just created a potential GC hole (The clump will not be reporting the + // youngest handle in the clump, thus GC may skip the clump). To fix this + // we just set the clump age to 0, which means that whoever wins the race + // results are the same, as GC will always look at the clump + *pClumpAge = (uint8_t)0; + } + } +} + +/* + * HndEnumHandles + * + * Enumerates all handles of the specified type in the handle table. + * + * This entrypoint is provided for utility code (debugger support etc) that + * needs to enumerate all roots in the handle table. 
+ * + */ +void HndEnumHandles(HHANDLETABLE hTable, const uint32_t *puType, uint32_t uTypeCount, + HANDLESCANPROC pfnEnum, uintptr_t lParam1, uintptr_t lParam2, bool fAsync) +{ + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + PTR_HandleTable pTable = Table(hTable); + + // per-block scanning callback + BLOCKSCANPROC pfnBlock; + + // do we need to support user data? + BOOL fEnumUserData = TypesRequireUserDataScanning(pTable, puType, uTypeCount); + + if (fEnumUserData) + { + // scan all handles with user data + pfnBlock = BlockScanBlocksWithUserData; + } + else + { + // scan all handles without user data + pfnBlock = BlockScanBlocksWithoutUserData; + } + + // set up parameters for handle enumeration + ScanCallbackInfo info; + + info.uFlags = (fAsync? HNDGCF_ASYNC : HNDGCF_NORMAL); + info.fEnumUserData = fEnumUserData; + info.dwAgeMask = 0; + info.pCurrentSegment = NULL; + info.pfnScan = pfnEnum; + info.param1 = lParam1; + info.param2 = lParam2; + + // choose a scanning method based on the async flag + TABLESCANPROC pfnScanTable = TableScanHandles; + if (fAsync) + pfnScanTable = xxxTableScanHandlesAsync; + + { + // acquire the handle manager lock + CrstHolderWithState ch(&pTable->Lock); + + // scan the table + pfnScanTable(pTable, puType, uTypeCount, FullSegmentIterator, pfnBlock, &info, &ch); + } +} + +/* + * HndScanHandlesForGC + * + * Multiple type scanning entrypoint for GC. + * + * This entrypoint is provided for GC-time scnas of the handle table ONLY. It + * enables ephemeral scanning of the table, and optionally ages the write barrier + * as it scans. 
+ * + */ +void HndScanHandlesForGC(HHANDLETABLE hTable, HANDLESCANPROC scanProc, uintptr_t param1, uintptr_t param2, + const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, uint32_t flags) +{ + WRAPPER_NO_CONTRACT; + + // fetch the table pointer + PTR_HandleTable pTable = Table(hTable); + + // per-segment and per-block callbacks + SEGMENTITERATOR pfnSegment; + BLOCKSCANPROC pfnBlock = NULL; + + // do we need to support user data? + BOOL enumUserData = + ((flags & HNDGCF_EXTRAINFO) && + TypesRequireUserDataScanning(pTable, types, typeCount)); + + // what type of GC are we performing? + if (condemned >= maxgen) + { + // full GC - use our full-service segment iterator + pfnSegment = FullSegmentIterator; + + // see if there is a callback + if (scanProc) + { + // do we need to scan blocks with user data? + if (enumUserData) + { + // scan all with user data + pfnBlock = BlockScanBlocksWithUserData; + } + else + { + // scan all without user data + pfnBlock = BlockScanBlocksWithoutUserData; + } + } + else if (flags & HNDGCF_AGE) + { + // there is only aging to do + pfnBlock = BlockAgeBlocks; + } + } + else + { + // this is an ephemeral GC - is it g0? 
+ if (condemned == 0) + { + // yes - do bare-bones enumeration + pfnSegment = QuickSegmentIterator; + } + else + { + // no - do normal enumeration + pfnSegment = StandardSegmentIterator; + } + + // see if there is a callback + if (scanProc) + { + // there is a scan callback - scan the condemned generation + pfnBlock = BlockScanBlocksEphemeral; + } +#ifndef DACCESS_COMPILE + else if (flags & HNDGCF_AGE) + { + // there is only aging to do + pfnBlock = BlockAgeBlocksEphemeral; + } +#endif + } + + // set up parameters for scan callbacks + ScanCallbackInfo info; + + info.uFlags = flags; + info.fEnumUserData = enumUserData; + info.dwAgeMask = BuildAgeMask(condemned, maxgen); + info.pCurrentSegment = NULL; + info.pfnScan = scanProc; + info.param1 = param1; + info.param2 = param2; + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + info.DEBUG_BlocksScanned = 0; + info.DEBUG_BlocksScannedNonTrivially = 0; + info.DEBUG_HandleSlotsScanned = 0; + info.DEBUG_HandlesActuallyScanned = 0; +#endif + + // choose a scanning method based on the async flag + TABLESCANPROC pfnScanTable = TableScanHandles; + if (flags & HNDGCF_ASYNC) + { + pfnScanTable = xxxTableScanHandlesAsync; + } + + { + // lock the table down for concurrent GC only + CrstHolderWithState ch(&pTable->Lock, (flags & HNDGCF_ASYNC) != 0); + + // perform the scan + pfnScanTable(pTable, types, typeCount, pfnSegment, pfnBlock, &info, &ch); + +#if defined(_DEBUG) && !defined(DACCESS_COMPILE) + // update our scanning statistics for this generation + DEBUG_PostGCScanHandler(pTable, types, typeCount, condemned, maxgen, &info); + #endif + } +} + +#ifndef DACCESS_COMPILE + + +/* + * HndResetAgeMap + * + * Service to forceably reset the age map for a set of handles. + * + * Provided for GC-time resetting the handle table's write barrier. This is not + * normally advisable, as it increases the amount of work that will be done in + * subsequent scans. Under some circumstances, however, this is precisely what is + * desired. 
Generally this entrypoint should only be used under some exceptional + * condition during garbage collection, like objects being demoted from a higher + * generation to a lower one. + * + */ +void HndResetAgeMap(HHANDLETABLE hTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, uint32_t flags) +{ + WRAPPER_NO_CONTRACT; + + // fetch the table pointer + HandleTable *pTable = Table(hTable); + + // set up parameters for scan callbacks + ScanCallbackInfo info; + + info.uFlags = flags; + info.fEnumUserData = FALSE; + info.dwAgeMask = BuildAgeMask(condemned, maxgen); + info.pCurrentSegment = NULL; + info.pfnScan = NULL; + info.param1 = 0; + info.param2 = 0; + + { + // lock the table down + CrstHolderWithState ch(&pTable->Lock); + + // perform the scan + TableScanHandles(pTable, types, typeCount, QuickSegmentIterator, BlockResetAgeMapForBlocks, &info, &ch); + } +} + + +/* + * HndVerifyTable + * + * Service to check the correctness of the handle table for a set of handles + * + * Provided for checking the correctness of handle table and the gc. + * Will validate that each handle points to a valid object. + * Will also validate that the generation of the handle is <= generation of the object. + * Cannot have == because the handle table only remembers the generation for a group of + * 16 handles. 
+ * + */ +void HndVerifyTable(HHANDLETABLE hTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, uint32_t flags) +{ + WRAPPER_NO_CONTRACT; + + // fetch the table pointer + HandleTable *pTable = Table(hTable); + + // set up parameters for scan callbacks + ScanCallbackInfo info; + + info.uFlags = flags; + info.fEnumUserData = FALSE; + info.dwAgeMask = BuildAgeMask(condemned, maxgen); + info.pCurrentSegment = NULL; + info.pfnScan = NULL; + info.param1 = 0; + info.param2 = 0; + + { + // lock the table down + CrstHolderWithState ch(&pTable->Lock); + + // perform the scan + TableScanHandles(pTable, types, typeCount, QuickSegmentIterator, BlockVerifyAgeMapForBlocks, &info, &ch); + } +} + + +/* + * HndNotifyGcCycleComplete + * + * Informs the handle table that a GC has completed. + * + */ +void HndNotifyGcCycleComplete(HHANDLETABLE hTable, uint32_t condemned, uint32_t maxgen) +{ +#ifdef _DEBUG + WRAPPER_NO_CONTRACT; + + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + { + // lock the table down + CrstHolder ch(&pTable->Lock); + + // if this was a full GC then dump a cumulative log of scanning stats + if (condemned >= maxgen) + DEBUG_LogScanningStatistics(pTable, LL_INFO10); + } +#else + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(hTable); + UNREFERENCED_PARAMETER(condemned); + UNREFERENCED_PARAMETER(maxgen); +#endif +} + +extern int getNumberOfSlots(); + + +/* + * HndCountHandles + * + * Counts the number of handles owned by the handle table that are marked as + * "used" that are not currently residing in the handle table's cache. + * + * Provided to compute the correct value for the GC Handle perfcounter. + * The caller is responsible for acquiring the handle table's lock if + * it is necessary. 
+ * + */ +uint32_t HndCountHandles(HHANDLETABLE hTable) +{ + WRAPPER_NO_CONTRACT; + // fetch the handle table pointer + HandleTable *pTable = Table(hTable); + + // initialize the count of handles in the cache to 0 + uint32_t uCacheCount = 0; + + // fetch the count of handles marked as "used" + uint32_t uCount = pTable->dwCount; + + // loop through the main cache for each handle type + HandleTypeCache *pCache = pTable->rgMainCache; + HandleTypeCache *pCacheEnd = pCache + pTable->uTypeCount; + for (; pCache != pCacheEnd; ++pCache) + { + // get relevant indexes for the reserve bank and the free bank + int32_t lFreeIndex = pCache->lFreeIndex; + int32_t lReserveIndex = pCache->lReserveIndex; + + // clamp the min free index and min reserve index to be non-negative; + // this is necessary since interlocked operations can set these variables + // to negative values, and once negative they stay negative until the + // cache is rebalanced + if (lFreeIndex < 0) lFreeIndex = 0; + if (lReserveIndex < 0) lReserveIndex = 0; + + // compute the number of handles + uint32_t uHandleCount = (uint32_t)lReserveIndex + (HANDLES_PER_CACHE_BANK - (uint32_t)lFreeIndex); + + // add the number of handles to the total handle count and update + // dwCount in this HandleTable + uCacheCount += uHandleCount; + } + + // it is not necessary to have the lock while reading the quick cache; + // loop through the quick cache for each handle type + OBJECTHANDLE * pQuickCache = pTable->rgQuickCache; + OBJECTHANDLE * pQuickCacheEnd = pQuickCache + HANDLE_MAX_INTERNAL_TYPES; + for (; pQuickCache != pQuickCacheEnd; ++pQuickCache) + if (*pQuickCache) + ++uCacheCount; + + // return the number of handles marked as "used" that are not + // residing in the cache + return (uCount - uCacheCount); +} + + +/* + * HndCountAllHandles + * + * Counts the total number of handles that are marked as "used" that are not + * currently residing in some handle table's cache. 
+ * + * Provided to compute the correct value for the GC Handle perfcounter. + * The 'fUseLocks' flag specifies whether to acquire each handle table's lock + * while its handles are being counted. + * + */ +uint32_t HndCountAllHandles(BOOL fUseLocks) +{ + uint32_t uCount = 0; + int offset = 0; + + // get number of HandleTables per HandleTableBucket + int n_slots = getNumberOfSlots(); + + // fetch the pointer to the head of the list + struct HandleTableMap * walk = &g_HandleTableMap; + + // walk the list + while (walk) + { + int nextOffset = walk->dwMaxIndex; + int max = nextOffset - offset; + PTR_PTR_HandleTableBucket pBucket = walk->pBuckets; + PTR_PTR_HandleTableBucket pLastBucket = pBucket + max; + + // loop through each slot in this node + for (; pBucket != pLastBucket; ++pBucket) + { + // if there is a HandleTableBucket in this slot + if (*pBucket) + { + // loop through the HandleTables inside this HandleTableBucket, + // and accumulate the handle count of each HandleTable + HHANDLETABLE * pTable = (*pBucket)->pTable; + HHANDLETABLE * pLastTable = pTable + n_slots; + + // if the 'fUseLocks' flag is set, acquire the lock for this handle table before + // calling HndCountHandles() - this will prevent dwCount from being modified and + // it will also prevent any of the main caches from being rebalanced + if (fUseLocks) + for (; pTable != pLastTable; ++pTable) + { + CrstHolder ch(&(Table(*pTable)->Lock)); + uCount += HndCountHandles(*pTable); + } + else + for (; pTable != pLastTable; ++pTable) + uCount += HndCountHandles(*pTable); + } + } + + offset = nextOffset; + walk = walk->pNext; + } + + //return the total number of handles in all HandleTables + return uCount; +} + +#ifndef FEATURE_REDHAWK +BOOL Ref_HandleAsyncPinHandles() +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + if (GetThread()) {MODE_COOPERATIVE;} else {DISABLED(MODE_COOPERATIVE);} + } + CONTRACTL_END; + + HandleTableBucket *pBucket = g_HandleTableMap.pBuckets[0]; + BOOL result = FALSE; + int limit = 
getNumberOfSlots(); + for (int n = 0; n < limit; n ++ ) + { + if (TableHandleAsyncPinHandles(Table(pBucket->pTable[n]))) + { + result = TRUE; + } + } + + return result; +} + +void Ref_RelocateAsyncPinHandles(HandleTableBucket *pSource, HandleTableBucket *pTarget) +{ + CONTRACTL + { + NOTHROW; + GC_TRIGGERS; + } + CONTRACTL_END; + + int limit = getNumberOfSlots(); + for (int n = 0; n < limit; n ++ ) + { + TableRelocateAsyncPinHandles(Table(pSource->pTable[n]), Table(pTarget->pTable[n])); + } +} +#endif // !FEATURE_REDHAWK + +BOOL Ref_ContainHandle(HandleTableBucket *pBucket, OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + int limit = getNumberOfSlots(); + for (int n = 0; n < limit; n ++ ) + { + if (TableContainHandle(Table(pBucket->pTable[n]), handle)) + return TRUE; + } + + return FALSE; +} +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * DEBUG SCANNING STATISTICS + * + ****************************************************************************/ +#ifdef _DEBUG + +void DEBUG_PostGCScanHandler(HandleTable *pTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, ScanCallbackInfo *info) +{ + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(types); + + // looks like the GC supports more generations than we expected + _ASSERTE(condemned < MAXSTATGEN); + + // remember the highest generation we've seen + if (pTable->_DEBUG_iMaxGen < (int)condemned) + pTable->_DEBUG_iMaxGen = (int)condemned; + + // update the statistics + pTable->_DEBUG_TotalBlocksScanned [condemned] += info->DEBUG_BlocksScanned; + pTable->_DEBUG_TotalBlocksScannedNonTrivially [condemned] += info->DEBUG_BlocksScannedNonTrivially; + pTable->_DEBUG_TotalHandleSlotsScanned [condemned] += info->DEBUG_HandleSlotsScanned; + pTable->_DEBUG_TotalHandlesActuallyScanned [condemned] += 
info->DEBUG_HandlesActuallyScanned; + + // if this is an ephemeral GC then dump ephemeral stats for this scan right now + if (condemned < maxgen) + { + // dump a header for the stats with the condemned generation number + LOG((LF_GC, LL_INFO1000, "--------------------------------------------------------------\n")); + LOG((LF_GC, LL_INFO1000, "Ephemeral Handle Scan Summary:\n")); + LOG((LF_GC, LL_INFO1000, " Generation = %u\n", condemned)); + + // dump the handle types we were asked to scan + LOG((LF_GC, LL_INFO1000, " Handle Type(s) = %u", *types)); + for (uint32_t u = 1; u < typeCount; u++) + LOG((LF_GC, LL_INFO1000, ",%u", types[u])); + LOG((LF_GC, LL_INFO1000, "\n")); + + // dump the number of blocks and slots we scanned + uint32_t blockHandles = info->DEBUG_BlocksScanned * HANDLE_HANDLES_PER_BLOCK; + LOG((LF_GC, LL_INFO1000, " Blocks Scanned = %u (%u slots)\n", info->DEBUG_BlocksScanned, blockHandles)); + + // if we scanned any blocks then summarize some stats + if (blockHandles) + { + uint32_t nonTrivialBlockHandles = info->DEBUG_BlocksScannedNonTrivially * HANDLE_HANDLES_PER_BLOCK; + LOG((LF_GC, LL_INFO1000, " Blocks Examined = %u (%u slots)\n", info->DEBUG_BlocksScannedNonTrivially, nonTrivialBlockHandles)); + + LOG((LF_GC, LL_INFO1000, " Slots Scanned = %u\n", info->DEBUG_HandleSlotsScanned)); + LOG((LF_GC, LL_INFO1000, " Handles Scanned = %u\n", info->DEBUG_HandlesActuallyScanned)); + + double scanRatio = ((double)info->DEBUG_HandlesActuallyScanned / (double)blockHandles) * 100.0; + + LOG((LF_GC, LL_INFO1000, " Handle Scanning Ratio = %1.1lf%%\n", scanRatio)); + } + + // dump a footer for the stats + LOG((LF_GC, LL_INFO1000, "--------------------------------------------------------------\n")); + } +} + +void DEBUG_LogScanningStatistics(HandleTable *pTable, uint32_t level) +{ + WRAPPER_NO_CONTRACT; + UNREFERENCED_PARAMETER(level); + + // have we done any GC's yet? 
+ if (pTable->_DEBUG_iMaxGen >= 0) + { + // dump a header for the stats + LOG((LF_GC, level, "\n==============================================================\n")); + LOG((LF_GC, level, " Cumulative Handle Scan Summary:\n")); + + // for each generation we've collected, dump the current stats + for (int i = 0; i <= pTable->_DEBUG_iMaxGen; i++) + { + int64_t totalBlocksScanned = pTable->_DEBUG_TotalBlocksScanned[i]; + + // dump the generation number and the number of blocks scanned + LOG((LF_GC, level, "--------------------------------------------------------------\n")); + LOG((LF_GC, level, " Condemned Generation = %d\n", i)); + LOG((LF_GC, level, " Blocks Scanned = %I64u\n", totalBlocksScanned)); + + // if we scanned any blocks in this generation then dump some interesting numbers + if (totalBlocksScanned) + { + LOG((LF_GC, level, " Blocks Examined = %I64u\n", pTable->_DEBUG_TotalBlocksScannedNonTrivially[i])); + LOG((LF_GC, level, " Slots Scanned = %I64u\n", pTable->_DEBUG_TotalHandleSlotsScanned [i])); + LOG((LF_GC, level, " Handles Scanned = %I64u\n", pTable->_DEBUG_TotalHandlesActuallyScanned [i])); + + double blocksScanned = (double) totalBlocksScanned; + double blocksExamined = (double) pTable->_DEBUG_TotalBlocksScannedNonTrivially[i]; + double slotsScanned = (double) pTable->_DEBUG_TotalHandleSlotsScanned [i]; + double handlesScanned = (double) pTable->_DEBUG_TotalHandlesActuallyScanned [i]; + double totalSlots = (double) (totalBlocksScanned * HANDLE_HANDLES_PER_BLOCK); + + LOG((LF_GC, level, " Block Scan Ratio = %1.1lf%%\n", (100.0 * (blocksExamined / blocksScanned)) )); + LOG((LF_GC, level, " Clump Scan Ratio = %1.1lf%%\n", (100.0 * (slotsScanned / totalSlots)) )); + LOG((LF_GC, level, " Scanned Clump Saturation = %1.1lf%%\n", (100.0 * (handlesScanned / slotsScanned)) )); + LOG((LF_GC, level, " Overall Handle Scan Ratio = %1.1lf%%\n", (100.0 * (handlesScanned / totalSlots)) )); + } + } + + // dump a footer for the stats + LOG((LF_GC, level, 
"==============================================================\n\n")); + } +} + +#endif // _DEBUG +#endif // !DACCESS_COMPILE + + +/*--------------------------------------------------------------------------*/ + + diff --git a/src/gc/handletable.h b/src/gc/handletable.h new file mode 100644 index 0000000000..bbb8b1db22 --- /dev/null +++ b/src/gc/handletable.h @@ -0,0 +1,254 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + * Generational GC handle manager. Entrypoint Header. + * + * Implements generic support for external handles into a GC heap. + * + + * + */ + +#ifndef _HANDLETABLE_H +#define _HANDLETABLE_H + + +/**************************************************************************** + * + * FLAGS, CONSTANTS AND DATA TYPES + * + ****************************************************************************/ + +#ifdef _DEBUG +#define DEBUG_DestroyedHandleValue ((_UNCHECKED_OBJECTREF)0x7) +#endif + +/* + * handle flags used by HndCreateHandleTable + */ +#define HNDF_NORMAL (0x00) +#define HNDF_EXTRAINFO (0x01) + +/* + * handle to handle table + */ +typedef DPTR(struct HandleTable) PTR_HandleTable; +typedef DPTR(PTR_HandleTable) PTR_PTR_HandleTable; +typedef PTR_HandleTable HHANDLETABLE; +typedef PTR_PTR_HandleTable PTR_HHANDLETABLE; + +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * PUBLIC ROUTINES AND MACROS + * + ****************************************************************************/ +#ifndef DACCESS_COMPILE +/* + * handle manager init and shutdown routines + */ +HHANDLETABLE HndCreateHandleTable(const uint32_t *pTypeFlags, uint32_t uTypeCount, ADIndex uADIndex); +void HndDestroyHandleTable(HHANDLETABLE hTable); +#endif // !DACCESS_COMPILE + +/* + * retrieve index 
stored in table at creation + */ +void HndSetHandleTableIndex(HHANDLETABLE hTable, uint32_t uTableIndex); +uint32_t HndGetHandleTableIndex(HHANDLETABLE hTable); +ADIndex HndGetHandleTableADIndex(HHANDLETABLE hTable); +ADIndex HndGetHandleADIndex(OBJECTHANDLE handle); + +#ifndef DACCESS_COMPILE +/* + * individual handle allocation and deallocation + */ +OBJECTHANDLE HndCreateHandle(HHANDLETABLE hTable, uint32_t uType, OBJECTREF object, uintptr_t lExtraInfo = 0); +void HndDestroyHandle(HHANDLETABLE hTable, uint32_t uType, OBJECTHANDLE handle); + +void HndDestroyHandleOfUnknownType(HHANDLETABLE hTable, OBJECTHANDLE handle); + +/* + * bulk handle allocation and deallocation + */ +uint32_t HndCreateHandles(HHANDLETABLE hTable, uint32_t uType, OBJECTHANDLE *pHandles, uint32_t uCount); +void HndDestroyHandles(HHANDLETABLE hTable, uint32_t uType, const OBJECTHANDLE *pHandles, uint32_t uCount); + +/* + * owner data associated with handles + */ +void HndSetHandleExtraInfo(OBJECTHANDLE handle, uint32_t uType, uintptr_t lExtraInfo); +uintptr_t HndCompareExchangeHandleExtraInfo(OBJECTHANDLE handle, uint32_t uType, uintptr_t lOldExtraInfo, uintptr_t lNewExtraInfo); +#endif // !DACCESS_COMPILE + +uintptr_t HndGetHandleExtraInfo(OBJECTHANDLE handle); + +/* + * get parent table of handle + */ +HHANDLETABLE HndGetHandleTable(OBJECTHANDLE handle); + +/* + * write barrier + */ +void HndWriteBarrier(OBJECTHANDLE handle, OBJECTREF value); + +/* + * logging an ETW event (for inlined methods) + */ +void HndLogSetEvent(OBJECTHANDLE handle, _UNCHECKED_OBJECTREF value); + + /* + * Scanning callback. 
+ */ +typedef void (CALLBACK *HANDLESCANPROC)(PTR_UNCHECKED_OBJECTREF pref, uintptr_t *pExtraInfo, uintptr_t param1, uintptr_t param2); + +/* + * NON-GC handle enumeration + */ +void HndEnumHandles(HHANDLETABLE hTable, const uint32_t *puType, uint32_t uTypeCount, + HANDLESCANPROC pfnEnum, uintptr_t lParam1, uintptr_t lParam2, bool fAsync); + +/* + * GC-time handle scanning + */ +#define HNDGCF_NORMAL (0x00000000) // normal scan +#define HNDGCF_AGE (0x00000001) // age handles while scanning +#define HNDGCF_ASYNC (0x00000002) // drop the table lock while scanning +#define HNDGCF_EXTRAINFO (0x00000004) // iterate per-handle data while scanning + + +void HndScanHandlesForGC(HHANDLETABLE hTable, + HANDLESCANPROC scanProc, + uintptr_t param1, + uintptr_t param2, + const uint32_t *types, + uint32_t typeCount, + uint32_t condemned, + uint32_t maxgen, + uint32_t flags); + +void HndResetAgeMap(HHANDLETABLE hTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, uint32_t flags); +void HndVerifyTable(HHANDLETABLE hTable, const uint32_t *types, uint32_t typeCount, uint32_t condemned, uint32_t maxgen, uint32_t flags); + +void HndNotifyGcCycleComplete(HHANDLETABLE hTable, uint32_t condemned, uint32_t maxgen); + +/* + * Handle counting + */ + +uint32_t HndCountHandles(HHANDLETABLE hTable); +uint32_t HndCountAllHandles(BOOL fUseLocks); + +/*--------------------------------------------------------------------------*/ + + +#if defined(USE_CHECKED_OBJECTREFS) && !defined(_NOVM) +#define OBJECTREF_TO_UNCHECKED_OBJECTREF(objref) (*((_UNCHECKED_OBJECTREF*)&(objref))) +#define UNCHECKED_OBJECTREF_TO_OBJECTREF(obj) (OBJECTREF(obj)) +#else +#define OBJECTREF_TO_UNCHECKED_OBJECTREF(objref) (objref) +#define UNCHECKED_OBJECTREF_TO_OBJECTREF(obj) (obj) +#endif + +#ifdef _DEBUG_IMPL +void ValidateAssignObjrefForHandle(OBJECTREF, ADIndex appDomainIndex); +void ValidateFetchObjrefForHandle(OBJECTREF, ADIndex appDomainIndex); +void 
ValidateAppDomainForHandle(OBJECTHANDLE handle); +#endif + +/* + * handle assignment + */ +void HndAssignHandle(OBJECTHANDLE handle, OBJECTREF objref); + +/* + * interlocked-exchange assignment + */ +void* HndInterlockedCompareExchangeHandle(OBJECTHANDLE handle, OBJECTREF objref, OBJECTREF oldObjref); + +/* + * Note that HndFirstAssignHandle is similar to HndAssignHandle, except that it only + * succeeds if transitioning from NULL to non-NULL. In other words, if this handle + * is being initialized for the first time. + */ +BOOL HndFirstAssignHandle(OBJECTHANDLE handle, OBJECTREF objref); + +/* + * inline handle dereferencing + */ + +FORCEINLINE OBJECTREF HndFetchHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + // sanity + _ASSERTE(handle); + +#ifdef _DEBUG_IMPL + _ASSERTE("Attempt to access destroyed handle." && *(_UNCHECKED_OBJECTREF *)handle != DEBUG_DestroyedHandleValue); + + // Make sure the objref for handle is valid + ValidateFetchObjrefForHandle(ObjectToOBJECTREF(*(Object **)handle), + HndGetHandleTableADIndex(HndGetHandleTable(handle))); +#endif // _DEBUG_IMPL + + // wrap the raw objectref and return it + return UNCHECKED_OBJECTREF_TO_OBJECTREF(*PTR_UNCHECKED_OBJECTREF(handle)); +} + + +/* + * inline null testing (needed in certain cases where we're in the wrong GC mod) + */ +FORCEINLINE BOOL HndIsNull(OBJECTHANDLE handle) +{ + LIMITED_METHOD_CONTRACT; + + // sanity + _ASSERTE(handle); + + return NULL == *(Object **)handle; +} + + + +/* + * inline handle checking + */ +FORCEINLINE BOOL HndCheckForNullUnchecked(OBJECTHANDLE handle) +{ + LIMITED_METHOD_CONTRACT; + + return (handle == NULL || (*(_UNCHECKED_OBJECTREF *)handle) == NULL); +} + + +/* + * + * Checks handle value for null or special value used for free handles in cache. 
+ *
+ */
+FORCEINLINE BOOL HndIsNullOrDestroyedHandle(_UNCHECKED_OBJECTREF value)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // when DEBUG_DestroyedHandleValue is defined, freed handles are stamped
+    // with a sentinel value instead of NULL - treat that sentinel as "destroyed"
+#ifdef DEBUG_DestroyedHandleValue
+    if (value == DEBUG_DestroyedHandleValue)
+        return TRUE;
+#endif
+
+    return (value == NULL);
+}
+
+/*--------------------------------------------------------------------------*/
+
+#include "handletable.inl"
+
+#endif //_HANDLETABLE_H
+
diff --git a/src/gc/handletable.inl b/src/gc/handletable.inl
new file mode 100644
index 0000000000..ae815c129b
--- /dev/null
+++ b/src/gc/handletable.inl
@@ -0,0 +1,120 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+//
+
+#ifndef _HANDLETABLE_INL
+#define _HANDLETABLE_INL
+
+/*
+ * HndAssignHandle
+ *
+ * Unconditionally stores an object reference into a handle,
+ * invoking the GC write-barrier for non-NULL stores.
+ *
+ */
+inline void HndAssignHandle(OBJECTHANDLE handle, OBJECTREF objref)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SO_TOLERANT;
+        MODE_COOPERATIVE;
+    }
+    CONTRACTL_END;
+
+    // sanity
+    _ASSERTE(handle);
+
+#ifdef _DEBUG_IMPL
+    // handle should not be in unloaded domain
+    ValidateAppDomainForHandle(handle);
+
+    // Make sure the objref is valid before it is assigned to a handle
+    ValidateAssignObjrefForHandle(objref, HndGetHandleTableADIndex(HndGetHandleTable(handle)));
+#endif
+    // unwrap the objectref we were given
+    _UNCHECKED_OBJECTREF value = OBJECTREF_TO_UNCHECKED_OBJECTREF(objref);
+
+    HndLogSetEvent(handle, value);
+
+    // if we are doing a non-NULL pointer store then invoke the write-barrier
+    if (value)
+        HndWriteBarrier(handle, objref);
+
+    // store the pointer
+    *(_UNCHECKED_OBJECTREF *)handle = value;
+}
+
+/*
+ * HndInterlockedCompareExchangeHandle
+ *
+ * Atomically stores an object reference into a handle if the handle still
+ * holds the expected old referent.  Returns the previous referent.
+ *
+ * NOTE(review): the write-barrier runs before the compare-exchange, i.e.
+ * even when the exchange loses the race - conservative but presumably safe;
+ * confirm against the barrier's contract.
+ *
+ */
+inline void* HndInterlockedCompareExchangeHandle(OBJECTHANDLE handle, OBJECTREF objref, OBJECTREF oldObjref)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // sanity
+    _ASSERTE(handle);
+
+#ifdef _DEBUG_IMPL
+    // handle should not be in unloaded domain
+    ValidateAppDomainForHandle(handle);
+
+    // Make sure the objref is valid before it is assigned to a handle
+    ValidateAssignObjrefForHandle(objref, HndGetHandleTableADIndex(HndGetHandleTable(handle)));
+#endif
+    // unwrap the objectref we were given
+    _UNCHECKED_OBJECTREF value = OBJECTREF_TO_UNCHECKED_OBJECTREF(objref);
+    _UNCHECKED_OBJECTREF oldValue = OBJECTREF_TO_UNCHECKED_OBJECTREF(oldObjref);
+
+    // if we are doing a non-NULL pointer store then invoke the write-barrier
+    if (value)
+        HndWriteBarrier(handle, objref);
+
+    // store the pointer
+
+    void* ret = Interlocked::CompareExchangePointer(reinterpret_cast<_UNCHECKED_OBJECTREF volatile*>(handle), value, oldValue);
+
+    // only log the event if we actually won the race and performed the store
+    if (ret == oldValue)
+        HndLogSetEvent(handle, value);
+
+    return ret;
+}
+
+/*
+ * HndFirstAssignHandle
+ *
+ * Stores an object reference into a handle only if the handle is currently
+ * NULL.  Returns TRUE if this caller performed the store.
+ *
+ */
+inline BOOL HndFirstAssignHandle(OBJECTHANDLE handle, OBJECTREF objref)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        SO_TOLERANT;
+        MODE_COOPERATIVE;
+    }
+    CONTRACTL_END;
+
+    // sanity
+    _ASSERTE(handle);
+
+#ifdef _DEBUG_IMPL
+    // handle should not be in unloaded domain
+    ValidateAppDomainForHandle(handle);
+
+    // Make sure the objref is valid before it is assigned to a handle
+    ValidateAssignObjrefForHandle(objref, HndGetHandleTableADIndex(HndGetHandleTable(handle)));
+#endif
+    // unwrap the objectref we were given
+    _UNCHECKED_OBJECTREF value = OBJECTREF_TO_UNCHECKED_OBJECTREF(objref);
+    _UNCHECKED_OBJECTREF null = NULL;
+
+    // store the pointer if we are the first ones here
+    BOOL success = (NULL == Interlocked::CompareExchangePointer(reinterpret_cast<_UNCHECKED_OBJECTREF volatile*>(handle),
+                                                                value,
+                                                                null));
+
+    // if we successfully did a non-NULL pointer store then invoke the write-barrier
+    if (success)
+    {
+        if (value)
+            HndWriteBarrier(handle, objref);
+
+        HndLogSetEvent(handle, value);
+    }
+
+    // return our result
+    return success;
+}
+
+#endif // _HANDLETABLE_INL
diff --git a/src/gc/handletablecache.cpp b/src/gc/handletablecache.cpp
new file mode 100644
index 0000000000..b2af40c829
--- /dev/null
+++ b/src/gc/handletablecache.cpp
@@ -0,0 +1,881 @@
+// Licensed to the .NET Foundation under one or more
agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*
+ * Generational GC handle manager. Handle Caching Routines.
+ *
+ * Implementation of handle table allocation cache.
+ *
+
+ *
+ */
+
+#include "common.h"
+
+#include "gcenv.h"
+
+#include "handletablepriv.h"
+
+/****************************************************************************
+ *
+ * RANDOM HELPERS
+ *
+ ****************************************************************************/
+
+/*
+ * SpinUntil
+ *
+ * Spins on a variable until its state matches a desired state.
+ *
+ * This routine will assert if it spins for a very long time.
+ *
+ */
+void SpinUntil(void *pCond, BOOL fNonZero)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // if we have to sleep then we will keep track of a sleep period
+    uint32_t dwThisSleepPeriod = 1;    // first just give up our timeslice
+    uint32_t dwNextSleepPeriod = 10;   // next try a real delay
+
+#ifdef _DEBUG
+    uint32_t dwTotalSlept = 0;
+    uint32_t dwNextComplain = 1000;
+#endif //_DEBUG
+
+    // on MP machines, allow ourselves some spin time before sleeping
+    uint32_t uNonSleepSpins = 8 * (g_SystemInfo.dwNumberOfProcessors - 1);
+
+    // spin until the specified condition is met
+    while ((*(uintptr_t *)pCond != 0) != (fNonZero != 0))
+    {
+        // have we exhausted the non-sleep spin count?
+        if (!uNonSleepSpins)
+        {
+#ifdef _DEBUG
+            // yes, missed again - before sleeping, check our current sleep time
+            if (dwTotalSlept >= dwNextComplain)
+            {
+                //
+                // THIS SHOULD NOT NORMALLY HAPPEN
+                //
+                // The only time this assert can be ignored is if you have
+                // another thread intentionally suspended in a way that either
+                // directly or indirectly leaves a thread suspended in the
+                // handle table while the current thread (this assert) is
+                // running normally.
+                //
+                // Otherwise, this assert should be investigated as a bug.
+                //
+                _ASSERTE(FALSE);
+
+                // slow down the assert rate so people can investigate
+                dwNextComplain = 3 * dwNextComplain;
+            }
+
+            // now update our total sleep time
+            dwTotalSlept += dwThisSleepPeriod;
+#endif //_DEBUG
+
+            // sleep for a little while
+            GCToOSInterface::Sleep(dwThisSleepPeriod);
+
+            // now update our sleep period
+            dwThisSleepPeriod = dwNextSleepPeriod;
+
+            // now increase the next sleep period if it is still small
+            if (dwNextSleepPeriod < 1000)
+                dwNextSleepPeriod += 10;
+        }
+        else
+        {
+            // nope - just spin again
+            YieldProcessor();           // indicate to the processor that we are spinning
+            uNonSleepSpins--;
+        }
+    }
+}
+
+
+/*
+ * ReadAndZeroCacheHandles
+ *
+ * Reads a set of handles from a bank in the handle cache, zeroing them as they are taken.
+ *
+ * This routine will assert if a requested handle is missing.
+ *
+ */
+OBJECTHANDLE *ReadAndZeroCacheHandles(OBJECTHANDLE *pDst, OBJECTHANDLE *pSrc, uint32_t uCount)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // set up to loop
+    OBJECTHANDLE *pLast = pDst + uCount;
+
+    // loop until we've copied all of them
+    while (pDst < pLast)
+    {
+        // this version assumes we have handles to read
+        _ASSERTE(*pSrc);
+
+        // copy the handle and zero it from the source
+        *pDst = *pSrc;
+        *pSrc = 0;
+
+        // set up for another handle
+        pDst++;
+        pSrc++;
+    }
+
+    // return the next unfilled slot after what we filled in
+    return pLast;
+}
+
+
+/*
+ * SyncReadAndZeroCacheHandles
+ *
+ * Reads a set of handles from a bank in the handle cache, zeroing them as they are taken.
+ *
+ * This routine will spin until all requested handles are obtained.
+ *
+ */
+OBJECTHANDLE *SyncReadAndZeroCacheHandles(OBJECTHANDLE *pDst, OBJECTHANDLE *pSrc, uint32_t uCount)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // set up to loop
+    // we loop backwards since that is the order handles are added to the bank
+    // this is designed to reduce the chance that we will have to spin on a handle
+    OBJECTHANDLE *pBase = pDst;
+    pSrc += uCount;
+    pDst += uCount;
+
+    // remember the end of the array
+    OBJECTHANDLE *pLast = pDst;
+
+    // loop until we've copied all of them
+    while (pDst > pBase)
+    {
+        // advance to the next slot
+        pDst--;
+        pSrc--;
+
+        // this version spins if there is no handle to read
+        if (!*pSrc)
+            SpinUntil(pSrc, TRUE);
+
+        // copy the handle and zero it from the source
+        *pDst = *pSrc;
+        *pSrc = 0;
+    }
+
+    // return the next unfilled slot after what we filled in
+    return pLast;
+}
+
+
+/*
+ * WriteCacheHandles
+ *
+ * Writes a set of handles to a bank in the handle cache.
+ *
+ * This routine will assert if it is about to clobber an existing handle.
+ *
+ */
+void WriteCacheHandles(OBJECTHANDLE *pDst, OBJECTHANDLE *pSrc, uint32_t uCount)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // set up to loop
+    OBJECTHANDLE *pLimit = pSrc + uCount;
+
+    // loop until we've copied all of them
+    while (pSrc < pLimit)
+    {
+        // this version assumes we have space to store the handles
+        _ASSERTE(!*pDst);
+
+        // copy the handle
+        *pDst = *pSrc;
+
+        // set up for another handle
+        pDst++;
+        pSrc++;
+    }
+}
+
+
+/*
+ * SyncWriteCacheHandles
+ *
+ * Writes a set of handles to a bank in the handle cache.
+ *
+ * This routine will spin until lingering handles in the cache bank are gone.
+ *
+ */
+void SyncWriteCacheHandles(OBJECTHANDLE *pDst, OBJECTHANDLE *pSrc, uint32_t uCount)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // set up to loop
+    // we loop backwards since that is the order handles are removed from the bank
+    // this is designed to reduce the chance that we will have to spin on a handle
+    OBJECTHANDLE *pBase = pSrc;
+    pSrc += uCount;
+    pDst += uCount;
+
+    // loop until we've copied all of them
+    while (pSrc > pBase)
+    {
+        // set up for another handle
+        pDst--;
+        pSrc--;
+
+        // this version spins while the destination slot is still occupied
+        // (i.e. a lingering handle has not yet been taken by another thread)
+        if (*pDst)
+            SpinUntil(pDst, FALSE);
+
+        // copy the handle
+        *pDst = *pSrc;
+    }
+}
+
+
+/*
+ * SyncTransferCacheHandles
+ *
+ * Transfers a set of handles from one bank of the handle cache to another,
+ * zeroing the source bank as the handles are removed.
+ *
+ * The routine will spin until all requested handles can be transferred.
+ *
+ * This routine is equivalent to SyncReadAndZeroCacheHandles + SyncWriteCacheHandles
+ *
+ */
+void SyncTransferCacheHandles(OBJECTHANDLE *pDst, OBJECTHANDLE *pSrc, uint32_t uCount)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // set up to loop
+    // we loop backwards since that is the order handles are added to the bank
+    // this is designed to reduce the chance that we will have to spin on a handle
+    OBJECTHANDLE *pBase = pDst;
+    pSrc += uCount;
+    pDst += uCount;
+
+    // loop until we've copied all of them
+    while (pDst > pBase)
+    {
+        // advance to the next slot
+        pDst--;
+        pSrc--;
+
+        // this version spins if there is no handle to read or no place to write it
+        if (*pDst || !*pSrc)
+        {
+            SpinUntil(pSrc, TRUE);
+            SpinUntil(pDst, FALSE);
+        }
+
+        // copy the handle and zero it from the source
+        *pDst = *pSrc;
+        *pSrc = 0;
+    }
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+
+/****************************************************************************
+ *
+ * HANDLE
CACHE
+ *
+ ****************************************************************************/
+
+/*
+ * TableFullRebalanceCache
+ *
+ * Rebalances a handle cache by transferring handles from the cache's
+ * free bank to its reserve bank. If the free bank does not provide
+ * enough handles to replenish the reserve bank, handles are allocated
+ * in bulk from the main handle table. If too many handles remain in
+ * the free bank, the extra handles are returned in bulk to the main
+ * handle table.
+ *
+ * This routine attempts to reduce fragmentation in the main handle
+ * table by sorting the handles according to table order, preferring to
+ * refill the reserve bank with lower handles while freeing higher ones.
+ * The sorting also allows the free routine to operate more efficiently,
+ * as it can optimize the case where handles near each other are freed.
+ *
+ */
+void TableFullRebalanceCache(HandleTable *pTable,
+                             HandleTypeCache *pCache,
+                             uint32_t uType,
+                             int32_t lMinReserveIndex,
+                             int32_t lMinFreeIndex,
+                             OBJECTHANDLE *pExtraOutHandle,
+                             OBJECTHANDLE extraInHandle)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // we need a temporary space to sort our free handles in
+    OBJECTHANDLE rgHandles[HANDLE_CACHE_TYPE_SIZE];
+
+    // set up a base handle pointer to keep track of where we are
+    OBJECTHANDLE *pHandleBase = rgHandles;
+
+    // do we have a spare incoming handle?
+    if (extraInHandle)
+    {
+        // remember the extra handle now
+        *pHandleBase = extraInHandle;
+        pHandleBase++;
+    }
+
+    // if there are handles in the reserve bank then gather them up
+    // (we don't need to wait on these since they are only put there by this
+    //  function inside our own lock)
+    if (lMinReserveIndex > 0)
+        pHandleBase = ReadAndZeroCacheHandles(pHandleBase, pCache->rgReserveBank, (uint32_t)lMinReserveIndex);
+    else
+        lMinReserveIndex = 0;
+
+    // if there are handles in the free bank then gather them up
+    if (lMinFreeIndex < HANDLES_PER_CACHE_BANK)
+    {
+        // this may have underflowed
+        if (lMinFreeIndex < 0)
+            lMinFreeIndex = 0;
+
+        // here we need to wait for all pending freed handles to be written by other threads
+        pHandleBase = SyncReadAndZeroCacheHandles(pHandleBase,
+                                                  pCache->rgFreeBank + lMinFreeIndex,
+                                                  HANDLES_PER_CACHE_BANK - (uint32_t)lMinFreeIndex);
+    }
+
+    // compute the number of handles we have
+    uint32_t uHandleCount = (uint32_t) (pHandleBase - rgHandles);
+
+    // do we have enough handles for a balanced cache?
+    if (uHandleCount < REBALANCE_LOWATER_MARK)
+    {
+        // nope - allocate some more
+        uint32_t uAlloc = HANDLES_PER_CACHE_BANK - uHandleCount;
+
+        // if we have an extra outgoing handle then plan for that too
+        if (pExtraOutHandle)
+            uAlloc++;
+
+        {
+            // allocate the new handles - we intentionally don't check for success here
+            FAULT_NOT_FATAL();
+
+            uHandleCount += TableAllocBulkHandles(pTable, uType, pHandleBase, uAlloc);
+        }
+    }
+
+    // reset the base handle pointer
+    pHandleBase = rgHandles;
+
+    // by default the whole free bank is available
+    lMinFreeIndex = HANDLES_PER_CACHE_BANK;
+
+    // if we have handles left over then we need to do some more work
+    if (uHandleCount)
+    {
+        // do we have too many handles for a balanced cache?
+        if (uHandleCount > REBALANCE_HIWATER_MARK)
+        {
+            //
+            // sort the array by reverse handle order - this does two things:
+            //    (1) combats handle fragmentation by preferring low-address handles to high ones
+            //    (2) allows the free routine to run much more efficiently over the ones we free
+            //
+            QuickSort((uintptr_t *)pHandleBase, 0, uHandleCount - 1, CompareHandlesByFreeOrder);
+
+            // yup, we need to free some - calculate how many
+            uint32_t uFree = uHandleCount - HANDLES_PER_CACHE_BANK;
+
+            // free the handles - they are already 'prepared' (eg zeroed and sorted)
+            TableFreeBulkPreparedHandles(pTable, uType, pHandleBase, uFree);
+
+            // update our array base and length
+            uHandleCount -= uFree;
+            pHandleBase  += uFree;
+        }
+
+        // if we have an extra outgoing handle then fill it now
+        if (pExtraOutHandle)
+        {
+            // account for the handle we're giving away
+            uHandleCount--;
+
+            // now give it away
+            *pExtraOutHandle = pHandleBase[uHandleCount];
+        }
+
+        // if we have more than a reserve bank of handles then put some in the free bank
+        if (uHandleCount > HANDLES_PER_CACHE_BANK)
+        {
+            // compute the number of extra handles we need to save away
+            uint32_t uStore = uHandleCount - HANDLES_PER_CACHE_BANK;
+
+            // compute the index to start writing the handles to
+            lMinFreeIndex = HANDLES_PER_CACHE_BANK - uStore;
+
+            // store the handles
+            // (we don't need to wait on these since we already waited while reading them)
+            WriteCacheHandles(pCache->rgFreeBank + lMinFreeIndex, pHandleBase, uStore);
+
+            // update our array base and length
+            uHandleCount -= uStore;
+            pHandleBase  += uStore;
+        }
+    }
+
+    // update the write index for the free bank
+    // NOTE: we use an interlocked exchange here to guarantee relative store order on MP
+    // AFTER THIS POINT THE FREE BANK IS LIVE AND COULD RECEIVE NEW HANDLES
+    Interlocked::Exchange(&pCache->lFreeIndex, lMinFreeIndex);
+
+    // now if we have any handles left, store them in the reserve bank
+    if (uHandleCount)
+    {
+        // store the handles
+        // (here we need to wait for all pending allocated handles to be taken
+        //  before we set up new ones in their places)
+        SyncWriteCacheHandles(pCache->rgReserveBank, pHandleBase, uHandleCount);
+    }
+
+    // compute the index to start serving handles from
+    lMinReserveIndex = (int32_t)uHandleCount;
+
+    // update the read index for the reserve bank
+    // NOTE: we use an interlocked exchange here to guarantee relative store order on MP
+    // AT THIS POINT THE RESERVE BANK IS LIVE AND HANDLES COULD BE ALLOCATED FROM IT
+    Interlocked::Exchange(&pCache->lReserveIndex, lMinReserveIndex);
+}
+
+
+/*
+ * TableQuickRebalanceCache
+ *
+ * Rebalances a handle cache by transferring handles from the cache's free bank
+ * to its reserve bank. If the free bank does not provide enough handles to
+ * replenish the reserve bank or too many handles remain in the free bank, the
+ * routine just punts and calls TableFullRebalanceCache.
+ *
+ */
+void TableQuickRebalanceCache(HandleTable *pTable,
+                              HandleTypeCache *pCache,
+                              uint32_t uType,
+                              int32_t lMinReserveIndex,
+                              int32_t lMinFreeIndex,
+                              OBJECTHANDLE *pExtraOutHandle,
+                              OBJECTHANDLE extraInHandle)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // clamp the min free index to be non-negative
+    if (lMinFreeIndex < 0)
+        lMinFreeIndex = 0;
+
+    // clamp the min reserve index to be non-negative
+    if (lMinReserveIndex < 0)
+        lMinReserveIndex = 0;
+
+    // compute the number of slots in the free bank taken by handles
+    uint32_t uFreeAvail = HANDLES_PER_CACHE_BANK - (uint32_t)lMinFreeIndex;
+
+    // compute the number of handles we have to fiddle with
+    uint32_t uHandleCount = (uint32_t)lMinReserveIndex + uFreeAvail + (extraInHandle != 0);
+
+    // can we rebalance these handles in place?
+    if ((uHandleCount < REBALANCE_LOWATER_MARK) ||
+        (uHandleCount > REBALANCE_HIWATER_MARK))
+    {
+        // nope - perform a full rebalance of the handle cache
+        TableFullRebalanceCache(pTable, pCache, uType, lMinReserveIndex, lMinFreeIndex,
+                                pExtraOutHandle, extraInHandle);
+
+        // all done
+        return;
+    }
+
+    // compute the number of empty slots in the reserve bank
+    uint32_t uEmptyReserve = HANDLES_PER_CACHE_BANK - lMinReserveIndex;
+
+    // we want to transfer as many handles as we can from the free bank
+    uint32_t uTransfer = uFreeAvail;
+
+    // but only as many as we have room to store in the reserve bank
+    if (uTransfer > uEmptyReserve)
+        uTransfer = uEmptyReserve;
+
+    // transfer the handles
+    SyncTransferCacheHandles(pCache->rgReserveBank + lMinReserveIndex,
+                             pCache->rgFreeBank    + lMinFreeIndex,
+                             uTransfer);
+
+    // adjust the free and reserve indices to reflect the transfer
+    lMinFreeIndex    += uTransfer;
+    lMinReserveIndex += uTransfer;
+
+    // do we have an extra incoming handle to store?
+    if (extraInHandle)
+    {
+        //
+        // Workaround: For code size reasons, we don't handle all cases here.
+        // We assume an extra IN handle means a cache overflow during a free.
+        //
+        // After the rebalance above, the reserve bank should be full, and
+        // there may be a few handles sitting in the free bank. The HIWATER
+        // check above guarantees that we have room to store the handle.
+        //
+        _ASSERTE(!pExtraOutHandle);
+
+        // store the handle in the next available free bank slot
+        pCache->rgFreeBank[--lMinFreeIndex] = extraInHandle;
+    }
+    else if (pExtraOutHandle)    // do we have an extra outgoing handle to satisfy?
+    {
+        //
+        // For code size reasons, we don't handle all cases here.
+        // We assume an extra OUT handle means a cache underflow during an alloc.
+        //
+        // After the rebalance above, the free bank should be empty, and
+        // the reserve bank may not be fully populated. The LOWATER check above
+        // guarantees that the reserve bank has at least one handle we can steal.
+        //
+
+        // take the handle from the reserve bank and update the reserve index
+        *pExtraOutHandle = pCache->rgReserveBank[--lMinReserveIndex];
+
+        // zero the cache slot we chose
+        pCache->rgReserveBank[lMinReserveIndex] = NULL;
+    }
+
+    // update the write index for the free bank
+    // NOTE: we use an interlocked exchange here to guarantee relative store order on MP
+    // AFTER THIS POINT THE FREE BANK IS LIVE AND COULD RECEIVE NEW HANDLES
+    Interlocked::Exchange(&pCache->lFreeIndex, lMinFreeIndex);
+
+    // update the read index for the reserve bank
+    // NOTE: we use an interlocked exchange here to guarantee relative store order on MP
+    // AT THIS POINT THE RESERVE BANK IS LIVE AND HANDLES COULD BE ALLOCATED FROM IT
+    Interlocked::Exchange(&pCache->lReserveIndex, lMinReserveIndex);
+}
+
+
+/*
+ * TableCacheMissOnAlloc
+ *
+ * Gets a single handle of the specified type from the handle table,
+ * making the assumption that the reserve cache for that type was
+ * recently emptied. This routine acquires the handle manager lock and
+ * attempts to get a handle from the reserve cache again. If this second
+ * get operation also fails, the handle is allocated by means of a cache
+ * rebalance.
+ *
+ */
+OBJECTHANDLE TableCacheMissOnAlloc(HandleTable *pTable, HandleTypeCache *pCache, uint32_t uType)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // assume we get no handle
+    OBJECTHANDLE handle = NULL;
+
+    // acquire the handle manager lock
+    CrstHolder ch(&pTable->Lock);
+
+    // try again to take a handle (somebody else may have rebalanced)
+    int32_t lReserveIndex = Interlocked::Decrement(&pCache->lReserveIndex);
+
+    // are we still waiting for handles?
+    if (lReserveIndex < 0)
+    {
+        // yup, suspend free list usage...
+        int32_t lFreeIndex = Interlocked::Exchange(&pCache->lFreeIndex, 0);
+
+        // ...and rebalance the cache...
+        TableQuickRebalanceCache(pTable, pCache, uType, lReserveIndex, lFreeIndex, &handle, NULL);
+    }
+    else
+    {
+        // somebody else rebalanced the cache for us - take the handle
+        handle = pCache->rgReserveBank[lReserveIndex];
+
+        // zero the handle slot
+        pCache->rgReserveBank[lReserveIndex] = 0;
+    }
+
+    // return the handle we got
+    return handle;
+}
+
+
+/*
+ * TableCacheMissOnFree
+ *
+ * Returns a single handle of the specified type to the handle table,
+ * making the assumption that the free cache for that type was recently
+ * filled. This routine acquires the handle manager lock and attempts
+ * to store the handle in the free cache again. If this second store
+ * operation also fails, the handle is freed by means of a cache
+ * rebalance.
+ *
+ */
+void TableCacheMissOnFree(HandleTable *pTable, HandleTypeCache *pCache, uint32_t uType, OBJECTHANDLE handle)
+{
+    WRAPPER_NO_CONTRACT;
+
+    /*
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    */
+
+    // acquire the handle manager lock
+    CrstHolder ch(&pTable->Lock);
+
+    // try again to take a slot (somebody else may have rebalanced)
+    int32_t lFreeIndex = Interlocked::Decrement(&pCache->lFreeIndex);
+
+    // are we still waiting for free slots?
+    if (lFreeIndex < 0)
+    {
+        // yup, suspend reserve list usage...
+        int32_t lReserveIndex = Interlocked::Exchange(&pCache->lReserveIndex, 0);
+
+        // ...and rebalance the cache...
+        TableQuickRebalanceCache(pTable, pCache, uType, lReserveIndex, lFreeIndex, NULL, handle);
+    }
+    else
+    {
+        // somebody else rebalanced the cache for us - free the handle
+        pCache->rgFreeBank[lFreeIndex] = handle;
+    }
+}
+
+
+/*
+ * TableAllocSingleHandleFromCache
+ *
+ * Gets a single handle of the specified type from the handle table by
+ * trying to fetch it from the reserve cache for that handle type. If the
+ * reserve cache is empty, this routine calls TableCacheMissOnAlloc.
+ *
+ */
+OBJECTHANDLE TableAllocSingleHandleFromCache(HandleTable *pTable, uint32_t uType)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // we use this in two places
+    OBJECTHANDLE handle;
+
+    // first try to get a handle from the quick cache
+    if (pTable->rgQuickCache[uType])
+    {
+        // try to grab the handle we saw
+        handle = Interlocked::ExchangePointer(pTable->rgQuickCache + uType, (OBJECTHANDLE)NULL);
+
+        // if it worked then we're done
+        if (handle)
+            return handle;
+    }
+
+    // ok, get the main handle cache for this type
+    HandleTypeCache *pCache = pTable->rgMainCache + uType;
+
+    // try to take a handle from the main cache
+    int32_t lReserveIndex = Interlocked::Decrement(&pCache->lReserveIndex);
+
+    // did we underflow?
+    if (lReserveIndex < 0)
+    {
+        // yep - the cache is out of handles
+        return TableCacheMissOnAlloc(pTable, pCache, uType);
+    }
+
+    // get our handle
+    handle = pCache->rgReserveBank[lReserveIndex];
+
+    // zero the handle slot
+    pCache->rgReserveBank[lReserveIndex] = 0;
+
+    // sanity
+    _ASSERTE(handle);
+
+    // return our handle
+    return handle;
+}
+
+
+/*
+ * TableFreeSingleHandleToCache
+ *
+ * Returns a single handle of the specified type to the handle table
+ * by trying to store it in the free cache for that handle type. If the
+ * free cache is full, this routine calls TableCacheMissOnFree.
+ *
+ */
+void TableFreeSingleHandleToCache(HandleTable *pTable, uint32_t uType, OBJECTHANDLE handle)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        SO_TOLERANT;
+        CAN_TAKE_LOCK;         // because of TableCacheMissOnFree
+    }
+    CONTRACTL_END;
+
+#ifdef DEBUG_DestroyedHandleValue
+    // stamp the referent with the "destroyed" sentinel in debug builds
+    *(_UNCHECKED_OBJECTREF *)handle = DEBUG_DestroyedHandleValue;
+#else
+    // zero the handle's object pointer
+    *(_UNCHECKED_OBJECTREF *)handle = NULL;
+#endif
+
+    // if this handle type has user data then clear it - AFTER the referent is cleared!
+    if (TypeHasUserData(pTable, uType))
+        HandleQuickSetUserData(handle, 0L);
+
+    // is there room in the quick cache?
+    if (!pTable->rgQuickCache[uType])
+    {
+        // yup - try to stuff our handle in the slot we saw
+        handle = Interlocked::ExchangePointer(&pTable->rgQuickCache[uType], handle);
+
+        // if we didn't end up with another handle then we're done
+        if (!handle)
+            return;
+    }
+
+    // ok, get the main handle cache for this type
+    HandleTypeCache *pCache = pTable->rgMainCache + uType;
+
+    // try to take a free slot from the main cache
+    int32_t lFreeIndex = Interlocked::Decrement(&pCache->lFreeIndex);
+
+    // did we underflow?
+    if (lFreeIndex < 0)
+    {
+        // yep - we're out of free slots
+        TableCacheMissOnFree(pTable, pCache, uType, handle);
+        return;
+    }
+
+    // we got a slot - save the handle in the free bank
+    pCache->rgFreeBank[lFreeIndex] = handle;
+}
+
+
+/*
+ * TableAllocHandlesFromCache
+ *
+ * Allocates multiple handles of the specified type by repeatedly
+ * calling TableAllocSingleHandleFromCache.
+ *
+ */
+uint32_t TableAllocHandlesFromCache(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // loop until we have satisfied all the handles we need to allocate
+    uint32_t uSatisfied = 0;
+    while (uSatisfied < uCount)
+    {
+        // get a handle from the cache
+        OBJECTHANDLE handle = TableAllocSingleHandleFromCache(pTable, uType);
+
+        // if we can't get any more then bail out
+        if (!handle)
+            break;
+
+        // store the handle in the caller's array
+        *pHandleBase = handle;
+
+        // on to the next one
+        uSatisfied++;
+        pHandleBase++;
+    }
+
+    // return the number of handles we allocated
+    return uSatisfied;
+}
+
+
+/*
+ * TableFreeHandlesToCache
+ *
+ * Frees multiple handles of the specified type by repeatedly
+ * calling TableFreeSingleHandleToCache.
+ *
+ */
+void TableFreeHandlesToCache(HandleTable *pTable, uint32_t uType, const OBJECTHANDLE *pHandleBase, uint32_t uCount)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // loop until we have freed all the handles
+    while (uCount)
+    {
+        // get the next handle to free
+        OBJECTHANDLE handle = *pHandleBase;
+
+        // advance our state
+        uCount--;
+        pHandleBase++;
+
+        // sanity
+        _ASSERTE(handle);
+
+        // return the handle to the cache
+        TableFreeSingleHandleToCache(pTable, uType, handle);
+    }
+}
+
+/*--------------------------------------------------------------------------*/
+
+
diff --git a/src/gc/handletablecore.cpp b/src/gc/handletablecore.cpp
new file mode 100644
index 0000000000..5e077de8a2
--- /dev/null
+++ b/src/gc/handletablecore.cpp
@@ -0,0 +1,2772 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*
+ * Generational GC handle manager. Core Table Implementation.
+ *
+ * Implementation of core table management routines.
+ *
+
+ *
+ */
+
+#include "common.h"
+
+#include "gcenv.h"
+
+#ifndef FEATURE_REDHAWK
+#include "nativeoverlapped.h"
+#endif // FEATURE_REDHAWK
+
+#include "handletablepriv.h"
+
+/****************************************************************************
+ *
+ * RANDOM HELPERS
+ *
+ ****************************************************************************/
+
+// lookup table: c_rgLowBitIndex[b] is the index of the lowest set bit in the
+// byte b (0xff for b == 0, which has no set bit)
+const uint8_t c_rgLowBitIndex[256] =
+{
+    0xff, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+    0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+};
+
+#ifndef DACCESS_COMPILE
+
+/*
+ * A 32/64 neutral quicksort
+ */
+//<TODO>@TODO: move/merge into common util file</TODO>
+typedef int (*PFNCOMPARE)(uintptr_t p, uintptr_t q);
+void QuickSort(uintptr_t *pData, int left, int right, PFNCOMPARE pfnCompare)
+{
+    WRAPPER_NO_CONTRACT;
+
+    do
+    {
+        int i = left;
+        int j = right;
+
+        // pick the middle element as the pivot
+        uintptr_t x = pData[(i + j + 1) / 2];
+
+        do
+        {
+            while (pfnCompare(pData[i], x) < 0)
+                i++;
+
+            while (pfnCompare(x, pData[j]) < 0)
+                j--;
+
+            if (i > j)
+                break;
+
+            if (i < j)
+            {
+                uintptr_t t = pData[i];
+                pData[i] = pData[j];
+                pData[j] = t;
+            }
+
+            i++;
+            j--;
+
+        } while (i <= j);
+
+        // recurse into the smaller partition and loop on the larger one,
+        // which bounds the recursion depth to O(log n)
+        if ((j - left) <= (right - i))
+        {
+            if (left < j)
+                QuickSort(pData, left, j, pfnCompare);
+
+            left = i;
+        }
+        else
+        {
+            if (i < right)
+                QuickSort(pData, i, right, pfnCompare);
+
+            right = j;
+        }
+
+    } while (left < right);
+}
+
+
+/*
+ * CompareHandlesByFreeOrder
+ *
+ * Returns:
+ *  <0 - handle P should be freed before handle Q
+ *  =0 - handles are equivalent for free order purposes
+ *  >0 - handle Q should be freed before handle P
+ *
+ */
+int CompareHandlesByFreeOrder(uintptr_t p, uintptr_t q)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // compute the segments for the handles
+    TableSegment *pSegmentP = (TableSegment *)(p & HANDLE_SEGMENT_ALIGN_MASK);
+    TableSegment *pSegmentQ = (TableSegment *)(q & HANDLE_SEGMENT_ALIGN_MASK);
+
+    // are the handles in the same segment?
+    if (pSegmentP == pSegmentQ)
+    {
+        // return the in-segment handle free order
+        return (int)((intptr_t)q - (intptr_t)p);
+    }
+    else if (pSegmentP)
+    {
+        // do we have two valid segments?
+        if (pSegmentQ)
+        {
+            // return the sequence order of the two segments
+            return (int)(uint32_t)pSegmentQ->bSequence - (int)(uint32_t)pSegmentP->bSequence;
+        }
+        else
+        {
+            // only the P handle is valid - free Q first
+            return 1;
+        }
+    }
+    else if (pSegmentQ)
+    {
+        // only the Q handle is valid - free P first
+        return -1;
+    }
+
+    // neither handle is valid
+    return 0;
+}
+
+
+/*
+ * ZeroHandles
+ *
+ * Zeroes the object pointers for an array of handles.
+ *
+ */
+void ZeroHandles(OBJECTHANDLE *pHandleBase, uint32_t uCount)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // compute our stopping point
+    OBJECTHANDLE *pLastHandle = pHandleBase + uCount;
+
+    // loop over the array, zeroing as we go
+    while (pHandleBase < pLastHandle)
+    {
+        // get the current handle from the array
+        OBJECTHANDLE handle = *pHandleBase;
+
+        // advance to the next handle
+        pHandleBase++;
+
+        // zero the handle's object pointer
+        *(_UNCHECKED_OBJECTREF *)handle = NULL;
+    }
+}
+
+#ifdef _DEBUG
+// debug-only scan callback: counts enumerated blocks into pInfo->param1
+void CALLBACK DbgCountEnumeratedBlocks(TableSegment *pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo)
+{
+    LIMITED_METHOD_CONTRACT;
+    UNREFERENCED_PARAMETER(pSegment);
+    UNREFERENCED_PARAMETER(uBlock);
+
+    // accumulate the block count in pInfo->param1
+    pInfo->param1 += uCount;
+}
+#endif
+
+/*--------------------------------------------------------------------------*/
+
+
+
+/****************************************************************************
+ *
+ * CORE TABLE MANAGEMENT
+ *
+ ****************************************************************************/
+
+/*
+ * TableCanFreeSegmentNow
+ *
+ * Determines if it is OK to free the specified segment at this time.
+ *
+ */
+BOOL TableCanFreeSegmentNow(HandleTable *pTable, TableSegment *pSegment)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // sanity
+    _ASSERTE(pTable);
+    _ASSERTE(pSegment);
+#ifdef _DEBUG
+    // there have been cases in the past where the original assert would
+    // fail but by the time a dump was created the lock was unowned so
+    // there was no way to tell who the previous owner was.
+    EEThreadId threadId = pTable->Lock.GetHolderThreadId();
+    _ASSERTE(threadId.IsCurrentThread());
+#endif // _DEBUG
+
+    // determine if any segment is currently being scanned asynchronously
+    TableSegment *pSegmentAsync = NULL;
+
+    // do we have async info?
+    AsyncScanInfo *pAsyncInfo = pTable->pAsyncScanInfo;
+    if (pAsyncInfo)
+    {
+        // must always have underlying callback info in an async scan
+        _ASSERTE(pAsyncInfo->pCallbackInfo);
+
+        // yes - if a segment is being scanned asynchronously it is listed here
+        pSegmentAsync = pAsyncInfo->pCallbackInfo->pCurrentSegment;
+    }
+
+    // we can free our segment if it isn't being scanned asynchronously right now
+    return (pSegment != pSegmentAsync);
+}
+
+#endif // !DACCESS_COMPILE
+
+/*
+ * BlockFetchUserDataPointer
+ *
+ * Gets the user data pointer for the first handle in a block.
+ *
+ */
+PTR_uintptr_t BlockFetchUserDataPointer(PTR__TableSegmentHeader pSegment, uint32_t uBlock, BOOL fAssertOnError)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // assume NULL until we actually find the data
+    PTR_uintptr_t pUserData = NULL;
+    // get the user data index for this block
+    uint32_t blockIndex = pSegment->rgUserData[uBlock];
+
+    // is there user data for the block?
+    if (blockIndex != BLOCK_INVALID)
+    {
+        // In DAC builds, we may not have the entire segment table mapped and in any case it will be quite
+        // large. Since we only need one element, we'll retrieve just that one element.
+        pUserData = PTR_uintptr_t(PTR_TO_TADDR(pSegment) + offsetof(TableSegment, rgValue) +
+                                  (blockIndex * HANDLE_BYTES_PER_BLOCK));
+    }
+    else if (fAssertOnError)
+    {
+        // no user data is associated with this block
+        //
+        // we probably got here for one of the following reasons:
+        //  1) an outside caller tried to do a user data operation on an incompatible handle
+        //  2) the user data map in the segment is corrupt
+        //  3) the global type flags are corrupt
+        //
+        _ASSERTE(FALSE);
+    }
+
+    // return the result
+    return pUserData;
+}
+
+
+/*
+ * HandleFetchSegmentPointer
+ *
+ * Computes the segment pointer for a given handle.
+ *
+ */
+__inline PTR__TableSegmentHeader HandleFetchSegmentPointer(OBJECTHANDLE handle)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // find the segment for this handle
+    // (segments are aligned, so masking the handle address yields the segment base)
+    PTR__TableSegmentHeader pSegment = PTR__TableSegmentHeader((uintptr_t)handle & HANDLE_SEGMENT_ALIGN_MASK);
+
+    // sanity
+    _ASSERTE(pSegment);
+
+    // return the segment pointer
+    return pSegment;
+}
+
+
+/*
+ * HandleValidateAndFetchUserDataPointer
+ *
+ * Gets the user data pointer for the specified handle.
+ * ASSERTs and returns NULL if handle is not of the expected type.
+ *
+ */
+uintptr_t *HandleValidateAndFetchUserDataPointer(OBJECTHANDLE handle, uint32_t uTypeExpected)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // get the segment for this handle
+    PTR__TableSegmentHeader pSegment = HandleFetchSegmentPointer(handle);
+
+    // find the offset of this handle into the segment
+    uintptr_t offset = (uintptr_t)handle & HANDLE_SEGMENT_CONTENT_MASK;
+
+    // make sure it is in the handle area and not the header
+    _ASSERTE(offset >= HANDLE_HEADER_SIZE);
+
+    // convert the offset to a handle index
+    uint32_t uHandle = (uint32_t)((offset - HANDLE_HEADER_SIZE) / HANDLE_SIZE);
+
+    // compute the block this handle resides in
+    uint32_t uBlock = uHandle / HANDLE_HANDLES_PER_BLOCK;
+
+    // fetch the user data for this block
+    PTR_uintptr_t pUserData = BlockFetchUserDataPointer(pSegment, uBlock, TRUE);
+
+    // did we get the user data block?
+    if (pUserData)
+    {
+        // yup - adjust the pointer to be handle-specific
+        pUserData += (uHandle - (uBlock * HANDLE_HANDLES_PER_BLOCK));
+
+        // validate the block type before returning the pointer
+        if (pSegment->rgBlockType[uBlock] != uTypeExpected)
+        {
+            // type mismatch - caller error
+            _ASSERTE(FALSE);
+
+            // don't return a pointer to the caller
+            pUserData = NULL;
+        }
+    }
+
+    // return the result
+    return pUserData;
+}
+
+/*
+ * HandleQuickFetchUserDataPointer
+ *
+ * Gets the user data pointer for a handle.
+ * Less validation is performed.
+ * + */ +PTR_uintptr_t HandleQuickFetchUserDataPointer(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + /* + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + */ + SUPPORTS_DAC; + + // get the segment for this handle + PTR__TableSegmentHeader pSegment = HandleFetchSegmentPointer(handle); + + // find the offset of this handle into the segment + uintptr_t offset = (uintptr_t)handle & HANDLE_SEGMENT_CONTENT_MASK; + + // make sure it is in the handle area and not the header + _ASSERTE(offset >= HANDLE_HEADER_SIZE); + + // convert the offset to a handle index + uint32_t uHandle = (uint32_t)((offset - HANDLE_HEADER_SIZE) / HANDLE_SIZE); + + // compute the block this handle resides in + uint32_t uBlock = uHandle / HANDLE_HANDLES_PER_BLOCK; + + // fetch the user data for this block + PTR_uintptr_t pUserData = BlockFetchUserDataPointer(pSegment, uBlock, TRUE); + + // if we got the user data block then adjust the pointer to be handle-specific + if (pUserData) + pUserData += (uHandle - (uBlock * HANDLE_HANDLES_PER_BLOCK)); + + // return the result + return pUserData; +} + +#ifndef DACCESS_COMPILE +/* + * HandleQuickSetUserData + * + * Stores user data with a handle. + * + */ +void HandleQuickSetUserData(OBJECTHANDLE handle, uintptr_t lUserData) +{ + WRAPPER_NO_CONTRACT; + + /* + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + */ + + // fetch the user data slot for this handle + uintptr_t *pUserData = HandleQuickFetchUserDataPointer(handle); + + // is there a slot? + if (pUserData) + { + // yes - store the info + *pUserData = lUserData; + } +} + +#endif // !DACCESS_COMPILE + +/* + * HandleFetchType + * + * Computes the type index for a given handle. 
 *
 */
uint32_t HandleFetchType(OBJECTHANDLE handle)
{
    WRAPPER_NO_CONTRACT;

    // get the segment for this handle
    PTR__TableSegmentHeader pSegment = HandleFetchSegmentPointer(handle);

    // find the offset of this handle into the segment
    uintptr_t offset = (uintptr_t)handle & HANDLE_SEGMENT_CONTENT_MASK;

    // make sure it is in the handle area and not the header
    _ASSERTE(offset >= HANDLE_HEADER_SIZE);

    // convert the offset to a handle index
    uint32_t uHandle = (uint32_t)((offset - HANDLE_HEADER_SIZE) / HANDLE_SIZE);

    // compute the block this handle resides in
    uint32_t uBlock = uHandle / HANDLE_HANDLES_PER_BLOCK;

    // return the block's type
    return pSegment->rgBlockType[uBlock];
}

/*
 * HandleFetchHandleTable
 *
 * Retrieves the handle table that owns a given handle, via the owning
 * segment's back pointer.
 *
 */
PTR_HandleTable HandleFetchHandleTable(OBJECTHANDLE handle)
{
    WRAPPER_NO_CONTRACT;
    SUPPORTS_DAC;

    // get the segment for this handle
    PTR__TableSegmentHeader pSegment = HandleFetchSegmentPointer(handle);

    // return the table
    return pSegment->pHandleTable;
}

#ifndef DACCESS_COMPILE
/*
 * SegmentInitialize
 *
 * Initializes a segment.
 *
 */
BOOL SegmentInitialize(TableSegment *pSegment, HandleTable *pTable)
{
    LIMITED_METHOD_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // we want to commit enough for the header PLUS some handles
    uint32_t dwCommit = HANDLE_HEADER_SIZE;

#ifndef FEATURE_REDHAWK // todo: implement SafeInt
    // Prefast overflow sanity check the addition
    if (!ClrSafeInt<uint32_t>::addition(dwCommit, g_SystemInfo.dwPageSize, dwCommit))
    {
        return FALSE;
    }
#endif // !FEATURE_REDHAWK

    // Round down to a dwPageSize boundary (header size rounded up to a whole
    // page; dwPageSize is assumed to be a power of two here)
    dwCommit &= ~(g_SystemInfo.dwPageSize - 1);

    // commit the header
    if (!GCToOSInterface::VirtualCommit(pSegment, dwCommit))
    {
        //_ASSERTE(FALSE);
        return FALSE;
    }

    // remember how many blocks we committed
    pSegment->bCommitLine = (uint8_t)((dwCommit - HANDLE_HEADER_SIZE) / HANDLE_BYTES_PER_BLOCK);

    // now preinitialize the 0xFF guys
    memset(pSegment->rgGeneration, 0xFF, sizeof(pSegment->rgGeneration));
    memset(pSegment->rgTail, BLOCK_INVALID, sizeof(pSegment->rgTail));
    memset(pSegment->rgHint, BLOCK_INVALID, sizeof(pSegment->rgHint));
    memset(pSegment->rgFreeMask, 0xFF, sizeof(pSegment->rgFreeMask));
    memset(pSegment->rgBlockType, TYPE_INVALID, sizeof(pSegment->rgBlockType));
    memset(pSegment->rgUserData, BLOCK_INVALID, sizeof(pSegment->rgUserData));

    // prelink the free chain: each block points at the next higher block
    _ASSERTE(FitsInU1(HANDLE_BLOCKS_PER_SEGMENT));
    uint8_t u = 0;
    while (u < (HANDLE_BLOCKS_PER_SEGMENT - 1))
    {
        uint8_t next = u + 1;
        pSegment->rgAllocation[u] = next;
        u = next;
    }

    // and terminate the last node
    pSegment->rgAllocation[u] = BLOCK_INVALID;

    // store the back pointer from our new segment to its owning table
    pSegment->pHandleTable = pTable;

    // all done
    return TRUE;
}


/*
 * SegmentFree
 *
 * Frees the specified segment.
 *
 */
void SegmentFree(TableSegment *pSegment)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // free the segment's memory
    GCToOSInterface::VirtualRelease(pSegment, HANDLE_SEGMENT_SIZE);
}


/*
 * SegmentAlloc
 *
 * Allocates a new segment.
 *
 */
TableSegment *SegmentAlloc(HandleTable *pTable)
{
    LIMITED_METHOD_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // allocate the segment's address space
    TableSegment *pSegment = NULL;

    // All platforms currently require 64Kb aligned table segments, which is what VirtualAlloc guarantees.
    // The actual requirement is that the alignment of the reservation equals or exceeds the size of the
    // reservation. This requirement stems from the method the handle table uses to map quickly from a handle
    // address back to the handle table segment header.
    _ASSERTE(HANDLE_SEGMENT_ALIGNMENT >= HANDLE_SEGMENT_SIZE);
    _ASSERTE(HANDLE_SEGMENT_ALIGNMENT == 0x10000);

    pSegment = (TableSegment *)GCToOSInterface::VirtualReserve(NULL, HANDLE_SEGMENT_SIZE, HANDLE_SEGMENT_ALIGNMENT, VirtualReserveFlags::None);
    _ASSERTE(((size_t)pSegment % HANDLE_SEGMENT_ALIGNMENT) == 0);

    // bail out if we couldn't get any memory
    if (!pSegment)
    {
        return NULL;
    }

    // initialize the header
    if (!SegmentInitialize(pSegment, pTable))
    {
        SegmentFree(pSegment);
        pSegment = NULL;
    }

    // all done
    return pSegment;
}

// Mark a handle being free.
__inline void SegmentMarkFreeMask(TableSegment *pSegment, _UNCHECKED_OBJECTREF* h)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    // index of the handle slot within the segment's value array
    uint32_t uMask = (uint32_t)(h - pSegment->rgValue);
    uint32_t uBit = uMask % HANDLE_HANDLES_PER_MASK;
    uMask = uMask / HANDLE_HANDLES_PER_MASK;
    // set the slot's bit: 1 == free
    pSegment->rgFreeMask[uMask] |= (1<<uBit);
}

// Mark a handle being used.
__inline void SegmentUnMarkFreeMask(TableSegment *pSegment, _UNCHECKED_OBJECTREF* h)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    // index of the handle slot within the segment's value array
    uint32_t uMask = (uint32_t)(h - pSegment->rgValue);
    uint32_t uBit = uMask % HANDLE_HANDLES_PER_MASK;
    uMask = uMask / HANDLE_HANDLES_PER_MASK;
    // clear the slot's bit: 0 == in use
    pSegment->rgFreeMask[uMask] &= ~(1<<uBit);
}

#ifndef FEATURE_REDHAWK
// Prepare a segment to be moved to default domain.
// Remove all non-async pin handles.
void SegmentPreCompactAsyncPinHandles(TableSegment *pSegment)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    pSegment->fResortChains = true;
    pSegment->fNeedsScavenging = true;

    // Zero out all non-async pin handles
    uint32_t uBlock;
    for (uBlock = 0; uBlock < pSegment->bEmptyLine; uBlock ++)
    {
        if (pSegment->rgBlockType[uBlock] == TYPE_INVALID)
        {
            continue;
        }
        else if (pSegment->rgBlockType[uBlock] != HNDTYPE_ASYNCPINNED)
        {
            // clear every handle slot in this block
            _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
            _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;
            do
            {
                *pValue = NULL;
                pValue ++;
            } while (pValue < pLast);

            // reset the block's generation clump
            ((uint32_t*)pSegment->rgGeneration)[uBlock] = (uint32_t)-1;

            // mark every slot in the block free again
            uint32_t *pdwMask = pSegment->rgFreeMask + (uBlock * HANDLE_MASKS_PER_BLOCK);
            uint32_t *pdwMaskLast = pdwMask + HANDLE_MASKS_PER_BLOCK;
            do
            {
                *pdwMask = MASK_EMPTY;
                pdwMask ++;
            } while (pdwMask < pdwMaskLast);

            pSegment->rgBlockType[uBlock] = TYPE_INVALID;
            pSegment->rgUserData[uBlock] = BLOCK_INVALID;
            pSegment->rgLocks[uBlock] = 0;
        }
    }

    // Return all non-async pin handles to free list
    uint32_t uType;
    for (uType = 0; uType < HANDLE_MAX_INTERNAL_TYPES; uType ++)
    {
        if (uType == HNDTYPE_ASYNCPINNED)
        {
            continue;
        }
        pSegment->rgFreeCount[uType] = 0;
        if (pSegment->rgHint[uType] != BLOCK_INVALID)
        {
            // splice this type's circular allocation chain onto the free list
            uint32_t uLast = pSegment->rgHint[uType];
            uint8_t uFirst = pSegment->rgAllocation[uLast];
            pSegment->rgAllocation[uLast] = pSegment->bFreeList;
            pSegment->bFreeList = uFirst;
            pSegment->rgHint[uType] = BLOCK_INVALID;
            pSegment->rgTail[uType] = BLOCK_INVALID;
        }
    }

    // make sure the remaining async handle has MethodTable that exists in default domain
    uBlock = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
    if (uBlock == BLOCK_INVALID)
    {
        return;
    }
    uint32_t freeCount = 0;
    for (uBlock = 0; uBlock < pSegment->bEmptyLine; uBlock ++)
    {
        if (pSegment->rgBlockType[uBlock] != HNDTYPE_ASYNCPINNED)
        {
            continue;
        }
        // NOTE: the uBlock*2 / uBlock*2+1 indexing relies on there being two
        // free masks per block; skip blocks that are entirely free
        if (pSegment->rgFreeMask[uBlock*2] == (uint32_t)-1 && pSegment->rgFreeMask[uBlock*2+1] == (uint32_t)-1)
        {
            continue;
        }
        _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
        _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;

        do
        {
            _UNCHECKED_OBJECTREF value = *pValue;
            if (!HndIsNullOrDestroyedHandle(value))
            {
                _ASSERTE (value->GetMethodTable() == g_pOverlappedDataClass);
                OVERLAPPEDDATAREF overlapped = (OVERLAPPEDDATAREF)(ObjectToOBJECTREF((Object*)(value)));
                if (overlapped->HasCompleted())
                {
                    // IO has finished. We don't need to pin the user buffer any longer.
                    overlapped->m_userObject = NULL;
                }
                BashMTForPinnedObject(ObjectToOBJECTREF(value));
            }
            else
            {
                // reset free mask
                SegmentMarkFreeMask(pSegment, pValue);
                freeCount ++;
            }
            pValue ++;
        } while (pValue != pLast);
    }

    pSegment->rgFreeCount[HNDTYPE_ASYNCPINNED] = freeCount;
}

// Copy a handle to a different segment in the same HandleTable
BOOL SegmentCopyAsyncPinHandle(TableSegment *pSegment, _UNCHECKED_OBJECTREF *h)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    _ASSERTE (HandleFetchSegmentPointer((OBJECTHANDLE)h) != pSegment);

    if (pSegment->rgFreeCount[HNDTYPE_ASYNCPINNED] == 0)
    {
        // no free async-pin slots: pull a fresh block off the free list
        uint8_t uBlock = pSegment->bFreeList;
        if (uBlock == BLOCK_INVALID)
        {
            // All slots are used up.
            return FALSE;
        }
        pSegment->bFreeList = pSegment->rgAllocation[uBlock];
        pSegment->rgBlockType[uBlock] = HNDTYPE_ASYNCPINNED;
        pSegment->rgAllocation[uBlock] = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
        pSegment->rgHint[HNDTYPE_ASYNCPINNED] = uBlock;
        pSegment->rgFreeCount[HNDTYPE_ASYNCPINNED] += HANDLE_HANDLES_PER_BLOCK;
    }
    // walk the circular allocation chain looking for a block with a free slot
    uint8_t uBlock = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
    uint8_t uLast = uBlock;
    do
    {
        uint32_t n = uBlock * (HANDLE_HANDLES_PER_BLOCK/HANDLE_HANDLES_PER_MASK);
        uint32_t* pMask = pSegment->rgFreeMask + n;
        if (pMask[0] != 0 || pMask[1] != 0)
        {
            break;
        }
        uBlock = pSegment->rgAllocation[uBlock];
    } while (uBlock != uLast);
    // the free count above guarantees we found one
    _ASSERTE (uBlock != uLast);
    pSegment->rgHint[HNDTYPE_ASYNCPINNED] = uBlock;
    // claim the first empty slot in the block and move the handle into it
    _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
    _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;
    do
    {
        if (*pValue == NULL)
        {
            SegmentUnMarkFreeMask(pSegment,pValue);
            *pValue = *h;
            *h = NULL;
            break;
        }
        pValue ++;
    } while (pValue != pLast);
    _ASSERTE (pValue != pLast);
    pSegment->rgFreeCount[HNDTYPE_ASYNCPINNED] --;
    return TRUE;
}

// Compact a segment's async pin handles into the worker segment, advancing
// the worker segment (via *ppWorkerSegment) when it fills up.
void SegmentCompactAsyncPinHandles(TableSegment *pSegment, TableSegment **ppWorkerSegment)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    uint32_t uBlock = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
    if (uBlock == BLOCK_INVALID)
    {
        return;
    }
    for (uBlock = 0; uBlock < pSegment->bEmptyLine; uBlock ++)
    {
        if (pSegment->rgBlockType[uBlock] != HNDTYPE_ASYNCPINNED)
        {
            continue;
        }
        if (pSegment->rgFreeMask[uBlock*2] == (uint32_t)-1 && pSegment->rgFreeMask[uBlock*2+1] == (uint32_t)-1)
        {
            continue;
        }
        _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
        _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;

        do
        {
            BOOL fNeedNewSegment = FALSE;
            _UNCHECKED_OBJECTREF value = *pValue;
            if (!HndIsNullOrDestroyedHandle(value))
            {
                _ASSERTE (value->GetMethodTable() == g_pOverlappedDataClass);
                OVERLAPPEDDATAREF overlapped = (OVERLAPPEDDATAREF)(ObjectToOBJECTREF((Object*)value));
                if (overlapped->HasCompleted())
                {
                    // IO has finished. We don't need to pin the user buffer any longer.
                    overlapped->m_userObject = NULL;
                }
                BashMTForPinnedObject(ObjectToOBJECTREF(value));
                fNeedNewSegment = !SegmentCopyAsyncPinHandle(*ppWorkerSegment,pValue);
            }
            if (fNeedNewSegment)
            {
                // the current worker is full - advance to the next segment
                // (retries the same pValue on the next iteration)
                _ASSERTE ((*ppWorkerSegment)->rgFreeCount[HNDTYPE_ASYNCPINNED] == 0 &&
                          (*ppWorkerSegment)->bFreeList == BLOCK_INVALID);
                TableSegment *pNextSegment = (*ppWorkerSegment)->pNextSegment;
                SegmentPreCompactAsyncPinHandles(pNextSegment);
                *ppWorkerSegment = pNextSegment;
                if (pNextSegment == pSegment)
                {
                    // The current segment will be moved to default domain.
                    return;
                }
            }
            else
            {
                pValue ++;
            }
        } while (pValue != pLast);
    }
}


// Mark AsyncPinHandles ready to be cleaned when the marker job is processed
BOOL SegmentHandleAsyncPinHandles (TableSegment *pSegment)
{
    CONTRACTL
    {
        GC_NOTRIGGER;
        NOTHROW;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    uint32_t uBlock = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
    if (uBlock == BLOCK_INVALID)
    {
        // There are no pinning handles.
        return FALSE;
    }

    BOOL result = FALSE;

    for (uBlock = 0; uBlock < pSegment->bEmptyLine; uBlock ++)
    {
        if (pSegment->rgBlockType[uBlock] != HNDTYPE_ASYNCPINNED)
        {
            continue;
        }
        // skip blocks whose two free masks show every slot free
        if (pSegment->rgFreeMask[uBlock*2] == (uint32_t)-1 && pSegment->rgFreeMask[uBlock*2+1] == (uint32_t)-1)
        {
            continue;
        }
        _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
        _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;

        do
        {
            _UNCHECKED_OBJECTREF value = *pValue;
            if (!HndIsNullOrDestroyedHandle(value))
            {
                _ASSERTE (value->GetMethodTable() == g_pOverlappedDataClass);
                OVERLAPPEDDATAREF overlapped = (OVERLAPPEDDATAREF)(ObjectToOBJECTREF((Object*)value));
                if (overlapped->GetAppDomainId() != DefaultADID && overlapped->HasCompleted())
                {
                    overlapped->HandleAsyncPinHandle();
                    result = TRUE;
                }
            }
            pValue ++;
        } while (pValue != pLast);
    }

    return result;
}

// Replace an async pin handle with one from default domain
void SegmentRelocateAsyncPinHandles (TableSegment *pSegment, HandleTable *pTargetTable)
{
    CONTRACTL
    {
        GC_NOTRIGGER;
        THROWS;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    uint32_t uBlock = pSegment->rgHint[HNDTYPE_ASYNCPINNED];
    if (uBlock == BLOCK_INVALID)
    {
        // There are no pinning handles.
        return;
    }
    for (uBlock = 0; uBlock < pSegment->bEmptyLine; uBlock ++)
    {
        if (pSegment->rgBlockType[uBlock] != HNDTYPE_ASYNCPINNED)
        {
            continue;
        }
        if (pSegment->rgFreeMask[uBlock*2] == (uint32_t)-1 && pSegment->rgFreeMask[uBlock*2+1] == (uint32_t)-1)
        {
            continue;
        }
        _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
        _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK;

        do
        {
            _UNCHECKED_OBJECTREF value = *pValue;
            if (!HndIsNullOrDestroyedHandle(value))
            {
                _ASSERTE (value->GetMethodTable() == g_pOverlappedDataClass);
                OVERLAPPEDDATAREF overlapped = (OVERLAPPEDDATAREF)(ObjectToOBJECTREF((Object*)value));
                if (overlapped->HasCompleted())
                {
                    // IO has finished. We don't need to pin the user buffer any longer.
                    overlapped->m_userObject = NULL;
                }
                BashMTForPinnedObject(ObjectToOBJECTREF(value));
                // re-pin the object via a fresh handle in the target table,
                // then release this segment's slot
                overlapped->m_pinSelf = CreateAsyncPinningHandle((HHANDLETABLE)pTargetTable,ObjectToOBJECTREF(value));
                *pValue = NULL;
            }
            pValue ++;
        } while (pValue != pLast);
    }
}

// Mark all non-pending AsyncPinHandle ready for cleanup.
// We will queue a marker Overlapped to io completion port.  We use the marker
// to make sure that all iocompletion jobs before this marker have been processed.
// After that we can free the async pinned handles.
BOOL TableHandleAsyncPinHandles(HandleTable *pTable)
{
    CONTRACTL
    {
        GC_NOTRIGGER;
        NOTHROW;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    _ASSERTE (pTable->uADIndex.m_dwIndex == DefaultADID);

    BOOL result = FALSE;
    TableSegment *pSegment = pTable->pSegmentList;

    // hold the table lock while walking the segment list
    CrstHolder ch(&pTable->Lock);

    while (pSegment)
    {
        if (SegmentHandleAsyncPinHandles (pSegment))
        {
            result = TRUE;
        }
        pSegment = pSegment->pNextSegment;
    }

    return result;
}

// Keep needed async Pin Handle by moving them to default domain.
// Strategy:
// 1. Try to create pin handles in default domain to replace it.
// 2. If 1 failed due to OOM, we will relocate segments from this HandleTable to default domain.
//    a. Clean the segment so that only saved pin handles exist.  This segment becomes the worker segment.
//    b. Copy pin handles from remaining segments to the worker segment.  If worker segment is full, start
//       from a again.
//    c. After copying all handles to worker segments, move the segments to default domain.
// It is very important that in step 2, we should not fail for OOM, which means no memory allocation.
void TableRelocateAsyncPinHandles(HandleTable *pTable, HandleTable *pTargetTable)
{
    CONTRACTL
    {
        GC_TRIGGERS;
        NOTHROW;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    _ASSERTE (pTargetTable->uADIndex == SystemDomain::System()->DefaultDomain()->GetIndex());  // must be for default domain

    BOOL fGotException = FALSE;
    TableSegment *pSegment = pTable->pSegmentList;

#ifdef _DEBUG
    // on debug builds, execute the OOM path 10% of the time.
    if (GetRandomInt(100) < 10)
        goto SLOW_PATH;
#endif

    // Step 1: replace pinning handles with ones from default domain
    EX_TRY
    {
        while (pSegment)
        {
            SegmentRelocateAsyncPinHandles (pSegment, pTargetTable);
            pSegment = pSegment->pNextSegment;
        }
    }
    EX_CATCH
    {
        fGotException = TRUE;
    }
    EX_END_CATCH(SwallowAllExceptions);

    if (!fGotException)
    {
        return;
    }

#ifdef _DEBUG
SLOW_PATH:
#endif

    // step 2: default domain runs out of space
    // compact all remaining pinning handles and move the segments to default domain

    while (true)
    {
        CrstHolderWithState ch(&pTable->Lock);

        // We cannot move segments to a different table if we're asynchronously scanning the current table as
        // part of a concurrent GC. That's because the async table scanning code does most of its work without
        // the table lock held.  So we'll take the table lock and then look to see if we're in a concurrent GC.
        // If we are we'll back out and try again.  This doesn't prevent a concurrent GC from initiating while
        // we have the lock held but the part we care about (the async table scan) takes the table lock during
        // a preparation step so we'll be able to complete our segment moves before the async scan has a
        // chance to interfere with us (or vice versa).
        if (GCHeap::GetGCHeap()->IsConcurrentGCInProgress())
        {
            // A concurrent GC is in progress so someone might be scanning our segments asynchronously.
            // Release the lock, wait for the GC to complete and try again. The order is important; if we wait
            // before releasing the table lock we can deadlock with an async table scan.
            ch.Release();
            GCHeap::GetGCHeap()->WaitUntilConcurrentGCComplete();
            continue;
        }

        // If we get here then we managed to acquire the table lock and observe that no concurrent GC was in
        // progress. A concurrent GC could start at any time so that state may have changed, but since we took
        // the table lock first we know that the GC could only have gotten as far as attempting to initiate an
        // async handle table scan (which attempts to acquire the table lock). So as long as we complete our
        // segment compaction and moves without releasing the table lock we're guaranteed to complete before
        // the async scan can get in and observe any of the segments.

        // Compact async pinning handles into the smallest number of leading segments we can (the worker
        // segments).
        TableSegment *pWorkerSegment = pTable->pSegmentList;
        SegmentPreCompactAsyncPinHandles (pWorkerSegment);

        pSegment = pWorkerSegment->pNextSegment;
        while (pSegment)
        {
            SegmentCompactAsyncPinHandles (pSegment, &pWorkerSegment);
            pSegment= pSegment->pNextSegment;
        }

        // Empty the remaining segments.
        pSegment = pWorkerSegment->pNextSegment;
        while (pSegment)
        {
            memset(pSegment->rgValue, 0, (uint32_t)pSegment->bCommitLine * HANDLE_BYTES_PER_BLOCK);
            pSegment = pSegment->pNextSegment;
        }

        // Move the worker segments over to the tail end of the default domain's segment list.
        {
            CrstHolder ch1(&pTargetTable->Lock);

            // Locate the segment currently at the tail of the default domain's segment list.
            TableSegment *pTargetSegment = pTargetTable->pSegmentList;
            while (pTargetSegment->pNextSegment)
            {
                pTargetSegment = pTargetSegment->pNextSegment;
            }

            // Take the worker segments and point them to their new handle table and recalculate their
            // sequence numbers to be consistent with the queue they're moving to.
            uint8_t bLastSequence = pTargetSegment->bSequence;
            pSegment = pTable->pSegmentList;
            while (pSegment != pWorkerSegment->pNextSegment)
            {
                pSegment->pHandleTable = pTargetTable;
                pSegment->bSequence = (uint8_t)(((uint32_t)bLastSequence + 1) % 0x100);
                bLastSequence = pSegment->bSequence;
                pSegment = pSegment->pNextSegment;
            }

            // Join the worker segments to the tail of the default domain segment list.
            pTargetSegment->pNextSegment = pTable->pSegmentList;

            // Reset the current handle table segment list to omit the removed worker segments and start at
            // the first non-worker.
            pTable->pSegmentList = pWorkerSegment->pNextSegment;

            // The last worker segment is now the end of the default domain's segment list.
            pWorkerSegment->pNextSegment = NULL;
        }

        break;
    }
}
#endif // !FEATURE_REDHAWK

/*
 * Check if a handle is part of a HandleTable
 * (linear walk of the table's segment list under the table lock)
 */
BOOL TableContainHandle(HandleTable *pTable, OBJECTHANDLE handle)
{
    _ASSERTE (handle);

    // get the segment for this handle
    TableSegment *pSegment = (TableSegment *)HandleFetchSegmentPointer(handle);

    CrstHolder ch(&pTable->Lock);
    TableSegment *pWorkerSegment = pTable->pSegmentList;
    while (pWorkerSegment)
    {
        if (pWorkerSegment == pSegment)
        {
            return TRUE;
        }
        pWorkerSegment = pWorkerSegment->pNextSegment;
    }
    return FALSE;
}

/*
 * SegmentRemoveFreeBlocks
 *
 * Scans a segment for free blocks of the specified type
 * and moves them to the segment's free list.
 *
 */
void SegmentRemoveFreeBlocks(TableSegment *pSegment, uint32_t uType, BOOL *pfScavengeLater)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // fetch the tail block for the specified chain
    uint32_t uPrev = pSegment->rgTail[uType];

    // if it's a terminator then there are no blocks in the chain
    if (uPrev == BLOCK_INVALID)
        return;

    // we may need to clean up user data blocks later
    BOOL fCleanupUserData = FALSE;

    // start iterating with the head block
    uint32_t uStart = pSegment->rgAllocation[uPrev];
    uint32_t uBlock = uStart;

    // keep track of how many blocks we removed
    uint32_t uRemoved = 0;

    // we want to preserve the relative order of any blocks we free
    // this is the best we can do until the free list is resorted
    uint32_t uFirstFreed = BLOCK_INVALID;
    uint32_t uLastFreed  = BLOCK_INVALID;

    // loop until we've processed the whole chain
    for (;;)
    {
        // fetch the next block index
        uint32_t uNext = pSegment->rgAllocation[uBlock];

#ifdef HANDLE_OPTIMIZE_FOR_64_HANDLE_BLOCKS
        // determine whether this block is empty
        if (((uint64_t*)pSegment->rgFreeMask)[uBlock] == UI64(0xFFFFFFFFFFFFFFFF))
#else
        // assume this block is empty until we know otherwise
        BOOL fEmpty = TRUE;

        // get the first mask for this block
        uint32_t *pdwMask     = pSegment->rgFreeMask + (uBlock * HANDLE_MASKS_PER_BLOCK);
        uint32_t *pdwMaskLast = pdwMask              + HANDLE_MASKS_PER_BLOCK;

        // loop through the masks until we've processed them all or we've found handles
        do
        {
            // is this mask empty?
            if (*pdwMask != MASK_EMPTY)
            {
                // nope - this block still has handles in it
                fEmpty = FALSE;
                break;
            }

            // on to the next mask
            pdwMask++;

        } while (pdwMask < pdwMaskLast);

        // is this block empty?
        if (fEmpty)
#endif
        {
            // is this block currently locked?
            if (BlockIsLocked(pSegment, uBlock))
            {
                // block cannot be freed, if we were passed a scavenge flag then set it
                if (pfScavengeLater)
                    *pfScavengeLater = TRUE;
            }
            else
            {
                // safe to free - did it have user data associated?
                uint32_t uData = pSegment->rgUserData[uBlock];
                if (uData != BLOCK_INVALID)
                {
                    // data blocks are 'empty' so we keep them locked
                    // unlock the block so it can be reclaimed below
                    BlockUnlock(pSegment, uData);

                    // unlink the data block from the handle block
                    pSegment->rgUserData[uBlock] = BLOCK_INVALID;

                    // remember that we need to scavenge the data block chain
                    fCleanupUserData = TRUE;
                }

                // mark the block as free
                pSegment->rgBlockType[uBlock] = TYPE_INVALID;

                // have we freed any other blocks yet?
                if (uFirstFreed == BLOCK_INVALID)
                {
                    // no - this is the first one - remember it as the new head
                    uFirstFreed = uBlock;
                }
                else
                {
                    // yes - link this block to the other ones in order
                    pSegment->rgAllocation[uLastFreed] = (uint8_t)uBlock;
                }

                // remember this block for later
                uLastFreed = uBlock;

                // are there other blocks in the chain?
                if (uPrev != uBlock)
                {
                    // yes - unlink this block from the chain
                    pSegment->rgAllocation[uPrev] = (uint8_t)uNext;

                    // if we are removing the tail then pick a new tail
                    if (pSegment->rgTail[uType] == uBlock)
                        pSegment->rgTail[uType] = (uint8_t)uPrev;

                    // if we are removing the hint then pick a new hint
                    if (pSegment->rgHint[uType] == uBlock)
                        pSegment->rgHint[uType] = (uint8_t)uNext;

                    // we removed the current block - reset uBlock to a valid block
                    uBlock = uPrev;

                    // N.B. we'll check if we freed uStart later when it's safe to recover
                }
                else
                {
                    // we're removing last block - sanity check the loop condition
                    _ASSERTE(uNext == uStart);

                    // mark this chain as completely empty
                    pSegment->rgAllocation[uBlock] = BLOCK_INVALID;
                    pSegment->rgTail[uType] = BLOCK_INVALID;
                    pSegment->rgHint[uType] = BLOCK_INVALID;
                }

                // update the number of blocks we've removed
                uRemoved++;
            }
        }

        // if we are back at the beginning then it is time to stop
        if (uNext == uStart)
            break;

        // now see if we need to reset our start block
        if (uStart == uLastFreed)
            uStart = uNext;

        // on to the next block
        uPrev = uBlock;
        uBlock = uNext;
    }

    // did we remove any blocks?
    if (uRemoved)
    {
        // yes - link the new blocks into the free list
        pSegment->rgAllocation[uLastFreed] = pSegment->bFreeList;
        pSegment->bFreeList = (uint8_t)uFirstFreed;

        // update the free count for this chain
        pSegment->rgFreeCount[uType] -= (uRemoved * HANDLE_HANDLES_PER_BLOCK);

        // mark for a resort - the free list (and soon allocation chains) may be out of order
        pSegment->fResortChains = TRUE;

        // if we removed blocks that had user data then we need to reclaim those too
        // (note: at most one level of recursion, since data blocks carry no user data)
        if (fCleanupUserData)
            SegmentRemoveFreeBlocks(pSegment, HNDTYPE_INTERNAL_DATABLOCK, NULL);
    }
}


/*
 * SegmentInsertBlockFromFreeListWorker
 *
 * Inserts a block into a block list within a segment.  Blocks are obtained from the
 * segment's free list.  Returns the index of the block inserted, or BLOCK_INVALID
 * if no blocks were available.
 *
 * This routine is the core implementation for SegmentInsertBlockFromFreeList.
 *
 */
uint32_t SegmentInsertBlockFromFreeListWorker(TableSegment *pSegment, uint32_t uType, BOOL fUpdateHint)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW
        GC_NOTRIGGER;
        MODE_ANY;
    */


    // fetch the next block from the free list
    uint8_t uBlock = pSegment->bFreeList;

    // if we got the terminator then there are no more blocks
    if (uBlock != BLOCK_INVALID)
    {
        // are we eating out of the last empty range of blocks?
        if (uBlock >= pSegment->bEmptyLine)
        {
            // get the current commit line
            uint32_t uCommitLine = pSegment->bCommitLine;

            // if this block is uncommitted then commit some memory now
            if (uBlock >= uCommitLine)
            {
                // figure out where to commit next
                void * pvCommit = pSegment->rgValue + (uCommitLine * HANDLE_HANDLES_PER_BLOCK);

                // we should commit one more page of handles
                uint32_t dwCommit = g_SystemInfo.dwPageSize;

                // commit the memory
                if (!GCToOSInterface::VirtualCommit(pvCommit, dwCommit))
                    return BLOCK_INVALID;

                // use the previous commit line as the new decommit line
                pSegment->bDecommitLine = (uint8_t)uCommitLine;

                // adjust the commit line by the number of blocks we committed
                pSegment->bCommitLine = (uint8_t)(uCommitLine + (dwCommit / HANDLE_BYTES_PER_BLOCK));
            }

            // update our empty line
            pSegment->bEmptyLine = uBlock + 1;
        }

        // unlink our block from the free list
        pSegment->bFreeList = pSegment->rgAllocation[uBlock];

        // link our block into the specified chain
        uint32_t uOldTail = pSegment->rgTail[uType];
        if (uOldTail == BLOCK_INVALID)
        {
            // first block, set as head and link to itself
            pSegment->rgAllocation[uBlock] = (uint8_t)uBlock;

            // there are no other blocks - update the hint anyway
            fUpdateHint = TRUE;
        }
        else
        {
            // not first block - link circularly
            pSegment->rgAllocation[uBlock] = pSegment->rgAllocation[uOldTail];
            pSegment->rgAllocation[uOldTail] = (uint8_t)uBlock;

            // chain may need resorting depending on what we added
            pSegment->fResortChains = TRUE;
        }

        // mark this block with the type we're using it for
        pSegment->rgBlockType[uBlock] = (uint8_t)uType;

        // update the chain tail
        pSegment->rgTail[uType] = (uint8_t)uBlock;

        // if we are supposed to update the hint, then point it at the new block
        if (fUpdateHint)
            pSegment->rgHint[uType] = (uint8_t)uBlock;

        // increment the chain's free count to reflect the additional block
        pSegment->rgFreeCount[uType] += HANDLE_HANDLES_PER_BLOCK;
    }

    // all done
    return uBlock;
}


/*
 * SegmentInsertBlockFromFreeList
 *
 * Inserts a block into a block list within a segment.  Blocks are obtained from the
 * segment's free list.  Returns the index of the block inserted, or BLOCK_INVALID
 * if no blocks were available.
 *
 * This routine does the work of securing a parallel user data block if required.
 *
 */
uint32_t SegmentInsertBlockFromFreeList(TableSegment *pSegment, uint32_t uType, BOOL fUpdateHint)
{
    LIMITED_METHOD_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    uint32_t uBlock, uData = 0;

    // does this block type require user data?
+ BOOL fUserData = TypeHasUserData(pSegment->pHandleTable, uType); + + // if we need user data then we need to make sure it can go in the same segment as the handles + if (fUserData) + { + // if we can't also fit the user data in this segment then bail + uBlock = pSegment->bFreeList; + if ((uBlock == BLOCK_INVALID) || (pSegment->rgAllocation[uBlock] == BLOCK_INVALID)) + return BLOCK_INVALID; + + // allocate our user data block (we do it in this order so that free order is nicer) + uData = SegmentInsertBlockFromFreeListWorker(pSegment, HNDTYPE_INTERNAL_DATABLOCK, FALSE); + } + + // now allocate the requested block + uBlock = SegmentInsertBlockFromFreeListWorker(pSegment, uType, fUpdateHint); + + // should we have a block for user data too? + if (fUserData) + { + // did we get them both? + if ((uBlock != BLOCK_INVALID) && (uData != BLOCK_INVALID)) + { + // link the data block to the requested block + pSegment->rgUserData[uBlock] = (uint8_t)uData; + + // no handles are ever allocated out of a data block + // lock the block so it won't be reclaimed accidentally + BlockLock(pSegment, uData); + } + else + { + // NOTE: We pre-screened that the blocks exist above, so we should only + // get here under heavy load when a MEM_COMMIT operation fails. + + // if the type block allocation succeeded then scavenge the type block list + if (uBlock != BLOCK_INVALID) + SegmentRemoveFreeBlocks(pSegment, uType, NULL); + + // if the user data allocation succeeded then scavenge the user data list + if (uData != BLOCK_INVALID) + SegmentRemoveFreeBlocks(pSegment, HNDTYPE_INTERNAL_DATABLOCK, NULL); + + // make sure we return failure + uBlock = BLOCK_INVALID; + } + } + + // all done + return uBlock; +} + + +/* + * SegmentResortChains + * + * Sorts the block chains for optimal scanning order. + * Sorts the free list to combat fragmentation. 
 *
 */
void SegmentResortChains(TableSegment *pSegment)
{
    WRAPPER_NO_CONTRACT;

    // clear the sort flag for this segment
    pSegment->fResortChains = FALSE;

    // first, do we need to scavenge any blocks?
    if (pSegment->fNeedsScavenging)
    {
        // clear the scavenge flag
        pSegment->fNeedsScavenging = FALSE;

        // we may need to explicitly scan the user data chain too
        BOOL fCleanupUserData = FALSE;

        // fetch the empty line for this segment
        uint32_t uLast = pSegment->bEmptyLine;

        // loop over all active blocks, scavenging the empty ones as we go
        for (uint32_t uBlock = 0; uBlock < uLast; uBlock++)
        {
            // fetch the block type of this block
            uint32_t uType = pSegment->rgBlockType[uBlock];

            // only process public block types - we handle data blocks separately
            if (uType < HANDLE_MAX_PUBLIC_TYPES)
            {
#ifdef HANDLE_OPTIMIZE_FOR_64_HANDLE_BLOCKS
                // determine whether this block is empty
                // (a set bit in the free mask means the slot is free)
                if (((uint64_t*)pSegment->rgFreeMask)[uBlock] == UI64(0xFFFFFFFFFFFFFFFF))
#else
                // assume this block is empty until we know otherwise
                BOOL fEmpty = TRUE;

                // get the first mask for this block
                uint32_t *pdwMask = pSegment->rgFreeMask + (uBlock * HANDLE_MASKS_PER_BLOCK);
                uint32_t *pdwMaskLast = pdwMask + HANDLE_MASKS_PER_BLOCK;

                // loop through the masks until we've processed them all or we've found handles
                do
                {
                    // is this mask empty?
                    if (*pdwMask != MASK_EMPTY)
                    {
                        // nope - this block still has handles in it
                        fEmpty = FALSE;
                        break;
                    }

                    // on to the next mask
                    pdwMask++;

                } while (pdwMask < pdwMaskLast);

                // is this block empty?
                if (fEmpty)
#endif
                {
                    // is the block unlocked?
                    if (!BlockIsLocked(pSegment, uBlock))
                    {
                        // safe to free - did it have user data associated?
                        uint32_t uData = pSegment->rgUserData[uBlock];
                        if (uData != BLOCK_INVALID)
                        {
                            // data blocks are 'empty' so we keep them locked
                            // unlock the block so it can be reclaimed below
                            BlockUnlock(pSegment, uData);

                            // unlink the data block from the handle block
                            pSegment->rgUserData[uBlock] = BLOCK_INVALID;

                            // remember that we need to scavenge the data block chain
                            fCleanupUserData = TRUE;
                        }

                        // mark the block as free
                        pSegment->rgBlockType[uBlock] = TYPE_INVALID;

                        // fix up the free count for the block's type
                        pSegment->rgFreeCount[uType] -= HANDLE_HANDLES_PER_BLOCK;

                        // N.B. we don't update the list linkages here since they are rebuilt below
                    }
                }
            }
        }

        // if we have to clean up user data then do that now
        if (fCleanupUserData)
            SegmentRemoveFreeBlocks(pSegment, HNDTYPE_INTERNAL_DATABLOCK, NULL);
    }

    // keep some per-chain data
    uint8_t rgChainCurr[HANDLE_MAX_INTERNAL_TYPES];
    uint8_t rgChainHigh[HANDLE_MAX_INTERNAL_TYPES];
    uint8_t bChainFree = BLOCK_INVALID;
    uint32_t uEmptyLine = BLOCK_INVALID;
    BOOL fContiguousWithFreeList = TRUE;

    // preinit the chain data to no blocks
    uint32_t uType;
    for (uType = 0; uType < HANDLE_MAX_INTERNAL_TYPES; uType++)
        rgChainHigh[uType] = rgChainCurr[uType] = BLOCK_INVALID;

    // scan back through the block types
    // NOTE: walking from the highest block down and prepending as we go leaves each
    //       rebuilt chain (and the free list) sorted by ascending block index
    uint8_t uBlock = HANDLE_BLOCKS_PER_SEGMENT;
    while (uBlock > 0)
    {
        // decrement the block index
        uBlock--;

        // fetch the type for this block
        uType = pSegment->rgBlockType[uBlock];

        // is this block allocated?
        if (uType != TYPE_INVALID)
        {
            // looks allocated
            fContiguousWithFreeList = FALSE;

            // hope the segment's not corrupt :)
            _ASSERTE(uType < HANDLE_MAX_INTERNAL_TYPES);

            // remember the first block we see for each type
            if (rgChainHigh[uType] == BLOCK_INVALID)
                rgChainHigh[uType] = uBlock;

            // link this block to the last one we saw of this type
            pSegment->rgAllocation[uBlock] = rgChainCurr[uType];

            // remember this block in type chain
            rgChainCurr[uType] = (uint8_t)uBlock;
        }
        else
        {
            // block is free - is it also contiguous with the free list?
            if (fContiguousWithFreeList)
                uEmptyLine = uBlock;

            // link this block to the last one in the free chain
            pSegment->rgAllocation[uBlock] = bChainFree;

            // add this block to the free list
            bChainFree = (uint8_t)uBlock;
        }
    }

    // now close the loops and store the tails
    for (uType = 0; uType < HANDLE_MAX_INTERNAL_TYPES; uType++)
    {
        // get the first block in the list
        uint8_t bBlock = rgChainCurr[uType];

        // if there is a list then make it circular and save it
        if (bBlock != BLOCK_INVALID)
        {
            // highest block we saw becomes tail
            uint32_t uTail = rgChainHigh[uType];

            // store tail in segment
            pSegment->rgTail[uType] = (uint8_t)uTail;

            // link tail to head
            pSegment->rgAllocation[uTail] = bBlock;

            // If we scavenged blocks above then we might have left the hint pointing at the free chain. Reset
            // it back into this chain if so (the choice of block is arbitrary, this case is very rare).
            if (pSegment->rgBlockType[pSegment->rgHint[uType]] != uType)
                pSegment->rgHint[uType] = bBlock;
        }
    }

    // store the new free list head
    pSegment->bFreeList = bChainFree;

    // compute the new empty line
    // (uEmptyLine stays at its BLOCK_INVALID preinit value when no trailing free run was found)
    if (uEmptyLine > HANDLE_BLOCKS_PER_SEGMENT)
        uEmptyLine = HANDLE_BLOCKS_PER_SEGMENT;

    // store the updated empty line
    pSegment->bEmptyLine = (uint8_t)uEmptyLine;
}

/*
 * DoesSegmentNeedsToTrimExcessPages
 *
 * Checks to see if any pages can be decommitted from the segment
 *
 */
BOOL DoesSegmentNeedsToTrimExcessPages(TableSegment *pSegment)
{
    WRAPPER_NO_CONTRACT;

    // fetch the empty and decommit lines
    uint32_t uEmptyLine = pSegment->bEmptyLine;
    uint32_t uDecommitLine = pSegment->bDecommitLine;

    // check to see if we can decommit some handles
    // NOTE: we use '<' here to avoid playing ping-pong on page boundaries
    // this is OK since the zero case is handled elsewhere (segment gets freed)
    if (uEmptyLine < uDecommitLine)
    {
        // derive some useful info about the page size
        uintptr_t dwPageRound = (uintptr_t)g_SystemInfo.dwPageSize - 1;
        uintptr_t dwPageMask = ~dwPageRound;

        // compute the address corresponding to the empty line
        uintptr_t dwLo = (uintptr_t)pSegment->rgValue + (uEmptyLine * HANDLE_BYTES_PER_BLOCK);

        // adjust the empty line address to the start of the nearest whole empty page
        dwLo = (dwLo + dwPageRound) & dwPageMask;

        // compute the address corresponding to the old commit line
        uintptr_t dwHi = (uintptr_t)pSegment->rgValue + ((uint32_t)pSegment->bCommitLine * HANDLE_BYTES_PER_BLOCK);

        // is there anything to decommit?
        if (dwHi > dwLo)
        {
            return TRUE;
        }
    }

    return FALSE;
}


/*
 * SegmentTrimExcessPages
 *
 * Checks to see if any pages can be decommitted from the segment.
 * In case there are any unused pages it goes and decommits them.
 *
 */
void SegmentTrimExcessPages(TableSegment *pSegment)
{
    WRAPPER_NO_CONTRACT;

    // fetch the empty and decommit lines
    uint32_t uEmptyLine = pSegment->bEmptyLine;
    uint32_t uDecommitLine = pSegment->bDecommitLine;

    // check to see if we can decommit some handles
    // NOTE: we use '<' here to avoid playing ping-pong on page boundaries
    // this is OK since the zero case is handled elsewhere (segment gets freed)
    if (uEmptyLine < uDecommitLine)
    {
        // derive some useful info about the page size
        uintptr_t dwPageRound = (uintptr_t)g_SystemInfo.dwPageSize - 1;
        uintptr_t dwPageMask = ~dwPageRound;

        // compute the address corresponding to the empty line
        uintptr_t dwLo = (uintptr_t)pSegment->rgValue + (uEmptyLine * HANDLE_BYTES_PER_BLOCK);

        // adjust the empty line address to the start of the nearest whole empty page
        dwLo = (dwLo + dwPageRound) & dwPageMask;

        // compute the address corresponding to the old commit line
        uintptr_t dwHi = (uintptr_t)pSegment->rgValue + ((uint32_t)pSegment->bCommitLine * HANDLE_BYTES_PER_BLOCK);

        // is there anything to decommit?
        if (dwHi > dwLo)
        {
            // decommit the memory
            GCToOSInterface::VirtualDecommit((void *)dwLo, dwHi - dwLo);

            // update the commit line
            pSegment->bCommitLine = (uint8_t)((dwLo - (size_t)pSegment->rgValue) / HANDLE_BYTES_PER_BLOCK);

            // compute the address for the new decommit line
            // (one page below the new commit line, providing hysteresis against
            //  repeated commit/decommit at the same boundary)
            size_t dwDecommitAddr = dwLo - g_SystemInfo.dwPageSize;

            // assume a decommit line of zero until we know otherwise
            uDecommitLine = 0;

            // if the address is within the handle area then compute the line from the address
            if (dwDecommitAddr > (size_t)pSegment->rgValue)
                uDecommitLine = (uint32_t)((dwDecommitAddr - (size_t)pSegment->rgValue) / HANDLE_BYTES_PER_BLOCK);

            // update the decommit line
            pSegment->bDecommitLine = (uint8_t)uDecommitLine;
        }
    }
}


/*
 * BlockAllocHandlesInMask
 *
 * Attempts to allocate the requested number of handles of the specified type,
 * from the specified mask of the specified handle block.
 *
 * Returns the number of available handles actually allocated.
 *
 */
uint32_t BlockAllocHandlesInMask(TableSegment *pSegment, uint32_t uBlock,
                                 uint32_t *pdwMask, uint32_t uHandleMaskDisplacement,
                                 OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    LIMITED_METHOD_CONTRACT;
    UNREFERENCED_PARAMETER(uBlock);

    // keep track of how many handles we have left to allocate
    uint32_t uRemain = uCount;

    // fetch the free mask into a local so we can play with it
    // (a set bit in the free mask means the corresponding handle slot is free)
    uint32_t dwFree = *pdwMask;

    // keep track of our displacement within the mask
    uint32_t uByteDisplacement = 0;

    // examine the mask byte by byte for free handles
    do
    {
        // grab the low byte of the mask
        uint32_t dwLowByte = (dwFree & MASK_LOBYTE);

        // are there any free handles here?
        if (dwLowByte)
        {
            // remember which handles we've taken
            uint32_t dwAlloc = 0;

            // loop until we've allocated all the handles we can from here
            do
            {
                // get the index of the next handle
                // (c_rgLowBitIndex maps a byte value to the index of its lowest set bit)
                uint32_t uIndex = c_rgLowBitIndex[dwLowByte];

                // compute the mask for the handle we chose
                dwAlloc |= (1 << uIndex);

                // remove this handle from the mask byte
                dwLowByte &= ~dwAlloc;

                // compute the index of this handle in the segment
                uIndex += uHandleMaskDisplacement + uByteDisplacement;

                // store the allocated handle in the handle array
                *pHandleBase = (OBJECTHANDLE)(pSegment->rgValue + uIndex);

                // adjust our count and array pointer
                uRemain--;
                pHandleBase++;

            } while (dwLowByte && uRemain);

            // shift the allocation mask into position
            dwAlloc <<= uByteDisplacement;

            // update the mask to account for the handles we allocated
            *pdwMask &= ~dwAlloc;
        }

        // on to the next byte in the mask
        dwFree >>= BITS_PER_BYTE;
        uByteDisplacement += BITS_PER_BYTE;

    } while (uRemain && dwFree);

    // return the number of handles we got
    return (uCount - uRemain);

}


/*
 * BlockAllocHandlesInitial
 *
 * Allocates a specified number of handles from a newly committed (empty) block.
 *
 */
uint32_t BlockAllocHandlesInitial(TableSegment *pSegment, uint32_t uType, uint32_t uBlock,
                                  OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    LIMITED_METHOD_CONTRACT;
    UNREFERENCED_PARAMETER(uType);

    // sanity check
    _ASSERTE(uCount);

    // validate the number of handles we were asked to allocate
    if (uCount > HANDLE_HANDLES_PER_BLOCK)
    {
        _ASSERTE(FALSE);
        uCount = HANDLE_HANDLES_PER_BLOCK;
    }

    // keep track of how many handles we have left to mark in masks
    uint32_t uRemain = uCount;

    // get the first mask for this block
    uint32_t *pdwMask = pSegment->rgFreeMask + (uBlock * HANDLE_MASKS_PER_BLOCK);

    // loop through the masks, zeroing the appropriate free bits
    do
    {
        // this is a brand new block - all masks we encounter should be totally free
        _ASSERTE(*pdwMask == MASK_EMPTY);

        // pick an initial guess at the number to allocate
        uint32_t uAlloc = uRemain;

        // compute the default mask based on that count
        uint32_t dwNewMask;
        // are we allocating all of them?
        if (uAlloc >= HANDLE_HANDLES_PER_MASK)
        {
            // shifting by the full mask width is undefined behavior, so the
            // all-allocated mask is assigned directly instead of computed
            dwNewMask = MASK_FULL; // avoid unpredictable shift
            uAlloc = HANDLE_HANDLES_PER_MASK;
        }
        else
        {
            // shifting the all-free mask left by uAlloc clears the low uAlloc
            // bits, marking exactly those handles as allocated
            dwNewMask = (MASK_EMPTY << uAlloc);
        }

        // set the free mask
        *pdwMask = dwNewMask;

        // update our count and mask pointer
        uRemain -= uAlloc;
        pdwMask++;

    } while (uRemain);

    // compute the bounds for allocation so we can copy the handles
    _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK);
    _UNCHECKED_OBJECTREF *pLast = pValue + uCount;

    // loop through filling in the output array with handles
    do
    {
        // store the next handle in the next array slot
        *pHandleBase = (OBJECTHANDLE)pValue;

        // increment our source and destination
        pValue++;
        pHandleBase++;

    } while (pValue < pLast);

    // return the number of handles we allocated
    return uCount;
}


/*
 * BlockAllocHandles
 *
 * Attempts to allocate the requested number of handles of the specified type,
 * from the specified handle block.
 *
 * Returns the number of available handles actually allocated.
 *
 */
uint32_t BlockAllocHandles(TableSegment *pSegment, uint32_t uBlock, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // keep track of how many handles we have left to allocate
    uint32_t uRemain = uCount;

    // set up our loop and limit mask pointers
    uint32_t *pdwMask = pSegment->rgFreeMask + (uBlock * HANDLE_MASKS_PER_BLOCK);
    uint32_t *pdwMaskLast = pdwMask + HANDLE_MASKS_PER_BLOCK;

    // keep track of the handle displacement for the mask we're scanning
    uint32_t uDisplacement = uBlock * HANDLE_HANDLES_PER_BLOCK;

    // loop through all the masks, allocating handles as we go
    do
    {
        // if this mask indicates free handles then grab them
        // (a nonzero free mask means at least one slot in the mask is free)
        if (*pdwMask)
        {
            // allocate as many handles as we need from this mask
            uint32_t uSatisfied = BlockAllocHandlesInMask(pSegment, uBlock, pdwMask, uDisplacement, pHandleBase, uRemain);

            // adjust our count and array pointer
            uRemain -= uSatisfied;
            pHandleBase += uSatisfied;

            // if there are no remaining slots to be filled then we are done
            if (!uRemain)
                break;
        }

        // on to the next mask
        pdwMask++;
        uDisplacement += HANDLE_HANDLES_PER_MASK;

    } while (pdwMask < pdwMaskLast);

    // return the number of handles we got
    return (uCount - uRemain);
}


/*
 * SegmentAllocHandlesFromTypeChain
 *
 * Attempts to allocate the requested number of handles of the specified type,
 * from the specified segment's block chain for the specified type. This routine
 * ONLY scavenges existing blocks in the type chain. No new blocks are committed.
 *
 * Returns the number of available handles actually allocated.
 *
 */
uint32_t SegmentAllocHandlesFromTypeChain(TableSegment *pSegment, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // fetch the number of handles available in this chain
    uint32_t uAvail = pSegment->rgFreeCount[uType];

    // is the available count greater than the requested count?
    if (uAvail > uCount)
    {
        // yes - all requested handles are available
        uAvail = uCount;
    }
    else
    {
        // no - we can only satisfy some of the request
        uCount = uAvail;
    }

    // did we find that any handles are available?
    if (uAvail)
    {
        // yes - fetch the head of the block chain and set up a loop limit
        // (the hint is where the last successful allocation for this type ended)
        uint32_t uBlock = pSegment->rgHint[uType];
        uint32_t uLast = uBlock;

        // loop until we have found all handles known to be available
        for (;;)
        {
            // try to allocate handles from the current block
            uint32_t uSatisfied = BlockAllocHandles(pSegment, uBlock, pHandleBase, uAvail);

            // did we get everything we needed?
            if (uSatisfied == uAvail)
            {
                // yes - update the hint for this type chain and get out
                pSegment->rgHint[uType] = (uint8_t)uBlock;
                break;
            }

            // adjust our count and array pointer
            uAvail -= uSatisfied;
            pHandleBase += uSatisfied;

            // fetch the next block in the type chain
            // (the chain is circular, so wrapping back to uLast means we've seen every block)
            uBlock = pSegment->rgAllocation[uBlock];

            // are we out of blocks?
            if (uBlock == uLast)
            {
                // free count is corrupt
                _ASSERTE(FALSE);

                // avoid making the problem any worse
                uCount -= uAvail;
                break;
            }
        }

        // update the free count
        pSegment->rgFreeCount[uType] -= uCount;
    }

    // return the number of handles we got
    return uCount;
}


/*
 * SegmentAllocHandlesFromFreeList
 *
 * Attempts to allocate the requested number of handles of the specified type,
 * by committing blocks from the free list to that type's type chain.
 *
 * Returns the number of available handles actually allocated.
 *
 */
uint32_t SegmentAllocHandlesFromFreeList(TableSegment *pSegment, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    LIMITED_METHOD_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // keep track of how many handles we have left to allocate
    uint32_t uRemain = uCount;

    // loop allocating handles until we are done or we run out of free blocks
    do
    {
        // start off assuming we can allocate all the handles
        uint32_t uAlloc = uRemain;

        // we can only get a block-full of handles at a time
        if (uAlloc > HANDLE_HANDLES_PER_BLOCK)
            uAlloc = HANDLE_HANDLES_PER_BLOCK;

        // try to get a block from the free list
        // (the hint is only updated on the first block we commit for this request)
        uint32_t uBlock = SegmentInsertBlockFromFreeList(pSegment, uType, (uRemain == uCount));

        // if there are no free blocks left then we are done
        if (uBlock == BLOCK_INVALID)
            break;

        // initialize the block by allocating the required handles into the array
        uAlloc = BlockAllocHandlesInitial(pSegment, uType, uBlock, pHandleBase, uAlloc);

        // adjust our count and array pointer
        uRemain -= uAlloc;
        pHandleBase += uAlloc;

    } while (uRemain);

    // compute the number of handles we took
    uCount -= uRemain;

    // update the free count by the number of handles we took
    // (SegmentInsertBlockFromFreeList credited the whole block to the free
    //  count; this debits the portion handed out to the caller)
    pSegment->rgFreeCount[uType] -= uCount;

    // return the number of handles we got
    return uCount;
}


/*
 * SegmentAllocHandles
 *
 * Attempts to allocate the requested number of handles of the specified type,
 * from the specified segment.
 *
 * Returns the number of available handles actually allocated.
+ * + */ +uint32_t SegmentAllocHandles(TableSegment *pSegment, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount) +{ + LIMITED_METHOD_CONTRACT; + + /* + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + */ + + // first try to get some handles from the existing type chain + uint32_t uSatisfied = SegmentAllocHandlesFromTypeChain(pSegment, uType, pHandleBase, uCount); + + // if there are still slots to be filled then we need to commit more blocks to the type chain + if (uSatisfied < uCount) + { + // adjust our count and array pointer + uCount -= uSatisfied; + pHandleBase += uSatisfied; + + // get remaining handles by committing blocks from the free list + uSatisfied += SegmentAllocHandlesFromFreeList(pSegment, uType, pHandleBase, uCount); + } + + // return the number of handles we got + return uSatisfied; +} + + +/* + * TableAllocBulkHandles + * + * Attempts to allocate the requested number of handes of the specified type. + * + * Returns the number of handles that were actually allocated. This is always + * the same as the number of handles requested except in out-of-memory conditions, + * in which case it is the number of handles that were successfully allocated. 
 *
 */
uint32_t TableAllocBulkHandles(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // keep track of how many handles we have left to allocate
    uint32_t uRemain = uCount;

    // start with the first segment and loop until we are done
    TableSegment *pSegment = pTable->pSegmentList;

    uint8_t bLastSequence = 0;
    BOOL fNewSegment = FALSE;

    for (;;)
    {
        // get some handles from the current segment
        uint32_t uSatisfied = SegmentAllocHandles(pSegment, uType, pHandleBase, uRemain);

        // adjust our count and array pointer
        uRemain -= uSatisfied;
        pHandleBase += uSatisfied;

        // if there are no remaining slots to be filled then we are done
        if (!uRemain)
            break;

        // fetch the next segment in the chain.
        TableSegment *pNextSegment = NULL;

        if (!fNewSegment)
        {
            pNextSegment = pSegment->pNextSegment;
            if (!pNextSegment)
            {
                bLastSequence = pSegment->bSequence;
                fNewSegment = TRUE;
            }
        }

        // if there are no more segments then allocate another
        if (fNewSegment)
        {
            // ok if this fails then we're out of luck
            pNextSegment = SegmentAlloc(pTable);
            if (!pNextSegment)
            {
                // we ran out of memory allocating a new segment.
                // this may not be catastrophic - if there are still some
                // handles in the cache then some allocations may succeed.
                break;
            }

            // set up the correct sequence number for the new segment
            // (sequence numbers wrap around modulo 256)
            pNextSegment->bSequence = (uint8_t)(((uint32_t)bLastSequence + 1) % 0x100);
            bLastSequence = pNextSegment->bSequence;

            // link the new segment into the list by the order of segment address
            TableSegment* pWalk = pTable->pSegmentList;
            if ((uintptr_t)pNextSegment < (uintptr_t)pWalk)
            {
                // new segment has the lowest address - it becomes the new list head
                pNextSegment->pNextSegment = pWalk;
                pTable->pSegmentList = pNextSegment;
            }
            else
            {
                // walk the list to find the insertion point that keeps it address-sorted
                while (pWalk)
                {
                    if (pWalk->pNextSegment == NULL)
                    {
                        pWalk->pNextSegment = pNextSegment;
                        break;
                    }
                    else if ((uintptr_t)pWalk->pNextSegment > (uintptr_t)pNextSegment)
                    {
                        pNextSegment->pNextSegment = pWalk->pNextSegment;
                        pWalk->pNextSegment = pNextSegment;
                        break;
                    }
                    pWalk = pWalk->pNextSegment;
                }
            }
        }

        // try again with new segment
        pSegment = pNextSegment;
    }

    // compute the number of handles we actually got
    uint32_t uAllocated = (uCount - uRemain);

    // update the count of handles marked as "used"
    pTable->dwCount += uAllocated;

    // return the number of handles we actually got
    return uAllocated;
}


/*
 * BlockFreeHandlesInMask
 *
 * Frees some portion of an array of handles of the specified type.
 * The array is scanned forward and handles are freed until a handle
 * from a different mask is encountered.
 *
 * Returns the number of handles that were freed from the front of the array.
 *
 */
uint32_t BlockFreeHandlesInMask(TableSegment *pSegment, uint32_t uBlock, uint32_t uMask, OBJECTHANDLE *pHandleBase, uint32_t uCount,
                                uintptr_t *pUserData, uint32_t *puActualFreed, BOOL *pfAllMasksFree)
{
    LIMITED_METHOD_CONTRACT;

    // keep track of how many handles we have left to free
    uint32_t uRemain = uCount;

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable:6305) // "This code deals with a bit vector mapped piece of code, so there is no mismatch between sizeof and countof"
#endif

    // if this block has user data, convert the pointer to be mask-relative
    if (pUserData)
        pUserData += (uMask * HANDLE_HANDLES_PER_MASK);

    // convert our mask index to be segment-relative
    uMask += (uBlock * HANDLE_MASKS_PER_BLOCK);

    // compute the handle bounds for our mask
    OBJECTHANDLE firstHandle = (OBJECTHANDLE)(pSegment->rgValue + (uMask * HANDLE_HANDLES_PER_MASK));
    OBJECTHANDLE lastHandle = (OBJECTHANDLE)((_UNCHECKED_OBJECTREF *)firstHandle + HANDLE_HANDLES_PER_MASK);

#ifdef _PREFAST_
#pragma warning(pop)
#endif

    // keep a local copy of the free mask to update as we free handles
    // (a set bit means the corresponding handle slot is free)
    uint32_t dwFreeMask = pSegment->rgFreeMask[uMask];

    // keep track of how many bogus frees we are asked to do
    uint32_t uBogus = 0;

    // loop freeing handles until we encounter one outside our block or there are none left
    do
    {
        // fetch the next handle in the array
        OBJECTHANDLE handle = *pHandleBase;

        // if the handle is outside our segment then we are done
        if ((handle < firstHandle) || (handle >= lastHandle))
            break;

        // sanity check - the handle should no longer refer to an object here
        _ASSERTE(HndIsNullOrDestroyedHandle(*(_UNCHECKED_OBJECTREF *)handle));

        // compute the handle index within the mask
        uint32_t uHandle = (uint32_t)(handle - firstHandle);

        // if there is user data then clear the user data for this handle
        if (pUserData)
            pUserData[uHandle] = 0L;

        // compute the mask bit for this handle
        uint32_t dwFreeBit = (1 << uHandle);

        // the handle should not already be free
        if ((dwFreeMask & dwFreeBit) != 0)
        {
            // SOMEONE'S FREEING A HANDLE THAT ISN'T ALLOCATED
            // count it so it is excluded from the caller's actual-freed total
            uBogus++;
            _ASSERTE(FALSE);
        }

        // add this handle to the tally of freed handles
        dwFreeMask |= dwFreeBit;

        // adjust our count and array pointer
        uRemain--;
        pHandleBase++;

    } while (uRemain);

    // update the mask to reflect the handles we changed
    pSegment->rgFreeMask[uMask] = dwFreeMask;

    // if not all handles in this mask are free then tell our caller not to check the block
    if (dwFreeMask != MASK_EMPTY)
        *pfAllMasksFree = FALSE;

    // compute the number of handles we processed from the array
    uint32_t uFreed = (uCount - uRemain);

    // tell the caller how many handles we actually freed
    *puActualFreed += (uFreed - uBogus);

    // return the number of handles we actually freed
    return uFreed;
}


/*
 * BlockFreeHandles
 *
 * Frees some portion of an array of handles of the specified type.
 * The array is scanned forward and handles are freed until a handle
 * from a different block is encountered.
 *
 * Returns the number of handles that were freed from the front of the array.
 *
 */
uint32_t BlockFreeHandles(TableSegment *pSegment, uint32_t uBlock, OBJECTHANDLE *pHandleBase, uint32_t uCount,
                          uint32_t *puActualFreed, BOOL *pfScanForFreeBlocks)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // keep track of how many handles we have left to free
    uint32_t uRemain = uCount;

    // fetch the user data for this block, if any
    uintptr_t *pBlockUserData = BlockFetchUserDataPointer(pSegment, uBlock, FALSE);

    // compute the handle bounds for our block
    OBJECTHANDLE firstHandle = (OBJECTHANDLE)(pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK));
    OBJECTHANDLE lastHandle = (OBJECTHANDLE)((_UNCHECKED_OBJECTREF *)firstHandle + HANDLE_HANDLES_PER_BLOCK);

    // this variable will only stay TRUE if all masks we touch end up in the free state
    BOOL fAllMasksWeTouchedAreFree = TRUE;

    // loop freeing handles until we encounter one outside our block or there are none left
    do
    {
        // fetch the next handle in the array
        OBJECTHANDLE handle = *pHandleBase;

        // if the handle is outside our segment then we are done
        if ((handle < firstHandle) || (handle >= lastHandle))
            break;

        // compute the mask that this handle resides in
        uint32_t uMask = (uint32_t)((handle - firstHandle) / HANDLE_HANDLES_PER_MASK);

        // free as many handles as this mask owns from the front of the array
        uint32_t uFreed = BlockFreeHandlesInMask(pSegment, uBlock, uMask, pHandleBase, uRemain,
                                                pBlockUserData, puActualFreed, &fAllMasksWeTouchedAreFree);

        // adjust our count and array pointer
        uRemain -= uFreed;
        pHandleBase += uFreed;

    } while (uRemain);

    // are all masks we touched free?
    if (fAllMasksWeTouchedAreFree)
    {
        // is the block unlocked?
        if (!BlockIsLocked(pSegment, uBlock))
        {
            // tell the caller it might be a good idea to scan for free blocks
            *pfScanForFreeBlocks = TRUE;
        }
    }

    // return the number of handles we actually freed
    return (uCount - uRemain);
}


/*
 * SegmentFreeHandles
 *
 * Frees some portion of an array of handles of the specified type.
 * The array is scanned forward and handles are freed until a handle
 * from a different segment is encountered.
 *
 * Returns the number of handles that were freed from the front of the array.
 *
 */
uint32_t SegmentFreeHandles(TableSegment *pSegment, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // keep track of how many handles we have left to free
    uint32_t uRemain = uCount;

    // compute the handle bounds for our segment
    OBJECTHANDLE firstHandle = (OBJECTHANDLE)pSegment->rgValue;
    OBJECTHANDLE lastHandle = (OBJECTHANDLE)((_UNCHECKED_OBJECTREF *)firstHandle + HANDLE_HANDLES_PER_SEGMENT);

    // the per-block free routines will set this if there is a chance some blocks went free
    BOOL fScanForFreeBlocks = FALSE;

    // track the number of handles we actually free
    // (may be less than the count processed if bogus double-frees are detected)
    uint32_t uActualFreed = 0;

    // loop freeing handles until we encounter one outside our segment or there are none left
    do
    {
        // fetch the next handle in the array
        OBJECTHANDLE handle = *pHandleBase;

        // if the handle is outside our segment then we are done
        if ((handle < firstHandle) || (handle >= lastHandle))
            break;

        // compute the block that this handle resides in
        uint32_t uBlock = (uint32_t)(((uintptr_t)handle - (uintptr_t)firstHandle) / (HANDLE_SIZE * HANDLE_HANDLES_PER_BLOCK));

        // sanity check that this block is the type we expect to be freeing
        _ASSERTE(pSegment->rgBlockType[uBlock] == uType);

        // free as many handles as this block owns from the front of the array
        uint32_t uFreed = BlockFreeHandles(pSegment, uBlock, pHandleBase, uRemain, &uActualFreed, &fScanForFreeBlocks);

        // adjust our count and array pointer
        uRemain -= uFreed;
        pHandleBase += uFreed;

    } while (uRemain);

    // compute the number of handles we actually freed
    uint32_t uFreed = (uCount - uRemain);

    // update the free count
    pSegment->rgFreeCount[uType] += uActualFreed;

    // if we saw blocks that may have gone totally free then do a free scan
    if (fScanForFreeBlocks)
    {
        // assume that no scavenging is required
        BOOL fNeedsScavenging = FALSE;

        // try to remove any free blocks we may have created
        SegmentRemoveFreeBlocks(pSegment, uType, &fNeedsScavenging);

        // did SegmentRemoveFreeBlocks have to skip over any free blocks?
        if (fNeedsScavenging)
        {
            // yup, arrange to scavenge them later
            pSegment->fResortChains = TRUE;
            pSegment->fNeedsScavenging = TRUE;
        }
    }

    // return the total number of handles we freed
    return uFreed;
}


/*
 * TableFreeBulkPreparedHandles
 *
 * Frees an array of handles of the specified type.
 *
 * This routine is optimized for a sorted array of handles but will accept any order.
 *
 */
void TableFreeBulkPreparedHandles(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount)
{
    //Update the count of handles marked as "used"
    // NOTE(review): the count is decremented up-front, before the handles are
    // physically freed below; callers presumably hold the table lock so the
    // transient inconsistency is not observable -- confirm against call sites.
    pTable->dwCount -= uCount;

    WRAPPER_NO_CONTRACT;

    /*
        NOTHROW;
        GC_NOTRIGGER;
        MODE_ANY;
    */

    // loop until all handles are freed
    // NOTE: uCount is expected to be non-zero; the do/while dereferences
    // *pHandleBase before any count check.
    do
    {
        // get the segment for the first handle
        TableSegment * pSegment = (TableSegment *)HandleFetchSegmentPointer(*pHandleBase);

        // sanity
        _ASSERTE(pSegment->pHandleTable == pTable);

        // free as many handles as this segment owns from the front of the array
        uint32_t uFreed = SegmentFreeHandles(pSegment, uType, pHandleBase, uCount);

        // adjust our count and array pointer
        uCount -= uFreed;
        pHandleBase += uFreed;

    } while (uCount);
}


/*
 * TableFreeBulkUnpreparedHandlesWorker
 *
 * Frees an array of handles of the specified type by preparing them and calling TableFreeBulkPreparedHandles.
 * Uses the supplied scratch buffer to prepare the handles.
 *
 */
void TableFreeBulkUnpreparedHandlesWorker(HandleTable *pTable, uint32_t uType, const OBJECTHANDLE *pHandles, uint32_t uCount,
                                          OBJECTHANDLE *pScratchBuffer)
{
    WRAPPER_NO_CONTRACT;

    // copy the handles into the destination buffer
    // (pScratchBuffer must hold at least uCount entries)
    memcpy(pScratchBuffer, pHandles, uCount * sizeof(OBJECTHANDLE));

    // sort them for optimal free order
    QuickSort((uintptr_t *)pScratchBuffer, 0, uCount - 1, CompareHandlesByFreeOrder);

    // make sure the handles are zeroed too
    ZeroHandles(pScratchBuffer, uCount);

    // prepare and free these handles
    TableFreeBulkPreparedHandles(pTable, uType, pScratchBuffer, uCount);
}


/*
 * TableFreeBulkUnpreparedHandles
 *
 * Frees an array of handles of the specified type by preparing them and calling
 * TableFreeBulkPreparedHandlesWorker one or more times.
 *
 */
void TableFreeBulkUnpreparedHandles(HandleTable *pTable, uint32_t uType, const OBJECTHANDLE *pHandles, uint32_t uCount)
{
    CONTRACTL
    {
        THROWS;
        WRAPPER(GC_TRIGGERS);
        INJECT_FAULT(COMPlusThrowOM());
    }
    CONTRACTL_END;

    // preparation / free buffer
    // Small requests are prepared in this stack buffer; larger requests try to
    // allocate a single heap buffer big enough for everything.
    OBJECTHANDLE rgStackHandles[HANDLE_HANDLES_PER_BLOCK];
    OBJECTHANDLE *pScratchBuffer  = rgStackHandles;
    OBJECTHANDLE *pLargeScratchBuffer  = NULL;
    uint32_t uFreeGranularity = _countof(rgStackHandles);

    // if there are more handles than we can put on the stack then try to allocate a sorting buffer
    if (uCount > uFreeGranularity)
    {
        // try to allocate a bigger buffer to work in
        pLargeScratchBuffer = new (nothrow) OBJECTHANDLE[uCount];

        // did we get it?
        if (pLargeScratchBuffer)
        {
            // yes - use this buffer to prepare and free the handles
            pScratchBuffer   = pLargeScratchBuffer;
            uFreeGranularity = uCount;
        }
        // if the allocation failed we simply fall back to freeing the handles
        // in stack-buffer-sized chunks (uFreeGranularity stays at the stack size)
    }

    // loop freeing handles until we have freed them all
    while (uCount)
    {
        // decide how many we can process in this iteration
        if (uFreeGranularity > uCount)
            uFreeGranularity = uCount;

        // prepare and free these handles
        TableFreeBulkUnpreparedHandlesWorker(pTable, uType, pHandles, uFreeGranularity, pScratchBuffer);

        // adjust our pointers and move on
        uCount -= uFreeGranularity;
        pHandles += uFreeGranularity;
    }

    // if we allocated a sorting buffer then free it now
    if (pLargeScratchBuffer)
        delete [] pLargeScratchBuffer;
}

#endif // !DACCESS_COMPILE

/*--------------------------------------------------------------------------*/


diff --git a/src/gc/handletablepriv.h b/src/gc/handletablepriv.h
new file mode 100644
index 0000000000..59c08ca744
--- /dev/null
+++ b/src/gc/handletablepriv.h
@@ -0,0 +1,1069 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*
 * Generational GC handle manager. Internal Implementation Header.
 *
 * Shared defines and declarations for handle table implementation.
 *

 *
 */

#include "common.h"

#include "handletable.h"

/*--------------------------------------------------------------------------*/

//<TODO>@TODO: find a home for this in a project-level header file</TODO>
#define BITS_PER_BYTE               (8)
/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * MAJOR TABLE DEFINITIONS THAT CHANGE DEPENDING ON THE WEATHER
 *
 ****************************************************************************/

// 64k reserved per segment with 4k as header.
#define HANDLE_SEGMENT_SIZE     (0x10000)   // MUST be a power of 2 (and currently must be 64K due to VirtualAlloc semantics)
#define HANDLE_HEADER_SIZE      (0x1000)    // SHOULD be <= OS page size

#define HANDLE_SEGMENT_ALIGNMENT     HANDLE_SEGMENT_SIZE


#if !BIGENDIAN

    // little-endian write barrier mask manipulation
    #define GEN_CLUMP_0_MASK        (0x000000FF)
    #define NEXT_CLUMP_IN_MASK(dw)  (dw >> BITS_PER_BYTE)

#else

    // big-endian write barrier mask manipulation
    #define GEN_CLUMP_0_MASK        (0xFF000000)
    #define NEXT_CLUMP_IN_MASK(dw)  (dw << BITS_PER_BYTE)

#endif


// if the above numbers change then these will likely change as well
#define HANDLE_HANDLES_PER_CLUMP    (16)        // segment write-barrier granularity
#define HANDLE_HANDLES_PER_BLOCK    (64)        // segment suballocation granularity
#define HANDLE_OPTIMIZE_FOR_64_HANDLE_BLOCKS    // flag for certain optimizations

// maximum number of internally supported handle types
#define HANDLE_MAX_INTERNAL_TYPES   (12)        // should be a multiple of 4

// number of types allowed for public callers
#define HANDLE_MAX_PUBLIC_TYPES     (HANDLE_MAX_INTERNAL_TYPES - 1) // reserve one internal type

// internal block types
#define HNDTYPE_INTERNAL_DATABLOCK  (HANDLE_MAX_INTERNAL_TYPES - 1) // reserve last type for data blocks

// max number of generations to support statistics on
#define MAXSTATGEN                  (5)

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * MORE DEFINITIONS
 *
 ****************************************************************************/

// fast handle-to-segment mapping
// (valid because segments are allocated at HANDLE_SEGMENT_ALIGNMENT boundaries)
#define HANDLE_SEGMENT_CONTENT_MASK     (HANDLE_SEGMENT_SIZE - 1)
#define HANDLE_SEGMENT_ALIGN_MASK       (~HANDLE_SEGMENT_CONTENT_MASK)

// table layout metrics
#define HANDLE_SIZE                     sizeof(_UNCHECKED_OBJECTREF)
#define HANDLE_HANDLES_PER_SEGMENT      ((HANDLE_SEGMENT_SIZE - HANDLE_HEADER_SIZE) / HANDLE_SIZE)
#define HANDLE_BLOCKS_PER_SEGMENT       (HANDLE_HANDLES_PER_SEGMENT / HANDLE_HANDLES_PER_BLOCK)
#define HANDLE_CLUMPS_PER_SEGMENT       (HANDLE_HANDLES_PER_SEGMENT / HANDLE_HANDLES_PER_CLUMP)
#define HANDLE_CLUMPS_PER_BLOCK         (HANDLE_HANDLES_PER_BLOCK / HANDLE_HANDLES_PER_CLUMP)
#define HANDLE_BYTES_PER_BLOCK          (HANDLE_HANDLES_PER_BLOCK * HANDLE_SIZE)
#define HANDLE_HANDLES_PER_MASK         (sizeof(uint32_t) * BITS_PER_BYTE)
#define HANDLE_MASKS_PER_SEGMENT        (HANDLE_HANDLES_PER_SEGMENT / HANDLE_HANDLES_PER_MASK)
#define HANDLE_MASKS_PER_BLOCK          (HANDLE_HANDLES_PER_BLOCK / HANDLE_HANDLES_PER_MASK)
#define HANDLE_CLUMPS_PER_MASK          (HANDLE_HANDLES_PER_MASK / HANDLE_HANDLES_PER_CLUMP)

// We use this relation to check for free mask per block.
C_ASSERT (HANDLE_HANDLES_PER_MASK * 2 == HANDLE_HANDLES_PER_BLOCK);


// cache layout metrics
#define HANDLE_CACHE_TYPE_SIZE          128 // 128 == 63 handles per bank
#define HANDLES_PER_CACHE_BANK          ((HANDLE_CACHE_TYPE_SIZE / 2) - 1)

// cache policy defines
#define REBALANCE_TOLERANCE             (HANDLES_PER_CACHE_BANK / 3)
#define REBALANCE_LOWATER_MARK          (HANDLES_PER_CACHE_BANK - REBALANCE_TOLERANCE)
#define REBALANCE_HIWATER_MARK          (HANDLES_PER_CACHE_BANK + REBALANCE_TOLERANCE)

// bulk alloc policy defines
#define SMALL_ALLOC_COUNT               (HANDLES_PER_CACHE_BANK / 10)

// misc constants
#define MASK_FULL                       (0)
#define MASK_EMPTY                      (0xFFFFFFFF)
#define MASK_LOBYTE                     (0x000000FF)
#define TYPE_INVALID                    ((uint8_t)0xFF)
#define BLOCK_INVALID                   ((uint8_t)0xFF)

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * CORE TABLE LAYOUT STRUCTURES
 *
 ****************************************************************************/

/*
 * we need byte packing for the handle table layout to work
 */
#pragma pack(push,1)


/*
 * Table Segment Header
 *
 * Defines the layout for a segment's header data.
 */
struct _TableSegmentHeader
{
    /*
     * Write Barrier Generation Numbers
     *
     * Each slot holds four bytes. Each byte corresponds to a clump of handles.
     * The value of the byte corresponds to the lowest possible generation that a
     * handle in that clump could point into.
     *
     * WARNING: Although this array is logically organized as a uint8_t[], it is sometimes
     * accessed as uint32_t[] when processing bytes in parallel. Code which treats the
     * array as an array of ULONG32s must handle big/little endian issues itself.
     */
    uint8_t rgGeneration[HANDLE_BLOCKS_PER_SEGMENT * sizeof(uint32_t) / sizeof(uint8_t)];

    /*
     * Block Allocation Chains
     *
     * Each slot indexes the next block in an allocation chain.
     */
    uint8_t rgAllocation[HANDLE_BLOCKS_PER_SEGMENT];

    /*
     * Block Free Masks
     *
     * Masks - 1 bit for every handle in the segment.
     */
    uint32_t rgFreeMask[HANDLE_MASKS_PER_SEGMENT];

    /*
     * Block Handle Types
     *
     * Each slot holds the handle type of the associated block.
     */
    uint8_t rgBlockType[HANDLE_BLOCKS_PER_SEGMENT];

    /*
     * Block User Data Map
     *
     * Each slot holds the index of a user data block (if any) for the associated block.
     */
    uint8_t rgUserData[HANDLE_BLOCKS_PER_SEGMENT];

    /*
     * Block Lock Count
     *
     * Each slot holds a lock count for its associated block.
     * Locked blocks are not freed, even when empty.
     */
    uint8_t rgLocks[HANDLE_BLOCKS_PER_SEGMENT];

    /*
     * Allocation Chain Tails
     *
     * Each slot holds the tail block index for an allocation chain.
     */
    uint8_t rgTail[HANDLE_MAX_INTERNAL_TYPES];

    /*
     * Allocation Chain Hints
     *
     * Each slot holds a hint block index for an allocation chain.
     */
    uint8_t rgHint[HANDLE_MAX_INTERNAL_TYPES];

    /*
     * Free Count
     *
     * Each slot holds the number of free handles in an allocation chain.
     */
    uint32_t rgFreeCount[HANDLE_MAX_INTERNAL_TYPES];

    /*
     * Next Segment
     *
     * Points to the next segment in the chain (if we ran out of space in this one).
     */
#ifdef DACCESS_COMPILE
    TADDR pNextSegment;
#else
    struct TableSegment *pNextSegment;
#endif // DACCESS_COMPILE

    /*
     * Handle Table
     *
     * Points to owning handle table for this table segment.
     */
    PTR_HandleTable pHandleTable;

    /*
     * Flags
     */
    uint8_t fResortChains : 1;      // allocation chains need sorting
    uint8_t fNeedsScavenging : 1;   // free blocks need scavenging
    uint8_t _fUnused : 6;           // unused

    /*
     * Free List Head
     *
     * Index of the first free block in the segment.
     */
    uint8_t bFreeList;

    /*
     * Empty Line
     *
     * Index of the first KNOWN block of the last group of unused blocks in the segment.
     */
    uint8_t bEmptyLine;

    /*
     * Commit Line
     *
     * Index of the first uncommitted block in the segment.
     */
    uint8_t bCommitLine;

    /*
     * Decommit Line
     *
     * Index of the first block in the highest committed page of the segment.
     */
    uint8_t bDecommitLine;

    /*
     * Sequence
     *
     * Indicates the segment sequence number.
     */
    uint8_t bSequence;
};

typedef DPTR(struct _TableSegmentHeader) PTR__TableSegmentHeader;
typedef DPTR(uintptr_t) PTR_uintptr_t;

// The handle table is large and may not be entirely mapped. That's one reason for splitting out the table
// segment and the header as two separate classes. In DAC builds, we generally need only a single element from
// the table segment, so we can use the DAC to retrieve just the information we require.
/*
 * Table Segment
 *
 * Defines the layout for a handle table segment.
 */
struct TableSegment : public _TableSegmentHeader
{
    /*
     * Filler
     *
     * Pads the header out to exactly HANDLE_HEADER_SIZE bytes so that
     * rgValue starts at the first handle slot of the segment.
     */
    uint8_t rgUnused[HANDLE_HEADER_SIZE - sizeof(_TableSegmentHeader)];

    /*
     * Handles
     */
    _UNCHECKED_OBJECTREF rgValue[HANDLE_HANDLES_PER_SEGMENT];

#ifdef DACCESS_COMPILE
    static uint32_t DacSize(TADDR addr);
#endif
};

typedef SPTR(struct TableSegment) PTR_TableSegment;

/*
 * restore default packing
 */
#pragma pack(pop)


/*
 * Handle Type Cache
 *
 * Defines the layout of a per-type handle cache.
 */
struct HandleTypeCache
{
    /*
     * reserve bank
     */
    OBJECTHANDLE rgReserveBank[HANDLES_PER_CACHE_BANK];

    /*
     * index of next available handle slot in the reserve bank
     */
    int32_t lReserveIndex;


    /*---------------------------------------------------------------------------------
     * N.B. this structure is split up this way so that when HANDLES_PER_CACHE_BANK is
     * large enough, lReserveIndex and lFreeIndex will reside in different cache lines
     *--------------------------------------------------------------------------------*/

    /*
     * free bank
     */
    OBJECTHANDLE rgFreeBank[HANDLES_PER_CACHE_BANK];

    /*
     * index of next empty slot in the free bank
     */
    int32_t lFreeIndex;
};


/*---------------------------------------------------------------------------*/



/****************************************************************************
 *
 * SCANNING PROTOTYPES
 *
 ****************************************************************************/

/*
 * ScanCallbackInfo
 *
 * Carries parameters for per-segment and per-block scanning callbacks.
 *
 */
struct ScanCallbackInfo
{
    PTR_TableSegment pCurrentSegment;   // segment we are presently scanning, if any
    uint32_t         uFlags;            // HNDGCF_* flags
    BOOL             fEnumUserData;     // whether user data is being enumerated as well
    HANDLESCANPROC   pfnScan;           // per-handle scan callback
    uintptr_t        param1;            // callback param 1
    uintptr_t        param2;            // callback param 2
    uint32_t         dwAgeMask;         // generation mask for ephemeral GCs

#ifdef _DEBUG
    uint32_t DEBUG_BlocksScanned;
    uint32_t DEBUG_BlocksScannedNonTrivially;
    uint32_t DEBUG_HandleSlotsScanned;
    uint32_t DEBUG_HandlesActuallyScanned;
#endif
};


/*
 * BLOCKSCANPROC
 *
 * Prototype for callbacks that implement per-block scanning logic.
 *
 */
typedef void (CALLBACK *BLOCKSCANPROC)(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * SEGMENTITERATOR
 *
 * Prototype for callbacks that implement per-segment scanning logic.
 *
 */
typedef PTR_TableSegment (CALLBACK *SEGMENTITERATOR)(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *pCrstHolder);


/*
 * TABLESCANPROC
 *
 * Prototype for TableScanHandles and xxxTableScanHandlesAsync.
 *
 */
typedef void (CALLBACK *TABLESCANPROC)(PTR_HandleTable pTable,
                                       const uint32_t *puType, uint32_t uTypeCount,
                                       SEGMENTITERATOR pfnSegmentIterator,
                                       BLOCKSCANPROC pfnBlockHandler,
                                       ScanCallbackInfo *pInfo,
                                       CrstHolderWithState *pCrstHolder);

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * ADDITIONAL TABLE STRUCTURES
 *
 ****************************************************************************/

/*
 * AsyncScanInfo
 *
 * Tracks the state of an async scan for a handle table.
 *
 */
struct AsyncScanInfo
{
    /*
     * Underlying Callback Info
     *
     * Specifies callback info for the underlying block handler.
     */
    struct ScanCallbackInfo *pCallbackInfo;

    /*
     * Underlying Segment Iterator
     *
     * Specifies the segment iterator to be used during async scanning.
     */
    SEGMENTITERATOR pfnSegmentIterator;

    /*
     * Underlying Block Handler
     *
     * Specifies the block handler to be used during async scanning.
     */
    BLOCKSCANPROC pfnBlockHandler;

    /*
     * Scan Queue
     *
     * Specifies the nodes to be processed asynchronously.
     */
    struct ScanQNode *pScanQueue;

    /*
     * Queue Tail
     *
     * Specifies the tail node in the queue, or NULL if the queue is empty.
     */
    struct ScanQNode *pQueueTail;
};


/*
 * Handle Table
 *
 * Defines the layout of a handle table object.
 */
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4200 )  // zero-sized array
#endif
struct HandleTable
{
    /*
     * flags describing handle attributes
     *
     * N.B. this is at offset 0 due to frequent access by cache free codepath
     */
    uint32_t rgTypeFlags[HANDLE_MAX_INTERNAL_TYPES];

    /*
     * lock for this table
     */
    CrstStatic Lock;

    /*
     * number of types this table supports
     */
    uint32_t uTypeCount;

    /*
     * number of handles owned by this table that are marked as "used"
     * (this includes the handles residing in rgMainCache and rgQuickCache)
     */
    uint32_t dwCount;

    /*
     * head of segment list for this table
     */
    PTR_TableSegment pSegmentList;

    /*
     * information on current async scan (if any)
     */
    AsyncScanInfo *pAsyncScanInfo;

    /*
     * per-table user info
     */
    uint32_t uTableIndex;

    /*
     * per-table AppDomain info
     */
    ADIndex uADIndex;

    /*
     * one-level per-type 'quick' handle cache
     */
    OBJECTHANDLE rgQuickCache[HANDLE_MAX_INTERNAL_TYPES];   // interlocked ops used here

    /*
     * debug-only statistics
     */
#ifdef _DEBUG
    int     _DEBUG_iMaxGen;
    int64_t _DEBUG_TotalBlocksScanned            [MAXSTATGEN];
    int64_t _DEBUG_TotalBlocksScannedNonTrivially[MAXSTATGEN];
    int64_t _DEBUG_TotalHandleSlotsScanned       [MAXSTATGEN];
    int64_t _DEBUG_TotalHandlesActuallyScanned   [MAXSTATGEN];
#endif

    /*
     * primary per-type handle cache
     *
     * Flexible trailing array (hence the disabled C4200 warning above);
     * presumably sized to uTypeCount entries when the table is allocated --
     * confirm against the allocation site.
     */
    HandleTypeCache rgMainCache[0];                         // interlocked ops used here
};

#ifdef _MSC_VER
#pragma warning(pop)
#endif

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * HELPERS
 *
 ****************************************************************************/

/*
 * A 32/64 comparison callback
 *<TODO>
 * @TODO: move/merge into common util file
 *</TODO>
 */
typedef int (*PFNCOMPARE)(uintptr_t p, uintptr_t q);


/*
 * A 32/64 neutral quicksort
 *<TODO>
 * @TODO: move/merge into common util file
 *</TODO>
 */
void QuickSort(uintptr_t *pData, int left, int right, PFNCOMPARE pfnCompare);


/*
 * CompareHandlesByFreeOrder
 *
 *
 Returns:
 *  <0 - handle P should be freed before handle Q
 *  =0 - handles are equivalent for free order purposes
 *  >0 - handle Q should be freed before handle P
 *
 */
int CompareHandlesByFreeOrder(uintptr_t p, uintptr_t q);

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * CORE TABLE MANAGEMENT
 *
 ****************************************************************************/

/*
 * TypeHasUserData
 *
 * Determines whether a given handle type has user data.
 *
 */
__inline BOOL TypeHasUserData(HandleTable *pTable, uint32_t uType)
{
    LIMITED_METHOD_CONTRACT;

    // sanity
    _ASSERTE(uType < HANDLE_MAX_INTERNAL_TYPES);

    // consult the type flags
    return (pTable->rgTypeFlags[uType] & HNDF_EXTRAINFO);
}


/*
 * TableCanFreeSegmentNow
 *
 * Determines if it is OK to free the specified segment at this time.
 *
 */
BOOL TableCanFreeSegmentNow(HandleTable *pTable, TableSegment *pSegment);


/*
 * BlockIsLocked
 *
 * Determines if the lock count for the specified block is currently non-zero.
 *
 */
__inline BOOL BlockIsLocked(TableSegment *pSegment, uint32_t uBlock)
{
    LIMITED_METHOD_CONTRACT;

    // sanity
    _ASSERTE(uBlock < HANDLE_BLOCKS_PER_SEGMENT);

    // fetch the lock count and compare it to zero
    return (pSegment->rgLocks[uBlock] != 0);
}


/*
 * BlockLock
 *
 * Increases the lock count for a block.
 *
 * NOTE: the increment is a plain (non-interlocked) byte update; presumably
 * callers serialize access via the table lock -- confirm against call sites.
 */
__inline void BlockLock(TableSegment *pSegment, uint32_t uBlock)
{
    LIMITED_METHOD_CONTRACT;

    // fetch the old lock count
    uint8_t bLocks = pSegment->rgLocks[uBlock];

    // assert if we are about to trash the count
    _ASSERTE(bLocks < 0xFF);

    // store the incremented lock count
    pSegment->rgLocks[uBlock] = bLocks + 1;
}


/*
 * BlockUnlock
 *
 * Decreases the lock count for a block.
 *
 */
__inline void BlockUnlock(TableSegment *pSegment, uint32_t uBlock)
{
    LIMITED_METHOD_CONTRACT;

    // fetch the old lock count
    uint8_t bLocks = pSegment->rgLocks[uBlock];

    // assert if we are about to trash the count
    _ASSERTE(bLocks > 0);

    // store the decremented lock count
    pSegment->rgLocks[uBlock] = bLocks - 1;
}


/*
 * BlockFetchUserDataPointer
 *
 * Gets the user data pointer for the first handle in a block.
 *
 */
PTR_uintptr_t BlockFetchUserDataPointer(PTR__TableSegmentHeader pSegment, uint32_t uBlock, BOOL fAssertOnError);


/*
 * HandleValidateAndFetchUserDataPointer
 *
 * Gets the user data pointer for a handle.
 * ASSERTs and returns NULL if handle is not of the expected type.
 *
 */
uintptr_t *HandleValidateAndFetchUserDataPointer(OBJECTHANDLE handle, uint32_t uTypeExpected);


/*
 * HandleQuickFetchUserDataPointer
 *
 * Gets the user data pointer for a handle.
 * Less validation is performed.
 *
 */
PTR_uintptr_t HandleQuickFetchUserDataPointer(OBJECTHANDLE handle);


/*
 * HandleQuickSetUserData
 *
 * Stores user data with a handle.
 * Less validation is performed.
 *
 */
void HandleQuickSetUserData(OBJECTHANDLE handle, uintptr_t lUserData);


/*
 * HandleFetchType
 *
 * Computes the type index for a given handle.
 *
 */
uint32_t HandleFetchType(OBJECTHANDLE handle);


/*
 * HandleFetchHandleTable
 *
 * Returns the containing handle table of a given handle.
 *
 */
PTR_HandleTable HandleFetchHandleTable(OBJECTHANDLE handle);


/*
 * SegmentAlloc
 *
 * Allocates a new segment.
 *
 */
TableSegment *SegmentAlloc(HandleTable *pTable);


/*
 * SegmentFree
 *
 * Frees the specified segment.
 *
 */
void SegmentFree(TableSegment *pSegment);

/*
 * TableHandleAsyncPinHandles
 *
 * Mark ready for all non-pending OverlappedData that get moved to default domain.
+ * + */ +BOOL TableHandleAsyncPinHandles(HandleTable *pTable); + +/* + * TableRelocateAsyncPinHandles + * + * Replaces async pin handles with ones in default domain. + * + */ +void TableRelocateAsyncPinHandles(HandleTable *pTable, HandleTable *pTargetTable); + +/* + * Check if a handle is part of a HandleTable + */ +BOOL TableContainHandle(HandleTable *pTable, OBJECTHANDLE handle); + +/* + * SegmentRemoveFreeBlocks + * + * Removes a block from a block list in a segment. The block is returned to + * the segment's free list. + * + */ +void SegmentRemoveFreeBlocks(TableSegment *pSegment, uint32_t uType); + + +/* + * SegmentResortChains + * + * Sorts the block chains for optimal scanning order. + * Sorts the free list to combat fragmentation. + * + */ +void SegmentResortChains(TableSegment *pSegment); + + +/* + * DoesSegmentNeedsToTrimExcessPages + * + * Checks to see if any pages can be decommitted from the segment. + * + */ +BOOL DoesSegmentNeedsToTrimExcessPages(TableSegment *pSegment); + +/* + * SegmentTrimExcessPages + * + * Checks to see if any pages can be decommitted from the segment. + * In case there any unused pages it goes and decommits them. + * + */ +void SegmentTrimExcessPages(TableSegment *pSegment); + + +/* + * TableAllocBulkHandles + * + * Attempts to allocate the requested number of handes of the specified type. + * + * Returns the number of handles that were actually allocated. This is always + * the same as the number of handles requested except in out-of-memory conditions, + * in which case it is the number of handles that were successfully allocated. + * + */ +uint32_t TableAllocBulkHandles(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount); + + +/* + * TableFreeBulkPreparedHandles + * + * Frees an array of handles of the specified type. + * + * This routine is optimized for a sorted array of handles but will accept any order. 
 *
 */
void TableFreeBulkPreparedHandles(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount);


/*
 * TableFreeBulkUnpreparedHandles
 *
 * Frees an array of handles of the specified type by preparing them and calling TableFreeBulkPreparedHandles.
 *
 */
void TableFreeBulkUnpreparedHandles(HandleTable *pTable, uint32_t uType, const OBJECTHANDLE *pHandles, uint32_t uCount);

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * HANDLE CACHE
 *
 ****************************************************************************/

/*
 * TableAllocSingleHandleFromCache
 *
 * Gets a single handle of the specified type from the handle table by
 * trying to fetch it from the reserve cache for that handle type. If the
 * reserve cache is empty, this routine calls TableCacheMissOnAlloc.
 *
 */
OBJECTHANDLE TableAllocSingleHandleFromCache(HandleTable *pTable, uint32_t uType);


/*
 * TableFreeSingleHandleToCache
 *
 * Returns a single handle of the specified type to the handle table
 * by trying to store it in the free cache for that handle type. If the
 * free cache is full, this routine calls TableCacheMissOnFree.
 *
 */
void TableFreeSingleHandleToCache(HandleTable *pTable, uint32_t uType, OBJECTHANDLE handle);


/*
 * TableAllocHandlesFromCache
 *
 * Allocates multiple handles of the specified type by repeatedly
 * calling TableAllocSingleHandleFromCache.
 *
 */
uint32_t TableAllocHandlesFromCache(HandleTable *pTable, uint32_t uType, OBJECTHANDLE *pHandleBase, uint32_t uCount);


/*
 * TableFreeHandlesToCache
 *
 * Frees multiple handles of the specified type by repeatedly
 * calling TableFreeSingleHandleToCache.
 *
 */
void TableFreeHandlesToCache(HandleTable *pTable, uint32_t uType, const OBJECTHANDLE *pHandleBase, uint32_t uCount);

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * TABLE SCANNING
 *
 ****************************************************************************/

/*
 * TableScanHandles
 *
 * Implements the core handle scanning loop for a table.
 *
 */
void CALLBACK TableScanHandles(PTR_HandleTable pTable,
                               const uint32_t *puType,
                               uint32_t uTypeCount,
                               SEGMENTITERATOR pfnSegmentIterator,
                               BLOCKSCANPROC pfnBlockHandler,
                               ScanCallbackInfo *pInfo,
                               CrstHolderWithState *pCrstHolder);


/*
 * xxxTableScanHandlesAsync
 *
 * Implements asynchronous handle scanning for a table.
 *
 */
void CALLBACK xxxTableScanHandlesAsync(PTR_HandleTable pTable,
                                       const uint32_t *puType,
                                       uint32_t uTypeCount,
                                       SEGMENTITERATOR pfnSegmentIterator,
                                       BLOCKSCANPROC pfnBlockHandler,
                                       ScanCallbackInfo *pInfo,
                                       CrstHolderWithState *pCrstHolder);


/*
 * TypesRequireUserDataScanning
 *
 * Determines whether the set of types listed should get user data during scans
 *
 * if ALL types passed have user data then this function will enable user data support
 * otherwise it will disable user data support
 *
 * IN OTHER WORDS, SCANNING WITH A MIX OF USER-DATA AND NON-USER-DATA TYPES IS NOT SUPPORTED
 *
 */
BOOL TypesRequireUserDataScanning(HandleTable *pTable, const uint32_t *types, uint32_t typeCount);


/*
 * BuildAgeMask
 *
 * Builds an age mask to be used when examining/updating the write barrier.
 *
 */
uint32_t BuildAgeMask(uint32_t uGen, uint32_t uMaxGen);


/*
 * QuickSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 */
PTR_TableSegment CALLBACK QuickSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *pCrstHolder = 0);


/*
 * StandardSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 * This iterator performs some maintenance on the segments,
 * primarily making sure the block chains are sorted so that
 * g0 scans are more likely to operate on contiguous blocks.
 *
 */
PTR_TableSegment CALLBACK StandardSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *pCrstHolder = 0);


/*
 * FullSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 * This iterator performs full maintenance on the segments,
 * including freeing those it notices are empty along the way.
 *
 */
PTR_TableSegment CALLBACK FullSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *pCrstHolder = 0);


/*
 * BlockScanBlocksWithoutUserData
 *
 * Calls the specified callback for each handle, optionally aging the corresponding generation clumps.
 * NEVER propagates per-handle user data to the callback.
 *
 */
void CALLBACK BlockScanBlocksWithoutUserData(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockScanBlocksWithUserData
 *
 * Calls the specified callback for each handle, optionally aging the corresponding generation clumps.
 * ALWAYS propagates per-handle user data to the callback.
 *
 */
void CALLBACK BlockScanBlocksWithUserData(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockScanBlocksEphemeral
 *
 * Calls the specified callback for each handle from the specified generation.
 * Propagates per-handle user data to the callback if present.
 *
 */
void CALLBACK BlockScanBlocksEphemeral(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockAgeBlocks
 *
 * Ages all clumps in a range of consecutive blocks.
 *
 */
void CALLBACK BlockAgeBlocks(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockAgeBlocksEphemeral
 *
 * Ages all clumps within the specified generation.
 *
 */
void CALLBACK BlockAgeBlocksEphemeral(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockResetAgeMapForBlocks
 *
 * Clears the age maps for a range of blocks.
 *
 */
void CALLBACK BlockResetAgeMapForBlocks(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * BlockVerifyAgeMapForBlocks
 *
 * Verifies the age maps for a range of blocks, and also validates the objects pointed to.
 *
 */
void CALLBACK BlockVerifyAgeMapForBlocks(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo);


/*
 * xxxAsyncSegmentIterator
 *
 * Segment iterator used by the asynchronous scanning loop
 * (its signature matches SEGMENTITERATOR apart from the raw pPrevSegment).
 * NOTE(review): the original comment here duplicated TableScanHandles'
 * description ("Implements the core handle scanning loop for a table"),
 * which does not match this function's iterator signature.
 */
PTR_TableSegment CALLBACK xxxAsyncSegmentIterator(PTR_HandleTable pTable, TableSegment *pPrevSegment, CrstHolderWithState *pCrstHolder);

/*--------------------------------------------------------------------------*/
diff --git a/src/gc/handletablescan.cpp b/src/gc/handletablescan.cpp
new file mode 100644
index 0000000000..863b5a52b0
--- /dev/null
+++ b/src/gc/handletablescan.cpp
@@ -0,0 +1,1861 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*
 * Generational GC handle manager. Table Scanning Routines.
 *
 * Implements support for scanning handles in the table.
+ * + + * + */ + +#include "common.h" + +#include "gcenv.h" + +#include "gc.h" + +#include "objecthandle.h" +#include "handletablepriv.h" + +#ifndef FEATURE_REDHAWK +#include "nativeoverlapped.h" +#endif // FEATURE_REDHAWK + + +/**************************************************************************** + * + * DEFINITIONS FOR WRITE-BARRIER HANDLING + * + ****************************************************************************/ + /* +How the macros work: +Handle table's generation (TableSegmentHeader::rgGeneration) is actually a byte array, each byte is generation of a clump. +However it's often used as a uint32_t array for perf reasons, 1 uint32_t contains 4 bytes for ages of 4 clumps. Operations on such +a uint32_t include: + +1. COMPUTE_CLUMP_MASK. For some GC operations, we only want to scan handles in certain generation. To do that, we calculate +a Mask uint32_t from the original generation uint32_t: + MaskDWORD = COMPUTE_CLUMP_MASK (GenerationDWORD, BuildAgeMask(generationToScan, MaxGen)) +so that if a byte in GenerationDWORD is smaller than or equals to generationToScan, the corresponding byte in MaskDWORD is non-zero, +otherwise it is zero. However, if a byte in GenerationDWORD is between [2, 3E] and generationToScan is 2, the corresponding byte in +MaskDWORD is also non-zero. + +2. AgeEphemeral. When Ephemeral GC happens, ages for handles which belong to the GC condemned generation should be +incremented by 1. The operation is done by calculating a new uint32_t using the old uint32_t value: + NewGenerationDWORD = COMPUTE_AGED_CLUMPS(OldGenerationDWORD, BuildAgeMask(condemnedGeneration, MaxGen)) +so that if a byte in OldGenerationDWORD is smaller than or equals to condemnedGeneration. the coresponding byte in +NewGenerationDWORD is 1 bigger than the old value, otherwise it remains unchanged. + +3. Age. 
Similar as AgeEphemeral, but we use a special mask if condemned generation is max gen (2): + NewGenerationDWORD = COMPUTE_AGED_CLUMPS(OldGenerationDWORD, GEN_FULLGC) +under this operation, if a byte in OldGenerationDWORD is bigger than or equals to max gen(2) but smaller than 3F, the corresponding byte in +NewGenerationDWORD will be incremented by 1. Basically, a handle clump's age could be in [0, 3E]. But from GC's point of view, [2,3E] +are all considered as gen 2. + +If you change any of those algorithm, please verify it by this program: + + void Verify () + { + //the initial value of each byte is 0xff, which means there's no handle in the clump + VerifyMaskCalc (0xff, 0xff, 0xff, 0xff, 0); + VerifyMaskCalc (0xff, 0xff, 0xff, 0xff, 1); + VerifyMaskCalc (0xff, 0xff, 0xff, 0xff, 2); + + VerifyAgeEphemeralCalc (0xff, 0xff, 0xff, 0xff, 0); + VerifyAgeEphemeralCalc (0xff, 0xff, 0xff, 0xff, 1); + VerifyAgeCalc (0xff, 0xff, 0xff, 0xff); + + //each byte could independently change from 0 to 0x3e + for (byte b0 = 0; b0 <= 0x3f; b0++) + { + for (byte b1 = 0; b1 <= 0x3f; b1++) + { + for (byte b2 = 0; b2 <= 0x3f; b2++) + { + for (byte b3 = 0; b3 <= 0x3f; b3++) + { + //verify we calculate mask correctly + VerifyMaskCalc (b0, b1, b2, b3, 0); + VerifyMaskCalc (b0, b1, b2, b3, 1); + VerifyMaskCalc (b0, b1, b2, b3, 2); + + //verify BlockAgeBlocksEphemeral would work correctly + VerifyAgeEphemeralCalc (b0, b1, b2, b3, 0); + VerifyAgeEphemeralCalc (b0, b1, b2, b3, 1); + + //verify BlockAgeBlock would work correctly + VerifyAgeCalc (b0, b1, b2, b3); + } + } + } + } + } + + void VerifyMaskCalc (byte b0, byte b1, byte b2, byte b3, uint gennum) + { + uint genDword = (uint)(b0 | b1 << 8 | b2 << 16 | b3 << 24); + + uint maskedByGen0 = COMPUTE_CLUMP_MASK(genDword, BuildAgeMask (gennum, 2)); + byte b0_ = (byte)(maskedByGen0 & 0xff); + byte b1_ = (byte)((maskedByGen0 & 0xff00) >> 8); + byte b2_ = (byte)((maskedByGen0 & 0xff0000) >> 16); + byte b3_ = (byte)((maskedByGen0 & 0xff000000)>> 24); 
+ + AssertGenMask (b0, b0_, gennum); + AssertGenMask (b1, b1_, gennum); + AssertGenMask (b2, b2_, gennum); + AssertGenMask (b3, b3_, gennum); + } + + void AssertGenMask (byte gen, byte mask, uint gennum) + { + //3f or ff is not a valid generation + if (gen == 0x3f || gen == 0xff) + { + assert (mask == 0); + return; + } + //any generaion bigger than 2 is actually 2 + if (gen > 2) + gen = 2; + + if (gen <= gennum) + assert (mask != 0); + else + assert (mask == 0); + } + + void VerifyAgeEphemeralCalc (byte b0, byte b1, byte b2, byte b3, uint gennum) + { + uint genDword = (uint)(b0 | b1 << 8 | b2 << 16 | b3 << 24); + + uint agedClump = COMPUTE_AGED_CLUMPS(genDword, BuildAgeMask (gennum, 2)); + byte b0_ = (byte)(agedClump & 0xff); + byte b1_ = (byte)((agedClump & 0xff00) >> 8); + byte b2_ = (byte)((agedClump & 0xff0000) >> 16); + byte b3_ = (byte)((agedClump & 0xff000000) >> 24); + + AssertAgedClump (b0, b0_, gennum); + AssertAgedClump (b1, b1_, gennum); + AssertAgedClump (b2, b2_, gennum); + AssertAgedClump (b3, b3_, gennum); + } + + void AssertAgedClump (byte gen, byte agedGen, uint gennum) + { + //generation will stop growing at 0x3e + if (gen >= 0x3e) + { + assert (agedGen == gen); + return; + } + + if (gen <= gennum || (gen > 2 && gennum >= 2)) + assert (agedGen == gen + 1); + else + assert (agedGen == gen); + } + + void VerifyAgeCalc (byte b0, byte b1, byte b2, byte b3) + { + uint genDword = (uint)(b0 | b1 << 8 | b2 << 16 | b3 << 24); + + uint agedClump = COMPUTE_AGED_CLUMPS(genDword, GEN_FULLGC); + byte b0_ = (byte)(agedClump & 0xff); + byte b1_ = (byte)((agedClump & 0xff00) >> 8); + byte b2_ = (byte)((agedClump & 0xff0000) >> 16); + byte b3_ = (byte)((agedClump & 0xff000000) >> 24); + + AssertAgedClump (b0, b0_, 2); + AssertAgedClump (b1, b1_, 2); + AssertAgedClump (b2, b2_, 2); + AssertAgedClump (b3, b3_, 2); + } + */ + +#define GEN_MAX_AGE (0x3F) +#define GEN_CLAMP (0x3F3F3F3F) +#define GEN_AGE_LIMIT (0x3E3E3E3E) +#define GEN_INVALID (0xC0C0C0C0) +#define 
GEN_FILL (0x80808080) +#define GEN_MASK (0x40404040) +#define GEN_INC_SHIFT (6) + +#define PREFOLD_FILL_INTO_AGEMASK(msk) (1 + (msk) + (~GEN_FILL)) + +#define GEN_FULLGC PREFOLD_FILL_INTO_AGEMASK(GEN_AGE_LIMIT) + +#define MAKE_CLUMP_MASK_ADDENDS(bytes) (bytes >> GEN_INC_SHIFT) +#define APPLY_CLUMP_ADDENDS(gen, addend) (gen + addend) + +#define COMPUTE_CLUMP_MASK(gen, msk) (((gen & GEN_CLAMP) - msk) & GEN_MASK) +#define COMPUTE_CLUMP_ADDENDS(gen, msk) MAKE_CLUMP_MASK_ADDENDS(COMPUTE_CLUMP_MASK(gen, msk)) +#define COMPUTE_AGED_CLUMPS(gen, msk) APPLY_CLUMP_ADDENDS(gen, COMPUTE_CLUMP_ADDENDS(gen, msk)) + +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * SUPPORT STRUCTURES FOR ASYNCHRONOUS SCANNING + * + ****************************************************************************/ + +/* + * ScanRange + * + * Specifies a range of blocks for scanning. + * + */ +struct ScanRange +{ + /* + * Start Index + * + * Specifies the first block in the range. + */ + uint32_t uIndex; + + /* + * Count + * + * Specifies the number of blocks in the range. + */ + uint32_t uCount; +}; + + +/* + * ScanQNode + * + * Specifies a set of block ranges in a scan queue. + * + */ +struct ScanQNode +{ + /* + * Next Node + * + * Specifies the next node in a scan list. + */ + struct ScanQNode *pNext; + + /* + * Entry Count + * + * Specifies how many entries in this block are valid. + */ + uint32_t uEntries; + + /* + * Range Entries + * + * Each entry specifies a range of blocks to process. 
+ */ + ScanRange rgRange[HANDLE_BLOCKS_PER_SEGMENT / 4]; +}; + +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * MISCELLANEOUS HELPER ROUTINES AND DEFINES + * + ****************************************************************************/ + +/* + * INCLUSION_MAP_SIZE + * + * Number of elements in a type inclusion map. + * + */ +#define INCLUSION_MAP_SIZE (HANDLE_MAX_INTERNAL_TYPES + 1) + + +/* + * BuildInclusionMap + * + * Creates an inclusion map for the specified type array. + * + */ +void BuildInclusionMap(BOOL *rgTypeInclusion, const uint32_t *puType, uint32_t uTypeCount) +{ + LIMITED_METHOD_CONTRACT; + + // by default, no types are scanned + ZeroMemory(rgTypeInclusion, INCLUSION_MAP_SIZE * sizeof(BOOL)); + + // add the specified types to the inclusion map + for (uint32_t u = 0; u < uTypeCount; u++) + { + // fetch a type we are supposed to scan + uint32_t uType = puType[u]; + + // hope we aren't about to trash the stack :) + _ASSERTE(uType < HANDLE_MAX_INTERNAL_TYPES); + + // add this type to the inclusion map + rgTypeInclusion[uType + 1] = TRUE; + } +} + + +/* + * IsBlockIncluded + * + * Checks a type inclusion map for the inclusion of a particular block. 
+ * + */ +__inline BOOL IsBlockIncluded(TableSegment *pSegment, uint32_t uBlock, const BOOL *rgTypeInclusion) +{ + LIMITED_METHOD_CONTRACT; + + // fetch the adjusted type for this block + uint32_t uType = (uint32_t)(((int)(signed char)pSegment->rgBlockType[uBlock]) + 1); + + // hope the adjusted type was valid + _ASSERTE(uType <= HANDLE_MAX_INTERNAL_TYPES); + + // return the inclusion value for the block's type + return rgTypeInclusion[uType]; +} + + +/* + * TypesRequireUserDataScanning + * + * Determines whether the set of types listed should get user data during scans + * + * if ALL types passed have user data then this function will enable user data support + * otherwise it will disable user data support + * + * IN OTHER WORDS, SCANNING WITH A MIX OF USER-DATA AND NON-USER-DATA TYPES IS NOT SUPPORTED + * + */ +BOOL TypesRequireUserDataScanning(HandleTable *pTable, const uint32_t *types, uint32_t typeCount) +{ + WRAPPER_NO_CONTRACT; + + // count up the number of types passed that have user data associated + uint32_t userDataCount = 0; + for (uint32_t u = 0; u < typeCount; u++) + { + if (TypeHasUserData(pTable, types[u])) + userDataCount++; + } + + // if all have user data then we can enum user data + if (userDataCount == typeCount) + return TRUE; + + // WARNING: user data is all or nothing in scanning!!! + // since we have some types which don't support user data, we can't use the user data scanning code + // this means all callbacks will get NULL for user data!!!!! + _ASSERTE(userDataCount == 0); + + // no user data + return FALSE; +} + +/* + * BuildAgeMask + * + * Builds an age mask to be used when examining/updating the write barrier. 
+ * + */ +uint32_t BuildAgeMask(uint32_t uGen, uint32_t uMaxGen) +{ + LIMITED_METHOD_CONTRACT; + + // an age mask is composed of repeated bytes containing the next older generation + + if (uGen == uMaxGen) + uGen = GEN_MAX_AGE; + + uGen++; + + // clamp the generation to the maximum age we support in our macros + if (uGen > GEN_MAX_AGE) + uGen = GEN_MAX_AGE; + + // pack up a word with age bytes and fill bytes pre-folded as well + return PREFOLD_FILL_INTO_AGEMASK(uGen | (uGen << 8) | (uGen << 16) | (uGen << 24)); +} + +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * SYNCHRONOUS HANDLE AND BLOCK SCANNING ROUTINES + * + ****************************************************************************/ + +/* + * ARRAYSCANPROC + * + * Prototype for callbacks that implement handle array scanning logic. + * + */ +typedef void (CALLBACK *ARRAYSCANPROC)(PTR_UNCHECKED_OBJECTREF pValue, PTR_UNCHECKED_OBJECTREF pLast, + ScanCallbackInfo *pInfo, uintptr_t *pUserData); + + +/* + * ScanConsecutiveHandlesWithoutUserData + * + * Unconditionally scans a consecutive range of handles. + * + * USER DATA PASSED TO CALLBACK PROC IS ALWAYS NULL! 
+ * + */ +void CALLBACK ScanConsecutiveHandlesWithoutUserData(PTR_UNCHECKED_OBJECTREF pValue, + PTR_UNCHECKED_OBJECTREF pLast, + ScanCallbackInfo *pInfo, + uintptr_t *) +{ + WRAPPER_NO_CONTRACT; + +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_HandleSlotsScanned += (int)(pLast - pValue); +#endif + + // get frequently used params into locals + HANDLESCANPROC pfnScan = pInfo->pfnScan; + uintptr_t param1 = pInfo->param1; + uintptr_t param2 = pInfo->param2; + + // scan for non-zero handles + do + { + // call the callback for any we find + if (!HndIsNullOrDestroyedHandle(*pValue)) + { +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_HandlesActuallyScanned++; +#endif + + // process this handle + pfnScan(pValue, NULL, param1, param2); + } + + // on to the next handle + pValue++; + + } while (pValue < pLast); +} + + +/* + * ScanConsecutiveHandlesWithUserData + * + * Unconditionally scans a consecutive range of handles. + * + * USER DATA IS ASSUMED TO BE CONSECUTIVE! 
+ * + */ +void CALLBACK ScanConsecutiveHandlesWithUserData(PTR_UNCHECKED_OBJECTREF pValue, + PTR_UNCHECKED_OBJECTREF pLast, + ScanCallbackInfo *pInfo, + uintptr_t *pUserData) +{ + WRAPPER_NO_CONTRACT; + +#ifdef _DEBUG + // this function will crash if it is passed bad extra info + _ASSERTE(pUserData); + + // update our scanning statistics + pInfo->DEBUG_HandleSlotsScanned += (int)(pLast - pValue); +#endif + + // get frequently used params into locals + HANDLESCANPROC pfnScan = pInfo->pfnScan; + uintptr_t param1 = pInfo->param1; + uintptr_t param2 = pInfo->param2; + + // scan for non-zero handles + do + { + // call the callback for any we find + if (!HndIsNullOrDestroyedHandle(*pValue)) + { +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_HandlesActuallyScanned++; +#endif + + // process this handle + pfnScan(pValue, pUserData, param1, param2); + } + + // on to the next handle + pValue++; + pUserData++; + + } while (pValue < pLast); +} + +/* + * BlockAgeBlocks + * + * Ages all clumps in a range of consecutive blocks. + * + */ +void CALLBACK BlockAgeBlocks(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(pInfo); + +#ifdef DACCESS_COMPILE + UNREFERENCED_PARAMETER(pSegment); + UNREFERENCED_PARAMETER(uBlock); + UNREFERENCED_PARAMETER(uCount); +#else + // set up to update the specified blocks + uint32_t *pdwGen = (uint32_t *)pSegment->rgGeneration + uBlock; + uint32_t *pdwGenLast = pdwGen + uCount; + + // loop over all the blocks, aging their clumps as we go + do + { + // compute and store the new ages in parallel + *pdwGen = COMPUTE_AGED_CLUMPS(*pdwGen, GEN_FULLGC); + + } while (++pdwGen < pdwGenLast); +#endif +} + +/* + * BlockScanBlocksWithoutUserData + * + * Calls the specified callback once for each handle in a range of blocks, + * optionally aging the corresponding generation clumps. 
+ * + */ +void CALLBACK BlockScanBlocksWithoutUserData(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + LIMITED_METHOD_CONTRACT; + +#ifndef DACCESS_COMPILE + // get the first and limit handles for these blocks + _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uBlock * HANDLE_HANDLES_PER_BLOCK); + _UNCHECKED_OBJECTREF *pLast = pValue + (uCount * HANDLE_HANDLES_PER_BLOCK); +#else + PTR_UNCHECKED_OBJECTREF pValue = dac_cast<PTR_UNCHECKED_OBJECTREF>(PTR_HOST_MEMBER_TADDR(TableSegment, pSegment, rgValue)) + + (uBlock * HANDLE_HANDLES_PER_BLOCK); + PTR_UNCHECKED_OBJECTREF pLast = pValue + (uCount * HANDLE_HANDLES_PER_BLOCK); +#endif + + // scan the specified handles + ScanConsecutiveHandlesWithoutUserData(pValue, pLast, pInfo, NULL); + + // optionally update the clump generations for these blocks too + if (pInfo->uFlags & HNDGCF_AGE) + BlockAgeBlocks(pSegment, uBlock, uCount, pInfo); + +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_BlocksScannedNonTrivially += uCount; + pInfo->DEBUG_BlocksScanned += uCount; +#endif +} + + +/* + * BlockScanBlocksWithUserData + * + * Calls the specified callback once for each handle in a range of blocks, + * optionally aging the corresponding generation clumps. 
+ * + */ +void CALLBACK BlockScanBlocksWithUserData(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + LIMITED_METHOD_CONTRACT; + + // iterate individual blocks scanning with user data + for (uint32_t u = 0; u < uCount; u++) + { + // compute the current block + uint32_t uCur = (u + uBlock); + + // fetch the user data for this block + uintptr_t *pUserData = BlockFetchUserDataPointer(PTR__TableSegmentHeader(pSegment), uCur, TRUE); + +#ifndef DACCESS_COMPILE + // get the first and limit handles for these blocks + _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uCur * HANDLE_HANDLES_PER_BLOCK); + _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_BLOCK; +#else + PTR_UNCHECKED_OBJECTREF pValue = dac_cast<PTR_UNCHECKED_OBJECTREF>(PTR_HOST_MEMBER_TADDR(TableSegment, pSegment, rgValue)) + + (uCur * HANDLE_HANDLES_PER_BLOCK); + PTR_UNCHECKED_OBJECTREF pLast = pValue + HANDLE_HANDLES_PER_BLOCK; +#endif + + // scan the handles in this block + ScanConsecutiveHandlesWithUserData(pValue, pLast, pInfo, pUserData); + } + + // optionally update the clump generations for these blocks too + if (pInfo->uFlags & HNDGCF_AGE) + BlockAgeBlocks(pSegment, uBlock, uCount, pInfo); + +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_BlocksScannedNonTrivially += uCount; + pInfo->DEBUG_BlocksScanned += uCount; +#endif +} + + +/* + * BlockScanBlocksEphemeralWorker + * + * Calls the specified callback once for each handle in any clump + * identified by the clump mask in the specified block. + * + */ +void BlockScanBlocksEphemeralWorker(uint32_t *pdwGen, uint32_t dwClumpMask, ScanCallbackInfo *pInfo) +{ + WRAPPER_NO_CONTRACT; + + // + // OPTIMIZATION: Since we expect to call this worker fairly rarely compared to + // the number of times we pass through the outer loop, this function intentionally + // does not take pSegment as a param. 
+ // + // We do this so that the compiler won't try to keep pSegment in a register during + // the outer loop, leaving more registers for the common codepath. + // + // You might wonder why this is an issue considering how few locals we have in + // BlockScanBlocksEphemeral. For some reason the x86 compiler doesn't like to use + // all the registers during that loop, so a little coaxing was necessary to get + // the right output. + // + + // fetch the table segment we are working in + PTR_TableSegment pSegment = pInfo->pCurrentSegment; + + // if we should age the clumps then do so now (before we trash dwClumpMask) + if (pInfo->uFlags & HNDGCF_AGE) + *pdwGen = APPLY_CLUMP_ADDENDS(*pdwGen, MAKE_CLUMP_MASK_ADDENDS(dwClumpMask)); + + // compute the index of the first clump in the block + uint32_t uClump = (uint32_t)((uint8_t *)pdwGen - pSegment->rgGeneration); + +#ifndef DACCESS_COMPILE + // compute the first handle in the first clump of this block + _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uClump * HANDLE_HANDLES_PER_CLUMP); +#else + PTR_UNCHECKED_OBJECTREF pValue = dac_cast<PTR_UNCHECKED_OBJECTREF>(PTR_HOST_MEMBER_TADDR(TableSegment, pSegment, rgValue)) + + (uClump * HANDLE_HANDLES_PER_CLUMP); +#endif + + // some scans require us to report per-handle extra info - assume this one doesn't + ARRAYSCANPROC pfnScanHandles = ScanConsecutiveHandlesWithoutUserData; + uintptr_t *pUserData = NULL; + + // do we need to pass user data to the callback? 
+ if (pInfo->fEnumUserData) + { + // scan with user data enabled + pfnScanHandles = ScanConsecutiveHandlesWithUserData; + + // get the first user data slot for this block + pUserData = BlockFetchUserDataPointer(PTR__TableSegmentHeader(pSegment), (uClump / HANDLE_CLUMPS_PER_BLOCK), TRUE); + } + + // loop over the clumps, scanning those that are identified by the mask + do + { + // compute the last handle in this clump + PTR_UNCHECKED_OBJECTREF pLast = pValue + HANDLE_HANDLES_PER_CLUMP; + + // if this clump should be scanned then scan it + if (dwClumpMask & GEN_CLUMP_0_MASK) + pfnScanHandles(pValue, pLast, pInfo, pUserData); + + // skip to the next clump + dwClumpMask = NEXT_CLUMP_IN_MASK(dwClumpMask); + pValue = pLast; + pUserData += HANDLE_HANDLES_PER_CLUMP; + + } while (dwClumpMask); + +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_BlocksScannedNonTrivially++; +#endif +} + + +/* + * BlockScanBlocksEphemeral + * + * Calls the specified callback once for each handle from the specified + * generation in a block. + * + */ +void CALLBACK BlockScanBlocksEphemeral(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + WRAPPER_NO_CONTRACT; + + // get frequently used params into locals + uint32_t dwAgeMask = pInfo->dwAgeMask; + + // set up to update the specified blocks + uint32_t *pdwGen = (uint32_t *)pSegment->rgGeneration + uBlock; + uint32_t *pdwGenLast = pdwGen + uCount; + + // loop over all the blocks, checking for elligible clumps as we go + do + { + // determine if any clumps in this block are elligible + uint32_t dwClumpMask = COMPUTE_CLUMP_MASK(*pdwGen, dwAgeMask); + + // if there are any clumps to scan then scan them now + if (dwClumpMask) + { + // ok we need to scan some parts of this block + // + // OPTIMIZATION: Since we expect to call the worker fairly rarely compared + // to the number of times we pass through the loop, the function below + // intentionally does not take pSegment as a param. 
+ // + // We do this so that the compiler won't try to keep pSegment in a register + // during our loop, leaving more registers for the common codepath. + // + // You might wonder why this is an issue considering how few locals we have + // here. For some reason the x86 compiler doesn't like to use all the + // registers available during this loop and instead was hitting the stack + // repeatedly, so a little coaxing was necessary to get the right output. + // + BlockScanBlocksEphemeralWorker(pdwGen, dwClumpMask, pInfo); + } + + // on to the next block's generation info + pdwGen++; + + } while (pdwGen < pdwGenLast); + +#ifdef _DEBUG + // update our scanning statistics + pInfo->DEBUG_BlocksScanned += uCount; +#endif +} + +#ifndef DACCESS_COMPILE +/* + * BlockAgeBlocksEphemeral + * + * Ages all clumps within the specified generation. + * + */ +void CALLBACK BlockAgeBlocksEphemeral(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + LIMITED_METHOD_CONTRACT; + + // get frequently used params into locals + uint32_t dwAgeMask = pInfo->dwAgeMask; + + // set up to update the specified blocks + uint32_t *pdwGen = (uint32_t *)pSegment->rgGeneration + uBlock; + uint32_t *pdwGenLast = pdwGen + uCount; + + // loop over all the blocks, aging their clumps as we go + do + { + // compute and store the new ages in parallel + *pdwGen = COMPUTE_AGED_CLUMPS(*pdwGen, dwAgeMask); + + } while (++pdwGen < pdwGenLast); +} + +/* + * BlockResetAgeMapForBlocksWorker + * + * Figures out the minimum age of the objects referred to by the handles in any clump + * identified by the clump mask in the specified block. 
+ * + */ +void BlockResetAgeMapForBlocksWorker(uint32_t *pdwGen, uint32_t dwClumpMask, ScanCallbackInfo *pInfo) +{ + STATIC_CONTRACT_NOTHROW; + STATIC_CONTRACT_GC_NOTRIGGER; + STATIC_CONTRACT_SO_TOLERANT; + STATIC_CONTRACT_MODE_COOPERATIVE; + + // fetch the table segment we are working in + TableSegment *pSegment = pInfo->pCurrentSegment; + + // compute the index of the first clump in the block + uint32_t uClump = (uint32_t)((uint8_t *)pdwGen - pSegment->rgGeneration); + + // compute the first handle in the first clump of this block + _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uClump * HANDLE_HANDLES_PER_CLUMP); + + // loop over the clumps, scanning those that are identified by the mask + do + { + // compute the last handle in this clump + _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_CLUMP; + + // if this clump should be scanned then scan it + if (dwClumpMask & GEN_CLUMP_0_MASK) + { + // for each clump, determine the minimum age of the objects pointed at. + int minAge = GEN_MAX_AGE; + for ( ; pValue < pLast; pValue++) + { + if (!HndIsNullOrDestroyedHandle(*pValue)) + { + int thisAge = GCHeap::GetGCHeap()->WhichGeneration(*pValue); + if (minAge > thisAge) + minAge = thisAge; + +#ifndef FEATURE_REDHAWK + if ((*pValue)->GetGCSafeMethodTable() == g_pOverlappedDataClass) + { + // reporting the pinned user objects + OverlappedDataObject *pOverlapped = (OverlappedDataObject *)(*pValue); + if (pOverlapped->m_userObject != NULL) + { + Object * pUserObject = OBJECTREFToObject(pOverlapped->m_userObject); + thisAge = GCHeap::GetGCHeap()->WhichGeneration(pUserObject); + if (minAge > thisAge) + minAge = thisAge; + if (pOverlapped->m_isArray) + { + ArrayBase* pUserArrayObject = (ArrayBase*)pUserObject; + Object **pObj = (Object**)pUserArrayObject->GetDataPtr(TRUE); + size_t num = pUserArrayObject->GetNumComponents(); + for (size_t i = 0; i < num; i ++) + { + thisAge = GCHeap::GetGCHeap()->WhichGeneration(pObj[i]); + if (minAge > thisAge) + minAge = thisAge; 
+ } + } + } + } +#endif // !FEATURE_REDHAWK + } + } + _ASSERTE(FitsInU1(minAge)); + ((uint8_t *)pSegment->rgGeneration)[uClump] = static_cast<uint8_t>(minAge); + } + // skip to the next clump + dwClumpMask = NEXT_CLUMP_IN_MASK(dwClumpMask); + pValue = pLast; + uClump++; + } while (dwClumpMask); +} + + +/* + * BlockResetAgeMapForBlocks + * + * Sets the age maps for a range of blocks. Called in the case of demotion. Even in this case + * though, most handles refer to objects that don't get demoted and that need to be aged therefore. + * + */ +void CALLBACK BlockResetAgeMapForBlocks(TableSegment *pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + WRAPPER_NO_CONTRACT; + +#if 0 + // zero the age map for the specified range of blocks + ZeroMemory((uint32_t *)pSegment->rgGeneration + uBlock, uCount * sizeof(uint32_t)); +#else + // Actually, we need to be more sophisticated than the above code - there are scenarios + // where there is demotion in almost every gc cycle, so none of handles get + // aged appropriately. 
+ + // get frequently used params into locals + uint32_t dwAgeMask = pInfo->dwAgeMask; + + // set up to update the specified blocks + uint32_t *pdwGen = (uint32_t *)pSegment->rgGeneration + uBlock; + uint32_t *pdwGenLast = pdwGen + uCount; + + // loop over all the blocks, checking for eligible clumps as we go + do + { + // determine if any clumps in this block are eligible + uint32_t dwClumpMask = COMPUTE_CLUMP_MASK(*pdwGen, dwAgeMask); + + // if there are any clumps to scan then scan them now + if (dwClumpMask) + { + // ok we need to scan some parts of this block + // This code is a variation of the code in BlockScanBlocksEphemeral, + // so the OPTIMIZATION comment there applies here as well + BlockResetAgeMapForBlocksWorker(pdwGen, dwClumpMask, pInfo); + } + + // on to the next block's generation info + pdwGen++; + + } while (pdwGen < pdwGenLast); +#endif +} + +static void VerifyObject(_UNCHECKED_OBJECTREF from, _UNCHECKED_OBJECTREF obj) +{ +#ifdef FEATURE_REDHAWK + UNREFERENCED_PARAMETER(from); + MethodTable* pMT = (MethodTable*)(obj->GetGCSafeMethodTable()); + pMT->SanityCheck(); +#else + obj->ValidateHeap(from); +#endif // FEATURE_REDHAWK +} + +static void VerifyObjectAndAge(_UNCHECKED_OBJECTREF *pValue, _UNCHECKED_OBJECTREF from, _UNCHECKED_OBJECTREF obj, uint8_t minAge) +{ + UNREFERENCED_PARAMETER(pValue); + VerifyObject(from, obj); + + int thisAge = GCHeap::GetGCHeap()->WhichGeneration(obj); + + //debugging code + //if (minAge > thisAge && thisAge < GCHeap::GetGCHeap()->GetMaxGeneration()) + //{ + // if ((*pValue) == obj) + // printf("Handle (age %u) %p -> %p (age %u)", minAge, pValue, obj, thisAge); + // else + // printf("Handle (age %u) %p -> %p -> %p (age %u)", minAge, pValue, from, obj, thisAge); + + // // for test programs - if the object is a string, print it + // if (obj->GetGCSafeMethodTable() == g_pStringClass) + // { + // printf("'%ls'\n", ((StringObject *)obj)->GetBuffer()); + // } + // else + // { + // printf("\n"); + // } + //} + + if (minAge 
>= GEN_MAX_AGE || (minAge > thisAge && thisAge < static_cast<int>(GCHeap::GetGCHeap()->GetMaxGeneration()))) + { + _ASSERTE(!"Fatal Error in HandleTable."); + EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE); + } +} + +/* + * BlockVerifyAgeMapForBlocksWorker + * + * Verifies out the minimum age of the objects referred to by the handles in any clump + * identified by the clump mask in the specified block. + * Also validates the objects themselves. + * + */ +void BlockVerifyAgeMapForBlocksWorker(uint32_t *pdwGen, uint32_t dwClumpMask, ScanCallbackInfo *pInfo, uint32_t uType) +{ + WRAPPER_NO_CONTRACT; + + // fetch the table segment we are working in + TableSegment *pSegment = pInfo->pCurrentSegment; + + // compute the index of the first clump in the block + uint32_t uClump = (uint32_t)((uint8_t *)pdwGen - pSegment->rgGeneration); + + // compute the first handle in the first clump of this block + _UNCHECKED_OBJECTREF *pValue = pSegment->rgValue + (uClump * HANDLE_HANDLES_PER_CLUMP); + + // loop over the clumps, scanning those that are identified by the mask + do + { + // compute the last handle in this clump + _UNCHECKED_OBJECTREF *pLast = pValue + HANDLE_HANDLES_PER_CLUMP; + + // if this clump should be scanned then scan it + if (dwClumpMask & GEN_CLUMP_0_MASK) + { + // for each clump, check whether any object is younger than the age indicated by the clump + uint8_t minAge = ((uint8_t *)pSegment->rgGeneration)[uClump]; + for ( ; pValue < pLast; pValue++) + { + if (!HndIsNullOrDestroyedHandle(*pValue)) + { + VerifyObjectAndAge(pValue, (*pValue), (*pValue), minAge); +#ifndef FEATURE_REDHAWK + if ((*pValue)->GetGCSafeMethodTable() == g_pOverlappedDataClass) + { + // reporting the pinned user objects + OverlappedDataObject *pOverlapped = (OverlappedDataObject *)(*pValue); + if (pOverlapped->m_userObject != NULL) + { + Object * pUserObject = OBJECTREFToObject(pOverlapped->m_userObject); + VerifyObjectAndAge(pValue, (*pValue), pUserObject, minAge); + if 
(pOverlapped->m_isArray) + { + ArrayBase* pUserArrayObject = (ArrayBase*)pUserObject; + Object **pObj = (Object**)pUserArrayObject->GetDataPtr(TRUE); + size_t num = pUserArrayObject->GetNumComponents(); + for (size_t i = 0; i < num; i ++) + { + VerifyObjectAndAge(pValue, pUserObject, pObj[i], minAge); + } + } + } + } +#endif // !FEATURE_REDHAWK + + if (uType == HNDTYPE_DEPENDENT) + { + PTR_uintptr_t pUserData = HandleQuickFetchUserDataPointer((OBJECTHANDLE)pValue); + + // if we did then copy the value + if (pUserData) + { + _UNCHECKED_OBJECTREF pSecondary = (_UNCHECKED_OBJECTREF)(*pUserData); + if (pSecondary) + { + VerifyObject(pSecondary, pSecondary); + } + } + } + } + } + } +// else +// printf("skipping clump with age %x\n", ((uint8_t *)pSegment->rgGeneration)[uClump]); + + // skip to the next clump + dwClumpMask = NEXT_CLUMP_IN_MASK(dwClumpMask); + pValue = pLast; + uClump++; + } while (dwClumpMask); +} + +/* + * BlockVerifyAgeMapForBlocks + * + * Sets the age maps for a range of blocks. Called in the case of demotion. Even in this case + * though, most handles refer to objects that don't get demoted and that need to be aged therefore. + * + */ +void CALLBACK BlockVerifyAgeMapForBlocks(TableSegment *pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *pInfo) +{ + WRAPPER_NO_CONTRACT; + + for (uint32_t u = 0; u < uCount; u++) + { + uint32_t uCur = (u + uBlock); + + uint32_t *pdwGen = (uint32_t *)pSegment->rgGeneration + uCur; + + uint32_t uType = pSegment->rgBlockType[uCur]; + + BlockVerifyAgeMapForBlocksWorker(pdwGen, 0xFFFFFFFF, pInfo, uType); + } +} + +/* + * BlockLockBlocks + * + * Locks all blocks in the specified range. 
+ * + */ +void CALLBACK BlockLockBlocks(TableSegment *pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *) +{ + WRAPPER_NO_CONTRACT; + + // loop over the blocks in the specified range and lock them + for (uCount += uBlock; uBlock < uCount; uBlock++) + BlockLock(pSegment, uBlock); +} + + +/* + * BlockUnlockBlocks + * + * Unlocks all blocks in the specified range. + * + */ +void CALLBACK BlockUnlockBlocks(TableSegment *pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *) +{ + WRAPPER_NO_CONTRACT; + + // loop over the blocks in the specified range and unlock them + for (uCount += uBlock; uBlock < uCount; uBlock++) + BlockUnlock(pSegment, uBlock); +} +#endif // !DACCESS_COMPILE + +/* + * BlockQueueBlocksForAsyncScan + * + * Queues the specified blocks to be scanned asynchronously. + * + */ +void CALLBACK BlockQueueBlocksForAsyncScan(PTR_TableSegment pSegment, uint32_t uBlock, uint32_t uCount, ScanCallbackInfo *) +{ + CONTRACTL + { + NOTHROW; + WRAPPER(GC_TRIGGERS); + } + CONTRACTL_END; + + // fetch our async scan information + AsyncScanInfo *pAsyncInfo = pSegment->pHandleTable->pAsyncScanInfo; + + // sanity + _ASSERTE(pAsyncInfo); + + // fetch the current queue tail + ScanQNode *pQNode = pAsyncInfo->pQueueTail; + + // did we get a tail? + if (pQNode) + { + // we got an existing tail - is the tail node full already? + if (pQNode->uEntries >= _countof(pQNode->rgRange)) + { + // the node is full - is there another node in the queue? + if (!pQNode->pNext) + { + // no more nodes - allocate a new one + ScanQNode *pQNodeT = new (nothrow) ScanQNode(); + + // did it succeed? + if (!pQNodeT) + { + // + // We couldn't allocate another queue node. + // + // THIS IS NOT FATAL IF ASYNCHRONOUS SCANNING IS BEING USED PROPERLY + // + // The reason we can survive this is that asynchronous scans are not + // guaranteed to enumerate all handles anyway. 
Since the table can + // change while the lock is released, the caller may assume only that + // asynchronous scanning will enumerate a reasonably high percentage + // of the handles requested, most of the time. + // + // The typical use of an async scan is to process as many handles as + // possible asynchronously, so as to reduce the amount of time spent + // in the inevitable synchronous scan that follows. + // + // As a practical example, the Concurrent Mark phase of garbage + // collection marks as many objects as possible asynchronously, and + // subsequently performs a normal, synchronous mark to catch the + // stragglers. Since most of the reachable objects in the heap are + // already marked at this point, the synchronous scan ends up doing + // very little work. + // + // So the moral of the story is that yes, we happily drop some of + // your blocks on the floor in this out of memory case, and that's + // BY DESIGN. + // + LOG((LF_GC, LL_WARNING, "WARNING: Out of memory queueing for async scan. 
Some blocks skipped.\n")); + return; + } + + memset (pQNodeT, 0, sizeof(ScanQNode)); + + // link the new node into the queue + pQNode->pNext = pQNodeT; + } + + // either way, use the next node in the queue + pQNode = pQNode->pNext; + } + } + else + { + // no tail - this is a brand new queue; start the tail at the head node + pQNode = pAsyncInfo->pScanQueue; + } + + // we will be using the last slot after the existing entries + uint32_t uSlot = pQNode->uEntries; + + // fetch the slot where we will be storing the new block range + ScanRange *pNewRange = pQNode->rgRange + uSlot; + + // update the entry count in the node + pQNode->uEntries = uSlot + 1; + + // fill in the new slot with the block range info + pNewRange->uIndex = uBlock; + pNewRange->uCount = uCount; + + // remember the last block we stored into as the new queue tail + pAsyncInfo->pQueueTail = pQNode; +} + +/*--------------------------------------------------------------------------*/ + + + +/**************************************************************************** + * + * ASYNCHRONOUS SCANNING WORKERS AND CALLBACKS + * + ****************************************************************************/ + +/* + * QNODESCANPROC + * + * Prototype for callbacks that implement per ScanQNode scanning logic. + * + */ +typedef void (CALLBACK *QNODESCANPROC)(AsyncScanInfo *pAsyncInfo, ScanQNode *pQNode, uintptr_t lParam); + + +/* + * ProcessScanQueue + * + * Calls the specified handler once for each node in a scan queue. 
 *
 */
void ProcessScanQueue(AsyncScanInfo *pAsyncInfo, QNODESCANPROC pfnNodeHandler, uintptr_t lParam, BOOL fCountEmptyQNodes)
{
    WRAPPER_NO_CONTRACT;

    // an empty queue (NULL tail) means nothing was ever queued; bail out unless
    // the caller asked for the handler to visit empty nodes too
    // (fCountEmptyQNodes is TRUE when the handler must see every node regardless
    // of its entry count - e.g. when freeing the nodes themselves)
    if (pAsyncInfo->pQueueTail == NULL && fCountEmptyQNodes == FALSE)
        return;

    // if any entries were added to the block list after our initial node, clean them up now
    ScanQNode *pQNode = pAsyncInfo->pScanQueue;
    while (pQNode)
    {
        // remember the next node
        // (fetched BEFORE the handler runs, since the handler may free pQNode)
        ScanQNode *pNext = pQNode->pNext;

        // call the handler for the current node and then advance to the next
        pfnNodeHandler(pAsyncInfo, pQNode, lParam);
        pQNode = pNext;
    }
}


/*
 * ProcessScanQNode
 *
 * Calls the specified block handler once for each range of blocks in a ScanQNode.
 *
 */
void CALLBACK ProcessScanQNode(AsyncScanInfo *pAsyncInfo, ScanQNode *pQNode, uintptr_t lParam)
{
    WRAPPER_NO_CONTRACT;

    // get the block handler from our lParam
    BLOCKSCANPROC pfnBlockHandler = (BLOCKSCANPROC)lParam;

    // fetch the params we will be passing to the handler
    ScanCallbackInfo *pCallbackInfo = pAsyncInfo->pCallbackInfo;
    PTR_TableSegment pSegment = pCallbackInfo->pCurrentSegment;

    // set up to iterate the ranges in the queue node
    ScanRange *pRange = pQNode->rgRange;
    ScanRange *pRangeLast = pRange + pQNode->uEntries;

    // loop over all the ranges, calling the block handler for each one
    while (pRange < pRangeLast) {
        // call the block handler with the current block range
        pfnBlockHandler(pSegment, pRange->uIndex, pRange->uCount, pCallbackInfo);

        // advance to the next range
        pRange++;

    }
}

#ifndef DACCESS_COMPILE
/*
 * UnlockAndForgetQueuedBlocks
 *
 * Unlocks all blocks referenced in the specified node and marks the node as empty.
 *
 */
void CALLBACK UnlockAndForgetQueuedBlocks(AsyncScanInfo *pAsyncInfo, ScanQNode *pQNode, uintptr_t)
{
    WRAPPER_NO_CONTRACT;

    // unlock the blocks named in this node
    ProcessScanQNode(pAsyncInfo, pQNode, (uintptr_t)BlockUnlockBlocks);

    // reset the node so it looks empty
    pQNode->uEntries = 0;
}
#endif

/*
 * FreeScanQNode
 *
 * Frees the specified ScanQNode
 *
 */
void CALLBACK FreeScanQNode(AsyncScanInfo *, ScanQNode *pQNode, uintptr_t)
{
    LIMITED_METHOD_CONTRACT;

    // free the node's memory
    delete pQNode;
}


/*
 * xxxTableScanQueuedBlocksAsync
 *
 * Performs an asynchronous scan of the queued blocks for the specified segment.
 *
 * N.B. THIS FUNCTION LEAVES THE TABLE LOCK WHILE SCANNING.
 *
 */
void xxxTableScanQueuedBlocksAsync(PTR_HandleTable pTable, PTR_TableSegment pSegment, CrstHolderWithState *pCrstHolder)
{
    WRAPPER_NO_CONTRACT;

    //-------------------------------------------------------------------------------
    // PRE-SCAN PREPARATION

    // fetch our table's async and sync scanning info
    AsyncScanInfo *pAsyncInfo = pTable->pAsyncScanInfo;
    ScanCallbackInfo *pCallbackInfo = pAsyncInfo->pCallbackInfo;

    // make a note that we are now processing this segment
    pCallbackInfo->pCurrentSegment = pSegment;

#ifndef DACCESS_COMPILE
    // loop through and lock down all the blocks referenced by the queue
    // (blocks must be locked BEFORE the table lock is released below)
    ProcessScanQueue(pAsyncInfo, ProcessScanQNode, (uintptr_t)BlockLockBlocks, FALSE);
#endif

    //-------------------------------------------------------------------------------
    // ASYNCHRONOUS SCANNING OF QUEUED BLOCKS
    //

    // leave the table lock
    _ASSERTE(pCrstHolder->GetValue()==(&pTable->Lock));
    pCrstHolder->Release();

    // sanity - this isn't a very asynchronous scan if we don't actually leave
    _ASSERTE(!pTable->Lock.OwnedByCurrentThread());

    // perform the actual scanning of the specified blocks
    ProcessScanQueue(pAsyncInfo, ProcessScanQNode, (uintptr_t)pAsyncInfo->pfnBlockHandler,
                     FALSE);

    // re-enter the table lock
    pCrstHolder->Acquire();


    //-------------------------------------------------------------------------------
    // POST-SCAN CLEANUP
    //

#ifndef DACCESS_COMPILE
    // loop through, unlock all the blocks we had locked, and reset the queue nodes
    ProcessScanQueue(pAsyncInfo, UnlockAndForgetQueuedBlocks, NULL, FALSE);
#endif

    // we are done processing this segment
    pCallbackInfo->pCurrentSegment = NULL;

    // reset the "queue tail" pointer to indicate an empty queue
    pAsyncInfo->pQueueTail = NULL;
}

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * SEGMENT ITERATORS
 *
 ****************************************************************************/

/*
 * QuickSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 * Performs no maintenance on the segments; simply walks the segment list
 * (pass NULL as the previous segment to start from the head).
 *
 */
PTR_TableSegment CALLBACK QuickSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *)
{
    LIMITED_METHOD_CONTRACT;

    PTR_TableSegment pNextSegment;

    // do we have a previous segment?
    if (!pPrevSegment)
    {
        // nope - start with the first segment in our list
        pNextSegment = pTable->pSegmentList;
    }
    else
    {
        // yup, fetch the next segment in the list
        pNextSegment = pPrevSegment->pNextSegment;
    }

    // return the segment pointer
    return pNextSegment;
}


/*
 * StandardSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 * This iterator performs some maintenance on the segments,
 * primarily making sure the block chains are sorted so that
 * g0 scans are more likely to operate on contiguous blocks.
 *
 */
PTR_TableSegment CALLBACK StandardSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *)
{
    CONTRACTL
    {
        WRAPPER(THROWS);
        WRAPPER(GC_TRIGGERS);
        FORBID_FAULT;
        SUPPORTS_DAC;
    }
    CONTRACTL_END;

    // get the next segment using the quick iterator
    PTR_TableSegment pNextSegment = QuickSegmentIterator(pTable, pPrevSegment);

#ifndef DACCESS_COMPILE
    // re-sort the block chains if necessary
    if (pNextSegment && pNextSegment->fResortChains)
        SegmentResortChains(pNextSegment);
#endif

    // return the segment we found
    return pNextSegment;
}


/*
 * FullSegmentIterator
 *
 * Returns the next segment to be scanned in a scanning loop.
 *
 * This iterator performs full maintenance on the segments,
 * including freeing those it notices are empty along the way.
 *
 */
PTR_TableSegment CALLBACK FullSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *)
{
    CONTRACTL
    {
        WRAPPER(THROWS);
        WRAPPER(GC_TRIGGERS);
        FORBID_FAULT;
        SUPPORTS_DAC;
    }
    CONTRACTL_END;

    // we will be resetting the next segment's sequence number
    uint32_t uSequence = 0;

    // if we have a previous segment then compute the next sequence number from it
    if (pPrevSegment)
        uSequence = (uint32_t)pPrevSegment->bSequence + 1;

    // loop until we find an appropriate segment to return
    PTR_TableSegment pNextSegment;
    for (;;)
    {
        // first, call the standard iterator to get the next segment
        pNextSegment = StandardSegmentIterator(pTable, pPrevSegment);

        // if there are no more segments then we're done
        if (!pNextSegment)
            break;

#ifndef DACCESS_COMPILE
        // check if we should decommit any excess pages in this segment
        if (DoesSegmentNeedsToTrimExcessPages(pNextSegment))
        {
            CrstHolder ch(&pTable->Lock);
            SegmentTrimExcessPages(pNextSegment);
        }
#endif

        // if the segment has handles in it then it will survive and be returned
        if (pNextSegment->bEmptyLine > 0)
        {
            // update this segment's sequence number
            pNextSegment->bSequence = (uint8_t)(uSequence % 0x100);

            // break out and return the segment
            break;
        }

#ifndef DACCESS_COMPILE
        CrstHolder ch(&pTable->Lock);
        // this segment is completely empty - can we free it now?
        // NOTE(review): if TableCanFreeSegmentNow returns FALSE (an async scan is
        // in flight), the loop re-fetches this same empty segment on the next
        // iteration - presumably retrying until the scanner finishes; confirm.
        if (pNextSegment->bEmptyLine == 0 && TableCanFreeSegmentNow(pTable, pNextSegment))
        {
            // yup, we probably want to free this one
            PTR_TableSegment pNextNext = pNextSegment->pNextSegment;

            // was this the first segment in the list?
            if (!pPrevSegment)
            {
                // yes - are there more segments?
                if (pNextNext)
                {
                    // yes - unlink the head
                    pTable->pSegmentList = pNextNext;
                }
                else
                {
                    // no - leave this one in the list and enumerate it
                    break;
                }
            }
            else
            {
                // no - unlink this segment from the segment list
                pPrevSegment->pNextSegment = pNextNext;
            }

            // free this segment
            SegmentFree(pNextSegment);
        }
#else
        // The code above has a side effect we need to preserve:
        // while neither pNextSegment nor pPrevSegment are modified, their fields
        // are, which affects the handle table walk. Since TableCanFreeSegmentNow
        // actually only checks to see if something is asynchronously scanning this
        // segment (and returns FALSE if something is), we'll assume it always
        // returns TRUE. (Since we can't free memory in the Dac, it doesn't matter
        // if there's another async scan going on.)
        pPrevSegment = pNextSegment;
#endif
    }

    // return the segment we found
    return pNextSegment;
}

/*
 * xxxAsyncSegmentIterator
 *
 * Implements the core handle scanning loop for a table.
 *
 * This iterator wraps another iterator, checking for queued blocks from the
 * previous segment before advancing to the next. If there are queued blocks,
 * the function processes them by calling xxxTableScanQueuedBlocksAsync.
 *
 * N.B. THIS FUNCTION LEAVES THE TABLE LOCK WHILE SCANNING.
 *
 */
PTR_TableSegment CALLBACK xxxAsyncSegmentIterator(PTR_HandleTable pTable, PTR_TableSegment pPrevSegment, CrstHolderWithState *pCrstHolder)
{
    WRAPPER_NO_CONTRACT;

    // fetch our table's async scanning info
    AsyncScanInfo *pAsyncInfo = pTable->pAsyncScanInfo;

    // sanity
    _ASSERTE(pAsyncInfo);

    // if we have queued some blocks from the previous segment then scan them now
    // (this is where the table lock may be released - see xxxTableScanQueuedBlocksAsync)
    if (pAsyncInfo->pQueueTail)
        xxxTableScanQueuedBlocksAsync(pTable, pPrevSegment, pCrstHolder);

    // fetch the underlying iterator from our async info
    SEGMENTITERATOR pfnCoreIterator = pAsyncInfo->pfnSegmentIterator;

    // call the underlying iterator to get the next segment
    return pfnCoreIterator(pTable, pPrevSegment, pCrstHolder);
}

/*--------------------------------------------------------------------------*/



/****************************************************************************
 *
 * CORE SCANNING LOGIC
 *
 ****************************************************************************/

/*
 * SegmentScanByTypeChain
 *
 * Implements the single-type block scanning loop for a single segment.
 *
 */
void SegmentScanByTypeChain(PTR_TableSegment pSegment, uint32_t uType, BLOCKSCANPROC pfnBlockHandler, ScanCallbackInfo *pInfo)
{
    WRAPPER_NO_CONTRACT;

    // hope we are enumerating a valid type chain :)
    _ASSERTE(uType < HANDLE_MAX_INTERNAL_TYPES);

    // fetch the tail
    uint32_t uBlock = pSegment->rgTail[uType];

    // if we didn't find a terminator then there's blocks to enumerate
    if (uBlock != BLOCK_INVALID)
    {
        // start walking from the head
        // (the allocation chain is circular, so the tail's successor is the head)
        uBlock = pSegment->rgAllocation[uBlock];

        // scan until we loop back to the first block
        uint32_t uHead = uBlock;
        do
        {
            // search forward trying to batch up sequential runs of blocks
            uint32_t uLast, uNext = uBlock;
            do
            {
                // compute the next sequential block for comparison
                uLast = uNext + 1;

                // fetch the next block in the allocation chain
                uNext = pSegment->rgAllocation[uNext];

            } while ((uNext == uLast) && (uNext != uHead));

            // call the callback for this group of blocks
            pfnBlockHandler(pSegment, uBlock, (uLast - uBlock), pInfo);

            // advance to the next block
            uBlock = uNext;

        } while (uBlock != uHead);
    }
}


/*
 * SegmentScanByTypeMap
 *
 * Implements the multi-type block scanning loop for a single segment.
 *
 */
void SegmentScanByTypeMap(PTR_TableSegment pSegment, const BOOL *rgTypeInclusion,
                          BLOCKSCANPROC pfnBlockHandler, ScanCallbackInfo *pInfo)
{
    WRAPPER_NO_CONTRACT;

    // start scanning with the first block in the segment
    uint32_t uBlock = 0;

    // we don't need to scan the whole segment, just up to the empty line
    uint32_t uLimit = pSegment->bEmptyLine;

    // loop across the segment looking for blocks to scan
    for (;;)
    {
        // find the first block included by the type map
        for (;;)
        {
            // if we are out of range looking for a start point then we're done
            if (uBlock >= uLimit)
                return;

            // if the type is one we are scanning then we found a start point
            if (IsBlockIncluded(pSegment, uBlock, rgTypeInclusion))
                break;

            // keep searching with the next block
            uBlock++;
        }

        // remember this block as the first that needs scanning
        uint32_t uFirst = uBlock;

        // find the next block not included in the type map
        for (;;)
        {
            // advance the block index
            uBlock++;

            // if we are beyond the limit then we are done
            if (uBlock >= uLimit)
                break;

            // if the type is not one we are scanning then we found an end point
            if (!IsBlockIncluded(pSegment, uBlock, rgTypeInclusion))
                break;
        }

        // call the callback for the group of blocks we found
        pfnBlockHandler(pSegment, uFirst, (uBlock - uFirst), pInfo);

        // look for another range starting with the next block
        uBlock++;
    }
}


/*
 * TableScanHandles
 *
 * Implements the core handle scanning loop for a table.
 *
 */
void CALLBACK TableScanHandles(PTR_HandleTable pTable,
                               const uint32_t *puType,
                               uint32_t uTypeCount,
                               SEGMENTITERATOR pfnSegmentIterator,
                               BLOCKSCANPROC pfnBlockHandler,
                               ScanCallbackInfo *pInfo,
                               CrstHolderWithState *pCrstHolder)
{
    WRAPPER_NO_CONTRACT;

    // sanity - caller must ALWAYS provide a valid ScanCallbackInfo
    _ASSERTE(pInfo);

    // we may need a type inclusion map for multi-type scans
    // (only initialized below when uTypeCount > 1 - never read otherwise)
    BOOL rgTypeInclusion[INCLUSION_MAP_SIZE];

    // we only need to scan types if we have a type array and a callback to call
    if (!pfnBlockHandler || !puType)
        uTypeCount = 0;

    // if we will be scanning more than one type then initialize the inclusion map
    if (uTypeCount > 1)
        BuildInclusionMap(rgTypeInclusion, puType, uTypeCount);

    // now, iterate over the segments, scanning blocks of the specified type(s)
    PTR_TableSegment pSegment = NULL;
    while ((pSegment = pfnSegmentIterator(pTable, pSegment, pCrstHolder)) != NULL)
    {
        // if there are types to scan then enumerate the blocks in this segment
        // (we do this test inside the loop since the iterators should still run...)
        if (uTypeCount >= 1)
        {
            // make sure the "current segment" pointer in the scan info is up to date
            pInfo->pCurrentSegment = pSegment;

            // is this a single type or multi-type enumeration?
            if (uTypeCount == 1)
            {
                // single type enumeration - walk the type's allocation chain
                SegmentScanByTypeChain(pSegment, *puType, pfnBlockHandler, pInfo);
            }
            else
            {
                // multi-type enumeration - walk the type map to find eligible blocks
                SegmentScanByTypeMap(pSegment, rgTypeInclusion, pfnBlockHandler, pInfo);
            }

            // make sure the "current segment" pointer in the scan info is up to date
            pInfo->pCurrentSegment = NULL;
        }
    }
}


/*
 * xxxTableScanHandlesAsync
 *
 * Implements asynchronous handle scanning for a table.
 *
 * N.B. THIS FUNCTION LEAVES THE TABLE LOCK WHILE SCANNING.
 *
 */
void CALLBACK xxxTableScanHandlesAsync(PTR_HandleTable pTable,
                                       const uint32_t *puType,
                                       uint32_t uTypeCount,
                                       SEGMENTITERATOR pfnSegmentIterator,
                                       BLOCKSCANPROC pfnBlockHandler,
                                       ScanCallbackInfo *pInfo,
                                       CrstHolderWithState *pCrstHolder)
{
    WRAPPER_NO_CONTRACT;

    // presently only one async scan is allowed at a time
    if (pTable->pAsyncScanInfo)
    {
        // somebody tried to kick off multiple async scans
        _ASSERTE(FALSE);
        return;
    }


    //-------------------------------------------------------------------------------
    // PRE-SCAN PREPARATION

    // we keep an initial scan list node on the stack (for perf)
    ScanQNode initialNode;

    initialNode.pNext = NULL;
    initialNode.uEntries = 0;

    // initialize our async scanning info
    // (stack-allocated; it is unlinked from the table again before we return)
    AsyncScanInfo asyncInfo;

    asyncInfo.pCallbackInfo = pInfo;
    asyncInfo.pfnSegmentIterator = pfnSegmentIterator;
    asyncInfo.pfnBlockHandler = pfnBlockHandler;
    asyncInfo.pScanQueue = &initialNode;
    asyncInfo.pQueueTail = NULL;

    // link our async scan info into the table
    pTable->pAsyncScanInfo = &asyncInfo;


    //-------------------------------------------------------------------------------
    // PER-SEGMENT ASYNCHRONOUS SCANNING OF BLOCKS
    //

    // call the synchronous scanner with our async callbacks
    TableScanHandles(pTable,
                     puType, uTypeCount,
                     xxxAsyncSegmentIterator,
                     BlockQueueBlocksForAsyncScan,
                     pInfo,
                     pCrstHolder);


    //-------------------------------------------------------------------------------
    // POST-SCAN CLEANUP
    //

    // if we dynamically allocated more nodes then free them now
    if (initialNode.pNext)
    {
        // adjust the head to point to the first dynamically allocated block
        // (initialNode itself lives on the stack and must not be deleted)
        asyncInfo.pScanQueue = initialNode.pNext;

        // loop through and free all the queue nodes
        ProcessScanQueue(&asyncInfo, FreeScanQNode, NULL, TRUE);
    }

    // unlink our async scanning info from the table
    pTable->pAsyncScanInfo = NULL;
}

#ifdef DACCESS_COMPILE
// TableSegment is
variable size, where the data up to "rgValue" is static, +// then more is committed as TableSegment::bCommitLine * HANDLE_BYTES_PER_BLOCK. +// See SegmentInitialize in HandleTableCore.cpp. +uint32_t TableSegment::DacSize(TADDR addr) +{ + WRAPPER_NO_CONTRACT; + + uint8_t commitLine = 0; + DacReadAll(addr + offsetof(TableSegment, bCommitLine), &commitLine, sizeof(commitLine), true); + + return offsetof(TableSegment, rgValue) + (uint32_t)commitLine * HANDLE_BYTES_PER_BLOCK; +} +#endif +/*--------------------------------------------------------------------------*/ + diff --git a/src/gc/objecthandle.cpp b/src/gc/objecthandle.cpp new file mode 100644 index 0000000000..74a8a71c5e --- /dev/null +++ b/src/gc/objecthandle.cpp @@ -0,0 +1,1986 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + * Wraps handle table to implement various handle types (Strong, Weak, etc.) + * + + * + */ + +#include "common.h" + +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" + +#include "objecthandle.h" +#include "handletablepriv.h" + +#ifdef FEATURE_COMINTEROP +#include "comcallablewrapper.h" +#endif // FEATURE_COMINTEROP +#ifndef FEATURE_REDHAWK +#include "nativeoverlapped.h" +#endif // FEATURE_REDHAWK + +GVAL_IMPL(HandleTableMap, g_HandleTableMap); + +// Array of contexts used while scanning dependent handles for promotion. There are as many contexts as GC +// heaps and they're allocated by Ref_Initialize and initialized during each GC by GcDhInitialScan. +DhContext *g_pDependentHandleContexts; + +#ifndef DACCESS_COMPILE + +//---------------------------------------------------------------------------- + +/* + * struct VARSCANINFO + * + * used when tracing variable-strength handles. 
 */
struct VARSCANINFO
{
    uintptr_t lEnableMask;      // mask of types to trace
    HANDLESCANPROC pfnTrace;    // tracing function to use
    uintptr_t lp2;              // second parameter
};


//----------------------------------------------------------------------------

/*
 * Scan callback for tracing variable-strength handles.
 *
 * This callback is called to trace individual objects referred to by handles
 * in the variable-strength table.
 */
void CALLBACK VariableTraceDispatcher(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    WRAPPER_NO_CONTRACT;

    // lp2 is a pointer to our VARSCANINFO
    struct VARSCANINFO *pInfo = (struct VARSCANINFO *)lp2;

    // is the handle's dynamic type one we're currently scanning?
    // (the handle's current strength lives in its extra info word)
    if ((*pExtraInfo & pInfo->lEnableMask) != 0)
    {
        // yes - call the tracing function for this handle
        pInfo->pfnTrace(pObjRef, NULL, lp1, pInfo->lp2);
    }
}

#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK)
/*
 * Scan callback for tracing ref-counted handles.
 *
 * This callback is called to trace individual objects referred to by handles
 * in the refcounted table.
 */
void CALLBACK PromoteRefCounted(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    WRAPPER_NO_CONTRACT;
    UNREFERENCED_PARAMETER(pExtraInfo);

    // there are too many races when asynchronously scanning ref-counted handles so we no longer support it
    _ASSERTE(!((ScanContext*)lp1)->concurrent);

    LOG((LF_GC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("", pObjRef, "causes promotion of ", *pObjRef)));

    Object *pObj = VolatileLoad((PTR_Object*)pObjRef);

#ifdef _DEBUG
    Object *pOldObj = pObj;
#endif

    // only promote if the EE says the wrapper is still keeping the object alive
    if (!HndIsNullOrDestroyedHandle(pObj) && !GCHeap::GetGCHeap()->IsPromoted(pObj))
    {
        if (GCToEEInterface::RefCountedHandleCallbacks(pObj))
        {
            _ASSERTE(lp2);
            promote_func* callback = (promote_func*) lp2;
            callback(&pObj, (ScanContext *)lp1, 0);
        }
    }

    // Assert this object wasn't relocated since we are passing a temporary object's address.
    _ASSERTE(pOldObj == pObj);
}
#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK

void CALLBACK TraceDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    WRAPPER_NO_CONTRACT;

    if (pObjRef == NULL || pExtraInfo == NULL)
        return;

    // At this point, it's possible that either or both of the primary and secondary
    // objects are NULL.  However, if the secondary object is non-NULL, then the primary
    // object should also be non-NULL.
    _ASSERTE(*pExtraInfo == NULL || *pObjRef != NULL);

    // lp2 is a HANDLESCANPROC
    HANDLESCANPROC pfnTrace = (HANDLESCANPROC) lp2;

    // is the handle's secondary object non-NULL?
    if ((*pObjRef != NULL) && (*pExtraInfo != 0))
    {
        // yes - call the tracing function for this handle
        // (the secondary object is passed through as the callback's lp2)
        pfnTrace(pObjRef, NULL, lp1, *pExtraInfo);
    }
}

void CALLBACK UpdateDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    LIMITED_METHOD_CONTRACT;
    _ASSERTE(pExtraInfo);

    // a dependent handle stores its primary in the handle slot and its
    // secondary in the extra info word; relocate both references
    Object **pPrimaryRef = (Object **)pObjRef;
    Object **pSecondaryRef = (Object **)pExtraInfo;

    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT("Querying for new location of ",
            pPrimaryRef, "to ", *pPrimaryRef)));
    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT(" and ",
            pSecondaryRef, "to ", *pSecondaryRef)));

#ifdef _DEBUG
    Object *pOldPrimary = *pPrimaryRef;
    Object *pOldSecondary = *pSecondaryRef;
#endif

    _ASSERTE(lp2);
    promote_func* callback = (promote_func*) lp2;
    callback(pPrimaryRef, (ScanContext *)lp1, 0);
    callback(pSecondaryRef, (ScanContext *)lp1, 0);

#ifdef _DEBUG
    if (pOldPrimary != *pPrimaryRef)
        LOG((LF_GC|LF_ENC, LL_INFO10000, "Updating " FMT_HANDLE "from" FMT_ADDR "to " FMT_OBJECT "\n",
             DBG_ADDR(pPrimaryRef), DBG_ADDR(pOldPrimary), DBG_ADDR(*pPrimaryRef)));
    else
        LOG((LF_GC|LF_ENC, LL_INFO10000, "Updating " FMT_HANDLE "- " FMT_OBJECT "did not move\n",
             DBG_ADDR(pPrimaryRef), DBG_ADDR(*pPrimaryRef)));
    if (pOldSecondary != *pSecondaryRef)
        LOG((LF_GC|LF_ENC, LL_INFO10000, "Updating " FMT_HANDLE "from" FMT_ADDR "to " FMT_OBJECT "\n",
             DBG_ADDR(pSecondaryRef), DBG_ADDR(pOldSecondary), DBG_ADDR(*pSecondaryRef)));
    else
        LOG((LF_GC|LF_ENC, LL_INFO10000, "Updating " FMT_HANDLE "- " FMT_OBJECT "did not move\n",
             DBG_ADDR(pSecondaryRef), DBG_ADDR(*pSecondaryRef)));
#endif
}

void CALLBACK PromoteDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    LIMITED_METHOD_CONTRACT;
    _ASSERTE(pExtraInfo);

    Object **pPrimaryRef = (Object **)pObjRef;
    Object **pSecondaryRef = (Object **)pExtraInfo;
    LOG((LF_GC|LF_ENC, LL_INFO1000,
"Checking promotion of DependentHandle")); + LOG((LF_GC|LF_ENC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("\tPrimary:\t", pObjRef, "to ", *pObjRef))); + LOG((LF_GC|LF_ENC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("\tSecondary\t", pSecondaryRef, "to ", *pSecondaryRef))); + + ScanContext *sc = (ScanContext*)lp1; + DhContext *pDhContext = Ref_GetDependentHandleContext(sc); + + if (*pObjRef && GCHeap::GetGCHeap()->IsPromoted(*pPrimaryRef)) + { + if (!GCHeap::GetGCHeap()->IsPromoted(*pSecondaryRef)) + { + LOG((LF_GC|LF_ENC, LL_INFO10000, "\tPromoting secondary " LOG_OBJECT_CLASS(*pSecondaryRef))); + _ASSERTE(lp2); + promote_func* callback = (promote_func*) lp2; + callback(pSecondaryRef, (ScanContext *)lp1, 0); + // need to rescan because we might have promoted an object that itself has added fields and this + // promotion might be all that is pinning that object. If we've already scanned that dependent + // handle relationship, we could lose it secondary object. + pDhContext->m_fPromoted = true; + } + } + else if (*pObjRef) + { + // If we see a non-cleared primary which hasn't been promoted, record the fact. We will only require a + // rescan if this flag has been set (if it's clear then the previous scan found only clear and + // promoted handles, so there's no chance of finding an additional handle being promoted on a + // subsequent scan). 
+ pDhContext->m_fUnpromotedPrimaries = true; + } +} + +void CALLBACK ClearDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t /*lp1*/, uintptr_t /*lp2*/) +{ + LIMITED_METHOD_CONTRACT; + _ASSERTE(pExtraInfo); + + Object **pPrimaryRef = (Object **)pObjRef; + Object **pSecondaryRef = (Object **)pExtraInfo; + LOG((LF_GC|LF_ENC, LL_INFO1000, "Checking referent of DependentHandle")); + LOG((LF_GC|LF_ENC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("\tPrimary:\t", pPrimaryRef, "to ", *pPrimaryRef))); + LOG((LF_GC|LF_ENC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("\tSecondary\t", pSecondaryRef, "to ", *pSecondaryRef))); + + if (!GCHeap::GetGCHeap()->IsPromoted(*pPrimaryRef)) + { + LOG((LF_GC|LF_ENC, LL_INFO1000, "\tunreachable ", LOG_OBJECT_CLASS(*pPrimaryRef))); + LOG((LF_GC|LF_ENC, LL_INFO1000, "\tunreachable ", LOG_OBJECT_CLASS(*pSecondaryRef))); + *pPrimaryRef = NULL; + *pSecondaryRef = NULL; + } + else + { + _ASSERTE(GCHeap::GetGCHeap()->IsPromoted(*pSecondaryRef)); + LOG((LF_GC|LF_ENC, LL_INFO10000, "\tPrimary is reachable " LOG_OBJECT_CLASS(*pPrimaryRef))); + LOG((LF_GC|LF_ENC, LL_INFO10000, "\tSecondary is reachable " LOG_OBJECT_CLASS(*pSecondaryRef))); + } +} + +/* + * Scan callback for pinning handles. + * + * This callback is called to pin individual objects referred to by handles in + * the pinning table. 
 */
void CALLBACK PinObject(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    STATIC_CONTRACT_NOTHROW;
    STATIC_CONTRACT_GC_NOTRIGGER;
    STATIC_CONTRACT_SO_TOLERANT;
    STATIC_CONTRACT_MODE_COOPERATIVE;
    UNREFERENCED_PARAMETER(pExtraInfo);

    // PINNING IS BAD - DON'T DO IT IF YOU CAN AVOID IT
    LOG((LF_GC, LL_WARNING, LOG_HANDLE_OBJECT_CLASS("WARNING: ", pObjRef, "causes pinning of ", *pObjRef)));

    Object **pRef = (Object **)pObjRef;
    _ASSERTE(lp2);
    promote_func* callback = (promote_func*) lp2;
    callback(pRef, (ScanContext *)lp1, GC_CALL_PINNED);

#ifndef FEATURE_REDHAWK
    Object * pPinnedObj = *pRef;

    // async-pinned overlapped objects carry a user object (or array of user
    // objects) that must be reported as pinned as well
    if (!HndIsNullOrDestroyedHandle(pPinnedObj) && pPinnedObj->GetGCSafeMethodTable() == g_pOverlappedDataClass)
    {
        // reporting the pinned user objects
        OverlappedDataObject *pOverlapped = (OverlappedDataObject *)pPinnedObj;
        if (pOverlapped->m_userObject != NULL)
        {
            //callback(OBJECTREF_TO_UNCHECKED_OBJECTREF(pOverlapped->m_userObject), (ScanContext *)lp1, GC_CALL_PINNED);
            if (pOverlapped->m_isArray)
            {
                pOverlapped->m_userObjectInternal = static_cast<void*>(OBJECTREFToObject(pOverlapped->m_userObject));
                ArrayBase* pUserObject = (ArrayBase*)OBJECTREFToObject(pOverlapped->m_userObject);
                Object **ppObj = (Object**)pUserObject->GetDataPtr(TRUE);
                size_t num = pUserObject->GetNumComponents();
                for (size_t i = 0; i < num; i ++)
                {
                    callback(ppObj + i, (ScanContext *)lp1, GC_CALL_PINNED);
                }
            }
            else
            {
                callback(&OBJECTREF_TO_UNCHECKED_OBJECTREF(pOverlapped->m_userObject), (ScanContext *)lp1, GC_CALL_PINNED);
            }
        }

        if (pOverlapped->GetAppDomainId() != DefaultADID && pOverlapped->GetAppDomainIndex().m_dwIndex == DefaultADID)
        {
            OverlappedDataObject::MarkCleanupNeededFromGC();
        }
    }
#endif // !FEATURE_REDHAWK
}


/*
 * Scan callback for tracing strong handles.
 *
 * This callback is called to trace individual objects referred to by handles
 * in the strong table.
 */
void CALLBACK PromoteObject(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    WRAPPER_NO_CONTRACT;
    UNREFERENCED_PARAMETER(pExtraInfo);

    LOG((LF_GC, LL_INFO1000, LOG_HANDLE_OBJECT_CLASS("", pObjRef, "causes promotion of ", *pObjRef)));

    Object **ppRef = (Object **)pObjRef;
    _ASSERTE(lp2);
    promote_func* callback = (promote_func*) lp2;
    callback(ppRef, (ScanContext *)lp1, 0);
}


/*
 * Scan callback for disconnecting dead handles.
 *
 * This callback is called to check promotion of individual objects referred to by
 * handles in the weak tables.
 */
void CALLBACK CheckPromoted(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    WRAPPER_NO_CONTRACT;
    UNREFERENCED_PARAMETER(pExtraInfo);
    UNREFERENCED_PARAMETER(lp1);
    UNREFERENCED_PARAMETER(lp2);

    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Checking referent of Weak-", pObjRef, "to ", *pObjRef)));

    Object **ppRef = (Object **)pObjRef;
    // if the referent did not survive the GC, null out the weak handle's slot
    if (!GCHeap::GetGCHeap()->IsPromoted(*ppRef))
    {
        LOG((LF_GC, LL_INFO100, LOG_HANDLE_OBJECT_CLASS("Severing Weak-", pObjRef, "to unreachable ", *pObjRef)));

        *ppRef = NULL;
    }
    else
    {
        LOG((LF_GC, LL_INFO1000000, "reachable " LOG_OBJECT_CLASS(*pObjRef)));
    }
}

void CALLBACK CalculateSizedRefSize(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    LIMITED_METHOD_CONTRACT;

    _ASSERTE(pExtraInfo);

    Object **ppSizedRef = (Object **)pObjRef;
    size_t* pSize = (size_t *)pExtraInfo;
    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Getting size of referent of SizedRef-", pObjRef, "to ", *pObjRef)));

    ScanContext* sc = (ScanContext *)lp1;
    promote_func* callback = (promote_func*) lp2;

    // the referent's size is measured as the delta in this thread's promoted
    // byte count across the promotion of the sized-ref's object graph
    size_t sizeBegin = GCHeap::GetGCHeap()->GetPromotedBytes(sc->thread_number);
    callback(ppSizedRef, (ScanContext *)lp1, 0);
    size_t sizeEnd = GCHeap::GetGCHeap()->GetPromotedBytes(sc->thread_number);
    *pSize = sizeEnd -
sizeBegin; +} + +/* + * Scan callback for updating pointers. + * + * This callback is called to update pointers for individual objects referred to by + * handles in the weak and strong tables. + */ +void CALLBACK UpdatePointer(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2) +{ + LIMITED_METHOD_CONTRACT; + UNREFERENCED_PARAMETER(pExtraInfo); + + LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Querying for new location of ", pObjRef, "to ", *pObjRef))); + + Object **ppRef = (Object **)pObjRef; + +#ifdef _DEBUG + Object *pOldLocation = *ppRef; +#endif + + _ASSERTE(lp2); + promote_func* callback = (promote_func*) lp2; + callback(ppRef, (ScanContext *)lp1, 0); + +#ifdef _DEBUG + if (pOldLocation != *pObjRef) + LOG((LF_GC, LL_INFO10000, "Updating " FMT_HANDLE "from" FMT_ADDR "to " FMT_OBJECT "\n", + DBG_ADDR(pObjRef), DBG_ADDR(pOldLocation), DBG_ADDR(*pObjRef))); + else + LOG((LF_GC, LL_INFO100000, "Updating " FMT_HANDLE "- " FMT_OBJECT "did not move\n", + DBG_ADDR(pObjRef), DBG_ADDR(*pObjRef))); +#endif +} + + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) +/* + * Scan callback for updating pointers. + * + * This callback is called to update pointers for individual objects referred to by + * handles in the weak and strong tables. + */ +void CALLBACK ScanPointerForProfilerAndETW(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2) +{ +#ifndef FEATURE_REDHAWK + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + if (GetThreadNULLOk()) { MODE_COOPERATIVE; } + } + CONTRACTL_END; +#endif // FEATURE_REDHAWK + UNREFERENCED_PARAMETER(pExtraInfo); + UNREFERENCED_PARAMETER(lp2); + + LOG((LF_GC | LF_CORPROF, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Notifying profiler of ", pObjRef, "to ", *pObjRef))); + + // Get the baseobject (which can subsequently be cast into an OBJECTREF == ObjectID + Object **pRef = (Object **)pObjRef; + + // Get a hold of the heap ID that's tacked onto the end of the scancontext struct. 
+ ProfilingScanContext *pSC = (ProfilingScanContext *)lp1; + + uint32_t rootFlags = 0; + BOOL isDependent = FALSE; + + OBJECTHANDLE handle = (OBJECTHANDLE)(pRef); + switch (HandleFetchType(handle)) + { + case HNDTYPE_DEPENDENT: + isDependent = TRUE; + break; + case HNDTYPE_WEAK_SHORT: + case HNDTYPE_WEAK_LONG: +#ifdef FEATURE_COMINTEROP + case HNDTYPE_WEAK_WINRT: +#endif // FEATURE_COMINTEROP + rootFlags |= kEtwGCRootFlagsWeakRef; + break; + + case HNDTYPE_STRONG: + case HNDTYPE_SIZEDREF: + break; + + case HNDTYPE_PINNED: + case HNDTYPE_ASYNCPINNED: + rootFlags |= kEtwGCRootFlagsPinning; + break; + + case HNDTYPE_VARIABLE: +#ifdef FEATURE_REDHAWK + { + // Set the appropriate ETW flags for the current strength of this variable handle + uint32_t nVarHandleType = GetVariableHandleType(handle); + if (((nVarHandleType & VHT_WEAK_SHORT) != 0) || + ((nVarHandleType & VHT_WEAK_LONG) != 0)) + { + rootFlags |= kEtwGCRootFlagsWeakRef; + } + if ((nVarHandleType & VHT_PINNED) != 0) + { + rootFlags |= kEtwGCRootFlagsPinning; + } + + // No special ETW flag for strong handles (VHT_STRONG) + } +#else + _ASSERTE(!"Variable handle encountered"); +#endif + break; + +#if defined(FEATURE_COMINTEROP) && !defined(FEATURE_REDHAWK) + case HNDTYPE_REFCOUNTED: + rootFlags |= kEtwGCRootFlagsRefCounted; + if (*pRef != NULL) + { + ComCallWrapper* pWrap = ComCallWrapper::GetWrapperForObject((OBJECTREF)*pRef); + if (pWrap == NULL || !pWrap->IsWrapperActive()) + rootFlags |= kEtwGCRootFlagsWeakRef; + } + break; +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK + } + + _UNCHECKED_OBJECTREF pSec = NULL; + +#ifdef GC_PROFILING + // Give the profiler the objectref. 
+ if (pSC->fProfilerPinned) + { + if (!isDependent) + { + BEGIN_PIN_PROFILER(CORProfilerTrackGC()); + g_profControlBlock.pProfInterface->RootReference2( + (uint8_t *)*pRef, + kEtwGCRootKindHandle, + (EtwGCRootFlags)rootFlags, + pRef, + &pSC->pHeapId); + END_PIN_PROFILER(); + } + else + { + BEGIN_PIN_PROFILER(CORProfilerTrackConditionalWeakTableElements()); + pSec = (_UNCHECKED_OBJECTREF)HndGetHandleExtraInfo(handle); + g_profControlBlock.pProfInterface->ConditionalWeakTableElementReference( + (uint8_t*)*pRef, + (uint8_t*)pSec, + pRef, + &pSC->pHeapId); + END_PIN_PROFILER(); + } + } +#endif // GC_PROFILING + +#if defined(FEATURE_EVENT_TRACE) + // Notify ETW of the handle + if (ETW::GCLog::ShouldWalkHeapRootsForEtw()) + { + if (isDependent && (pSec == NULL)) + { + pSec = (_UNCHECKED_OBJECTREF)HndGetHandleExtraInfo(handle); + + } + + ETW::GCLog::RootReference( + handle, + *pRef, // object being rooted + pSec, // pSecondaryNodeForDependentHandle + isDependent, + pSC, + 0, // dwGCFlags, + rootFlags); // ETW handle flags + } +#endif // defined(FEATURE_EVENT_TRACE) +} +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + + + +/* + * Scan callback for updating pointers. + * + * This callback is called to update pointers for individual objects referred to by + * handles in the pinned table. 
 */
void CALLBACK UpdatePointerPinned(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2)
{
    LIMITED_METHOD_CONTRACT;
    UNREFERENCED_PARAMETER(pExtraInfo);

    Object **ppRef = (Object **)pObjRef;

    _ASSERTE(lp2);
    promote_func* callback = (promote_func*) lp2;
    callback(ppRef, (ScanContext *)lp1, GC_CALL_PINNED);

    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Updating ", pObjRef, "to pinned ", *pObjRef)));
}


//----------------------------------------------------------------------------

// flags describing the handle types; indexed by HNDTYPE_* value, so the
// order here must match the HNDTYPE_* numbering.
static const uint32_t s_rgTypeFlags[] =
{
    HNDF_NORMAL,    // HNDTYPE_WEAK_SHORT
    HNDF_NORMAL,    // HNDTYPE_WEAK_LONG
    HNDF_NORMAL,    // HNDTYPE_STRONG
    HNDF_NORMAL,    // HNDTYPE_PINNED
    HNDF_EXTRAINFO, // HNDTYPE_VARIABLE
    HNDF_NORMAL,    // HNDTYPE_REFCOUNTED
    HNDF_EXTRAINFO, // HNDTYPE_DEPENDENT
    HNDF_NORMAL,    // HNDTYPE_ASYNCPINNED
    HNDF_EXTRAINFO, // HNDTYPE_SIZEDREF
    HNDF_EXTRAINFO, // HNDTYPE_WEAK_WINRT
};

// Returns the number of per-heap handle table slots each bucket carries:
// 1 for workstation GC, one per processor for server GC.
int getNumberOfSlots()
{
    WRAPPER_NO_CONTRACT;

    // when Ref_Initialize called, GCHeap::GetNumberOfHeaps() is still 0, so use #procs as a workaround
    // it is legal since even if later #heaps < #procs we create handles by thread home heap
    // and just have extra unused slots in HandleTableBuckets, which does not take a lot of space
    if (!GCHeap::IsServerHeap())
        return 1;

#ifdef FEATURE_REDHAWK
    return g_SystemInfo.dwNumberOfProcessors;
#else
    return (CPUGroupInfo::CanEnableGCCPUGroups() ? CPUGroupInfo::GetNumActiveProcessors() :
                                                   g_SystemInfo.dwNumberOfProcessors);
#endif
}

// RAII helper that destroys a partially constructed HandleTableBucket (and
// the handle tables it owns) on failure paths, unless SuppressRelease() has
// been called after construction fully succeeded.
class HandleTableBucketHolder
{
private:
    HandleTableBucket* m_bucket;
    int m_slots;
    BOOL m_SuppressRelease;
public:
    HandleTableBucketHolder(HandleTableBucket* bucket, int slots);
    ~HandleTableBucketHolder();

    void SuppressRelease()
    {
        m_SuppressRelease = TRUE;
    }
};

HandleTableBucketHolder::HandleTableBucketHolder(HandleTableBucket* bucket, int slots)
    :m_bucket(bucket), m_slots(slots), m_SuppressRelease(FALSE)
{
}

HandleTableBucketHolder::~HandleTableBucketHolder()
{
    if (m_SuppressRelease)
    {
        return;
    }
    // Tear down every handle table created so far, then the table array and
    // the bucket itself.
    if (m_bucket->pTable)
    {
        for (int n = 0; n < m_slots; n ++)
        {
            if (m_bucket->pTable[n])
            {
                HndDestroyHandleTable(m_bucket->pTable[n]);
            }
        }
        delete [] m_bucket->pTable;
    }
    delete m_bucket;
}

// One-time initialization of the global handle table map: allocates the
// first bucket (with one handle table per slot) and the per-heap dependent
// handle scan contexts. Returns false on allocation failure.
bool Ref_Initialize()
{
    CONTRACTL
    {
        NOTHROW;
        WRAPPER(GC_NOTRIGGER);
        INJECT_FAULT(return false);
    }
    CONTRACTL_END;

    // sanity
    _ASSERTE(g_HandleTableMap.pBuckets == NULL);

    // Create an array of INITIAL_HANDLE_TABLE_ARRAY_SIZE HandleTableBuckets to hold the handle table sets
    HandleTableBucket** pBuckets = new (nothrow) HandleTableBucket * [ INITIAL_HANDLE_TABLE_ARRAY_SIZE ];
    if (pBuckets == NULL)
        return false;

    ZeroMemory(pBuckets,
        INITIAL_HANDLE_TABLE_ARRAY_SIZE * sizeof (HandleTableBucket *));

    // Create the first bucket
    HandleTableBucket * pBucket = new (nothrow) HandleTableBucket;
    if (pBucket != NULL)
    {
        pBucket->HandleTableIndex = 0;

        int n_slots = getNumberOfSlots();

        // Ensures cleanup of the bucket and any tables created below if we
        // bail out before SuppressRelease().
        HandleTableBucketHolder bucketHolder(pBucket, n_slots);

        // create the handle table set for the first bucket
        pBucket->pTable = new (nothrow) HHANDLETABLE[n_slots];
        if (pBucket->pTable == NULL)
            goto CleanupAndFail;

        ZeroMemory(pBucket->pTable,
            n_slots * sizeof(HHANDLETABLE));
        for (int uCPUindex = 0; uCPUindex < n_slots; uCPUindex++)
        {
            pBucket->pTable[uCPUindex] = HndCreateHandleTable(s_rgTypeFlags, _countof(s_rgTypeFlags), ADIndex(1));
            if (pBucket->pTable[uCPUindex] == NULL)
                goto CleanupAndFail;

            HndSetHandleTableIndex(pBucket->pTable[uCPUindex], 0);
        }

        pBuckets[0] = pBucket;
        bucketHolder.SuppressRelease();

        g_HandleTableMap.pBuckets = pBuckets;
        g_HandleTableMap.dwMaxIndex = INITIAL_HANDLE_TABLE_ARRAY_SIZE;
        g_HandleTableMap.pNext = NULL;

        // Allocate contexts used during dependent handle promotion scanning. There's one of these for every GC
        // heap since they're scanned in parallel.
        g_pDependentHandleContexts = new (nothrow) DhContext[n_slots];
        if (g_pDependentHandleContexts == NULL)
            goto CleanupAndFail;

        return true;
    }

CleanupAndFail:
    if (pBuckets != NULL)
        delete[] pBuckets;
    return false;
}

// Shuts down the handle table subsystem: frees the dependent handle
// contexts, the first bucket, and the bucket arrays of every map node.
void Ref_Shutdown()
{
    WRAPPER_NO_CONTRACT;

    if (g_pDependentHandleContexts)
    {
        delete [] g_pDependentHandleContexts;
        g_pDependentHandleContexts = NULL;
    }

    // are there any handle tables?
    if (g_HandleTableMap.pBuckets)
    {
        // don't destroy any of the indexed handle tables; they should
        // be destroyed externally.

        // destroy the global handle table bucket tables
        Ref_DestroyHandleTableBucket(g_HandleTableMap.pBuckets[0]);

        // destroy the handle table bucket array
        HandleTableMap *walk = &g_HandleTableMap;
        while (walk) {
            delete [] walk->pBuckets;
            walk = walk->pNext;
        }

        // null out the handle table array
        g_HandleTableMap.pNext = NULL;
        g_HandleTableMap.dwMaxIndex = 0;

        // null out the global table handle
        g_HandleTableMap.pBuckets = NULL;
    }
}

#ifndef FEATURE_REDHAWK
// ATTENTION: interface changed
// Note: this function called only from AppDomain::Init()
//
// Creates a new HandleTableBucket (one handle table per slot) for the given
// AppDomain index and publishes it into the first free slot of the global
// handle table map, growing the map with a new node if no slot is free.
HandleTableBucket *Ref_CreateHandleTableBucket(ADIndex uADIndex)
{
    CONTRACTL
    {
        THROWS;
        WRAPPER(GC_TRIGGERS);
        INJECT_FAULT(COMPlusThrowOM());
    }
    CONTRACTL_END;

    HandleTableBucket *result = NULL;
    HandleTableMap *walk;

    walk = &g_HandleTableMap;
    HandleTableMap *last = NULL;
    uint32_t offset = 0;

    result = new HandleTableBucket;
    result->pTable = NULL;

    // create handle table set for the bucket
    int n_slots = getNumberOfSlots();

    HandleTableBucketHolder bucketHolder(result, n_slots);

    result->pTable = new HHANDLETABLE [ n_slots ];
    ZeroMemory(result->pTable, n_slots * sizeof (HHANDLETABLE));

    for (int uCPUindex=0; uCPUindex < n_slots; uCPUindex++) {
        result->pTable[uCPUindex] = HndCreateHandleTable(s_rgTypeFlags, _countof(s_rgTypeFlags), uADIndex);
        if (!result->pTable[uCPUindex])
            COMPlusThrowOM();
    }

    for (;;) {
        // Do we have free slot
        while (walk) {
            for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) {
                if (walk->pBuckets[i] == 0) {
                    // Tentatively claim global index i+offset; the CAS below
                    // decides whether we actually won the slot.
                    for (int uCPUindex=0; uCPUindex < n_slots; uCPUindex++)
                        HndSetHandleTableIndex(result->pTable[uCPUindex], i+offset);

                    result->HandleTableIndex = i+offset;
                    if (Interlocked::CompareExchangePointer(&walk->pBuckets[i], result, NULL) == 0) {
                        // Get a free slot.
                        bucketHolder.SuppressRelease();
                        return result;
                    }
                }
            }
            last = walk;
            offset = walk->dwMaxIndex;
            walk = walk->pNext;
        }

        // No free slot.
        // Let's create a new node
        NewHolder<HandleTableMap> newMap;
        newMap = new HandleTableMap;

        newMap->pBuckets = new HandleTableBucket * [ INITIAL_HANDLE_TABLE_ARRAY_SIZE ];
        newMap.SuppressRelease();

        newMap->dwMaxIndex = last->dwMaxIndex + INITIAL_HANDLE_TABLE_ARRAY_SIZE;
        newMap->pNext = NULL;
        ZeroMemory(newMap->pBuckets,
            INITIAL_HANDLE_TABLE_ARRAY_SIZE * sizeof (HandleTableBucket *));

        if (Interlocked::CompareExchangePointer(&last->pNext, newMap.GetValue(), NULL) != NULL)
        {
            // This thread loses.
            delete [] newMap->pBuckets;
            delete newMap;
        }
        // Retry the slot search starting at the (possibly other thread's) new node.
        walk = last->pNext;
        offset = last->dwMaxIndex;
    }
}
#endif // !FEATURE_REDHAWK

// Unlinks the given bucket from the global handle table map without
// destroying it. Safe to call if the bucket was already removed.
void Ref_RemoveHandleTableBucket(HandleTableBucket *pBucket)
{
    LIMITED_METHOD_CONTRACT;

    size_t index = pBucket->HandleTableIndex;
    HandleTableMap* walk = &g_HandleTableMap;
    size_t offset = 0;

    while (walk)
    {
        if ((index < walk->dwMaxIndex) && (index >= offset))
        {
            // During AppDomain unloading, we first remove a handle table and then destroy
            // the table. As soon as the table is removed, the slot can be reused.
            if (walk->pBuckets[index - offset] == pBucket)
            {
                walk->pBuckets[index - offset] = NULL;
                return;
            }
        }
        offset = walk->dwMaxIndex;
        walk = walk->pNext;
    }

    // Didn't find it. This will happen typically from Ref_DestroyHandleTableBucket if
    // we explicitly call Ref_RemoveHandleTableBucket first.
}


// Removes the bucket from the map (if still present) and destroys all of
// its handle tables and the bucket itself.
void Ref_DestroyHandleTableBucket(HandleTableBucket *pBucket)
{
    WRAPPER_NO_CONTRACT;

    // this check is because here we might be called from AppDomain::Terminate after AppDomain::ClearGCRoots,
    // which calls Ref_RemoveHandleTableBucket itself

    Ref_RemoveHandleTableBucket(pBucket);
    for (int uCPUindex=0; uCPUindex < getNumberOfSlots(); uCPUindex++)
    {
        HndDestroyHandleTable(pBucket->pTable[uCPUindex]);
    }
    delete [] pBucket->pTable;
    delete pBucket;
}

// Maps a scan context to the handle table slot its GC thread should scan:
// the heap number under server GC, always 0 under workstation GC.
int getSlotNumber(ScanContext* sc)
{
    WRAPPER_NO_CONTRACT;

    return (GCHeap::IsServerHeap() ? sc->thread_number : 0);
}

// <TODO> - reexpress as complete only like hndtable does now!!! -fmh</REVISIT_TODO>
void Ref_EndSynchronousGC(uint32_t condemned, uint32_t maxgen)
{
    LIMITED_METHOD_CONTRACT;
    UNREFERENCED_PARAMETER(condemned);
    UNREFERENCED_PARAMETER(maxgen);

// NOT used, must be modified for MTHTS (scalable HandleTable scan) if planned to use:
// need to pass ScanContext info to split HT bucket by threads, or to be performed under t_join::join
/*
    // tell the table we finished a GC
    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) {
            HHANDLETABLE hTable = walk->pTable[i];
            if (hTable)
                HndNotifyGcCycleComplete(hTable, condemned, maxgen);
        }
        walk = walk->pNext;
    }
*/
}


// Creates a dependent handle whose primary is 'primary' and whose secondary
// (stored in the handle's extra info) is 'secondary'.
OBJECTHANDLE CreateDependentHandle(HHANDLETABLE table, OBJECTREF primary, OBJECTREF secondary)
{
    CONTRACTL
    {
        THROWS;
        GC_NOTRIGGER;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    OBJECTHANDLE handle = HndCreateHandle(table, HNDTYPE_DEPENDENT, primary);

    SetDependentHandleSecondary(handle, secondary);

    return handle;
}

// Stores 'objref' as the secondary object of a dependent handle, applying
// the handle write barrier for non-null stores.
void SetDependentHandleSecondary(OBJECTHANDLE handle, OBJECTREF objref)
{
    CONTRACTL
    {
        NOTHROW;
        GC_NOTRIGGER;
        SO_TOLERANT;
        MODE_COOPERATIVE;
    }
    CONTRACTL_END;

    // sanity
    _ASSERTE(handle);

#ifdef _DEBUG
    // handle should not be in unloaded domain
    ValidateAppDomainForHandle(handle);

    // Make sure the objref is valid before it is assigned to a handle
    ValidateAssignObjrefForHandle(objref, HndGetHandleTableADIndex(HndGetHandleTable(handle)));
#endif
    // unwrap the objectref we were given
    _UNCHECKED_OBJECTREF value = OBJECTREF_TO_UNCHECKED_OBJECTREF(objref);

    // if we are doing a non-NULL pointer store then invoke the write-barrier
    if (value)
        HndWriteBarrier(handle, objref);

    // store the pointer
    HndSetHandleExtraInfo(handle, HNDTYPE_DEPENDENT, (uintptr_t)value);
}


//----------------------------------------------------------------------------

/*
 * CreateVariableHandle.
 *
 * Creates a variable-strength handle.
 *
 * N.B. This routine is not a macro since we do validation in RETAIL.
 * We always validate the type here because it can come from external callers.
 */
OBJECTHANDLE CreateVariableHandle(HHANDLETABLE hTable, OBJECTREF object, uint32_t type)
{
    WRAPPER_NO_CONTRACT;

    // verify that we are being asked to create a valid type
    if (!IS_VALID_VHT_VALUE(type))
    {
        // bogus value passed in
        _ASSERTE(FALSE);
        return NULL;
    }

    // create the handle
    return HndCreateHandle(hTable, HNDTYPE_VARIABLE, object, (uintptr_t)type);
}

/*
 * GetVariableHandleType.
 *
 * Retrieves the dynamic type of a variable-strength handle.
 */
uint32_t GetVariableHandleType(OBJECTHANDLE handle)
{
    WRAPPER_NO_CONTRACT;

    return (uint32_t)HndGetHandleExtraInfo(handle);
}

/*
 * UpdateVariableHandleType.
 *
 * Changes the dynamic type of a variable-strength handle.
 *
 * N.B. This routine is not a macro since we do validation in RETAIL.
 * We always validate the type here because it can come from external callers.
 */
void UpdateVariableHandleType(OBJECTHANDLE handle, uint32_t type)
{
    WRAPPER_NO_CONTRACT;

    // verify that we are being asked to set a valid type
    if (!IS_VALID_VHT_VALUE(type))
    {
        // bogus value passed in
        _ASSERTE(FALSE);
        return;
    }

    // <REVISIT_TODO> (francish) CONCURRENT GC NOTE</REVISIT_TODO>
    //
    // If/when concurrent GC is implemented, we need to make sure variable handles
    // DON'T change type during an asynchronous scan, OR that we properly recover
    // from the change.  Some changes are benign, but for example changing to or
    // from a pinning handle in the middle of a scan would not be fun.
    //

    // store the type in the handle's extra info
    HndSetHandleExtraInfo(handle, HNDTYPE_VARIABLE, (uintptr_t)type);
}

/*
 * CompareExchangeVariableHandleType.
 *
 * Changes the dynamic type of a variable-strength handle.  Unlike UpdateVariableHandleType we assume that the
 * types have already been validated.
 */
uint32_t CompareExchangeVariableHandleType(OBJECTHANDLE handle, uint32_t oldType, uint32_t newType)
{
    WRAPPER_NO_CONTRACT;

    // verify that we are being asked to get/set valid types
    _ASSERTE(IS_VALID_VHT_VALUE(oldType) && IS_VALID_VHT_VALUE(newType));

    // attempt to store the type in the handle's extra info
    return (uint32_t)HndCompareExchangeHandleExtraInfo(handle, HNDTYPE_VARIABLE, (uintptr_t)oldType, (uintptr_t)newType);
}


/*
 * TraceVariableHandles.
 *
 * Convenience function for tracing variable-strength handles.
 * Wraps HndScanHandlesForGC.
 */
void TraceVariableHandles(HANDLESCANPROC pfnTrace, uintptr_t lp1, uintptr_t lp2, uint32_t uEnableMask, uint32_t condemned, uint32_t maxgen, uint32_t flags)
{
    WRAPPER_NO_CONTRACT;

    // set up to scan variable handles with the specified mask and trace function
    uint32_t type = HNDTYPE_VARIABLE;
    struct VARSCANINFO info = { (uintptr_t)uEnableMask, pfnTrace, lp2 };

    // Walk every bucket, scanning only the table slot owned by this scan
    // context's GC thread.
    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i++)
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber((ScanContext*) lp1)];
                if (hTable)
                {
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    if (g_fEnableARM)
                    {
                        ScanContext* sc = (ScanContext *)lp1;
                        sc->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(HndGetHandleTableADIndex(hTable));
                    }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    HndScanHandlesForGC(hTable, VariableTraceDispatcher,
                                        lp1, (uintptr_t)&info, &type, 1, condemned, maxgen, HNDGCF_EXTRAINFO | flags);
                }
            }
        walk = walk->pNext;
    }
}

/*
  loop scan version of TraceVariableHandles for single-thread-managed Ref_* functions
  should be kept in sync with the code above
*/
void TraceVariableHandlesBySingleThread(HANDLESCANPROC pfnTrace, uintptr_t lp1, uintptr_t lp2, uint32_t uEnableMask, uint32_t condemned, uint32_t maxgen, uint32_t flags)
{
    WRAPPER_NO_CONTRACT;

    // set up to scan variable handles with the specified mask and trace function
    uint32_t type = HNDTYPE_VARIABLE;
    struct VARSCANINFO info = { (uintptr_t)uEnableMask, pfnTrace, lp2 };

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
            if (walk->pBuckets[i] != NULL)
            {
                // this is the one of Ref_* function performed by single thread in MULTI_HEAPS case, so we need to loop through all HT of the bucket
                for (int uCPUindex=0; uCPUindex < getNumberOfSlots(); uCPUindex++)
                {
                    HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex];
                    if (hTable)
                        HndScanHandlesForGC(hTable, VariableTraceDispatcher,
                                            lp1, (uintptr_t)&info, &type, 1, condemned, maxgen, HNDGCF_EXTRAINFO | flags);
                }
            }
        walk = walk->pNext;
    }
}

//----------------------------------------------------------------------------

// Pins the referents of all pinning (and async-pinning) handles, plus
// variable handles currently in the VHT_PINNED state.
void Ref_TracePinningRoots(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn)
{
    WRAPPER_NO_CONTRACT;

    LOG((LF_GC, LL_INFO10000, "Pinning referents of pinned handles in generation %u\n", condemned));

    // pin objects pointed to by pinning handles
    uint32_t types[2] = {HNDTYPE_PINNED, HNDTYPE_ASYNCPINNED};
    uint32_t flags = sc->concurrent ? HNDGCF_ASYNC : HNDGCF_NORMAL;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber((ScanContext*) sc)];
                if (hTable)
                {
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    if (g_fEnableARM)
                    {
                        sc->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(HndGetHandleTableADIndex(hTable));
                    }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    HndScanHandlesForGC(hTable, PinObject, uintptr_t(sc), uintptr_t(fn), types, _countof(types), condemned, maxgen, flags);
                }
            }
        walk = walk->pNext;
    }

    // pin objects pointed to by variable handles whose dynamic type is VHT_PINNED
    TraceVariableHandles(PinObject, uintptr_t(sc), uintptr_t(fn), VHT_PINNED, condemned, maxgen, flags);
}


// Promotes the referents of strong handles (and, during ephemeral GCs,
// sized-ref handles), VHT_STRONG variable handles, and - outside of
// concurrent scans - ref-counted handles.
void Ref_TraceNormalRoots(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn)
{
    WRAPPER_NO_CONTRACT;

    LOG((LF_GC, LL_INFO10000, "Promoting referents of strong handles in generation %u\n", condemned));

    // promote objects pointed to by strong handles
    // during ephemeral GCs we also want to promote the ones pointed to by sizedref handles.
    uint32_t types[2] = {HNDTYPE_STRONG, HNDTYPE_SIZEDREF};
    // Full non-concurrent GCs scan sized-ref handles separately, so only
    // HNDTYPE_STRONG is scanned here in that case.
    uint32_t uTypeCount = (((condemned >= maxgen) && !GCHeap::GetGCHeap()->IsConcurrentGCInProgress()) ? 1 : _countof(types));
    uint32_t flags = (sc->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)];
                if (hTable)
                {
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    if (g_fEnableARM)
                    {
                        sc->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(HndGetHandleTableADIndex(hTable));
                    }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING

                    HndScanHandlesForGC(hTable, PromoteObject, uintptr_t(sc), uintptr_t(fn), types, uTypeCount, condemned, maxgen, flags);
                }
            }
        walk = walk->pNext;
    }

    // promote objects pointed to by variable handles whose dynamic type is VHT_STRONG
    TraceVariableHandles(PromoteObject, uintptr_t(sc), uintptr_t(fn), VHT_STRONG, condemned, maxgen, flags);

#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK)
    // don't scan ref-counted handles during concurrent phase as the clean-up of CCWs can race with AD unload and cause AV's
    if (!sc->concurrent)
    {
        // promote ref-counted handles
        uint32_t type = HNDTYPE_REFCOUNTED;

        walk = &g_HandleTableMap;
        while (walk) {
            for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
                if (walk->pBuckets[i] != NULL)
                {
                    HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)];
                    if (hTable)
                        HndScanHandlesForGC(hTable, PromoteRefCounted, uintptr_t(sc), uintptr_t(fn), &type, 1, condemned, maxgen, flags );
                }
            walk = walk->pNext;
        }
    }
#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK
}

#ifdef FEATURE_COMINTEROP

// Enumerates every ref-counted handle in every bucket and slot, invoking
// 'callback' for each.  Runs single-threaded over all slots.
void Ref_TraceRefCountHandles(HANDLESCANPROC callback, uintptr_t lParam1, uintptr_t lParam2)
{
    int max_slots = getNumberOfSlots();
    uint32_t handleType = HNDTYPE_REFCOUNTED;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                for (int j = 0; j < max_slots; j++)
                {
                    HHANDLETABLE hTable = walk->pBuckets[i]->pTable[j];
                    if (hTable)
                        HndEnumHandles(hTable, &handleType, 1, callback, lParam1, lParam2, false);
                }
            }
        }
        walk = walk->pNext;
    }
}

#endif



// Severs long-weak (and, where applicable, ref-counted) handles whose
// referents were not promoted, plus VHT_WEAK_LONG variable handles.
void Ref_CheckReachable(uint32_t condemned, uint32_t maxgen, uintptr_t lp1)
{
    WRAPPER_NO_CONTRACT;

    LOG((LF_GC, LL_INFO10000, "Checking reachability of referents of long-weak handles in generation %u\n", condemned));

    // these are the handle types that need to be checked
    uint32_t types[] =
    {
        HNDTYPE_WEAK_LONG,
#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK)
        HNDTYPE_REFCOUNTED,
#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK
    };

    // check objects pointed to by short weak handles
    uint32_t flags = (((ScanContext*) lp1)->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;
    int uCPUindex = getSlotNumber((ScanContext*) lp1);

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex];
                if (hTable)
                    HndScanHandlesForGC(hTable, CheckPromoted, lp1, 0, types, _countof(types), condemned, maxgen, flags);
            }
        }
        walk = walk->pNext;
    }

    // check objects pointed to by variable handles whose dynamic type is VHT_WEAK_LONG
    TraceVariableHandles(CheckPromoted, lp1, 0, VHT_WEAK_LONG, condemned, maxgen, flags);
}

//
// Dependent handles manages the relationship between primary and secondary objects, where the lifetime of
// the secondary object is dependent upon that of the primary. The handle itself holds the primary instance,
// while the extra handle info holds the secondary object. The secondary object should always be promoted
// when the primary is, and the handle should be cleared if the primary is not promoted. Can't use ordinary
// strong handle to refer to the secondary as this could cause a cycle in the graph if the secondary somehow
// pointed back to the primary. Can't use weak handle because that would not keep the secondary object alive.
//
// The result is that a dependent handle has the EFFECT of
//    * long weak handles in both the primary and secondary objects
//    * a strong reference from the primary object to the secondary one
//
// Dependent handles are currently used for
//
//    * managing fields added to EnC classes, where the handle itself holds the this pointer and the
//        secondary object represents the new field that was added.
//    * it is exposed to managed code (as System.Runtime.CompilerServices.DependentHandle) and is used in the
//        implementation of ConditionalWeakTable.
//

// Retrieve the dependent handle context associated with the current GC scan context.
DhContext *Ref_GetDependentHandleContext(ScanContext* sc)
{
    WRAPPER_NO_CONTRACT;
    return &g_pDependentHandleContexts[getSlotNumber(sc)];
}

// Scan the dependent handle table promoting any secondary object whose associated primary object is promoted.
//
// Multiple scans may be required since (a) secondary promotions made during one scan could cause the primary
// of another handle to be promoted and (b) the GC may not have marked all promoted objects at the time it
// initially calls us.
//
// Returns true if any promotions resulted from this scan.
bool Ref_ScanDependentHandlesForPromotion(DhContext *pDhContext)
{
    LOG((LF_GC, LL_INFO10000, "Checking liveness of referents of dependent handles in generation %u\n", pDhContext->m_iCondemned));
    uint32_t type = HNDTYPE_DEPENDENT;
    uint32_t flags = (pDhContext->m_pScanContext->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;
    flags |= HNDGCF_EXTRAINFO;

    // Keep a note of whether we promoted anything over the entire scan (not just the last iteration). We need
    // to return this data since under server GC promotions from this table may cause further promotions in
    // tables handled by other threads.
    bool fAnyPromotions = false;

    // Keep rescanning the table while both the following conditions are true:
    //  1) There's at least primary object left that could have been promoted.
    //  2) We performed at least one secondary promotion (which could have caused a primary promotion) on the
    //     last scan.
    // Note that even once we terminate the GC may call us again (because it has caused more objects to be
    // marked as promoted). But we scan in a loop here anyway because it is cheaper for us to loop than the GC
    // (especially on server GC where each external cycle has to be synchronized between GC worker threads).
    do
    {
        // Assume the conditions for re-scanning are both false initially. The scan callback below
        // (PromoteDependentHandle) will set the relevant flag on the first unpromoted primary it sees or
        // secondary promotion it performs.
        pDhContext->m_fUnpromotedPrimaries = false;
        pDhContext->m_fPromoted = false;

        HandleTableMap *walk = &g_HandleTableMap;
        while (walk)
        {
            for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
            {
                if (walk->pBuckets[i] != NULL)
                {
                    HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(pDhContext->m_pScanContext)];
                    if (hTable)
                    {
                        HndScanHandlesForGC(hTable,
                                            PromoteDependentHandle,
                                            uintptr_t(pDhContext->m_pScanContext),
                                            uintptr_t(pDhContext->m_pfnPromoteFunction),
                                            &type, 1,
                                            pDhContext->m_iCondemned,
                                            pDhContext->m_iMaxGen,
                                            flags );
                    }
                }
            }
            walk = walk->pNext;
        }

        if (pDhContext->m_fPromoted)
            fAnyPromotions = true;

    } while (pDhContext->m_fUnpromotedPrimaries && pDhContext->m_fPromoted);

    return fAnyPromotions;
}

// Perform a scan of dependent handles for the purpose of clearing any that haven't had their primary
// promoted.
void Ref_ScanDependentHandlesForClearing(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn)
{
    LOG((LF_GC, LL_INFO10000, "Clearing dead dependent handles in generation %u\n", condemned));
    uint32_t type = HNDTYPE_DEPENDENT;
    uint32_t flags = (sc->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;
    flags |= HNDGCF_EXTRAINFO;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)];
                if (hTable)
                {
                    HndScanHandlesForGC(hTable, ClearDependentHandle, uintptr_t(sc), uintptr_t(fn), &type, 1, condemned, maxgen, flags );
                }
            }
        }
        walk = walk->pNext;
    }
}

// Perform a scan of dependent handles for the purpose of updating handles to track relocated objects.
void Ref_ScanDependentHandlesForRelocation(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn)
{
    LOG((LF_GC, LL_INFO10000, "Relocating moved dependent handles in generation %u\n", condemned));
    uint32_t type = HNDTYPE_DEPENDENT;
    uint32_t flags = (sc->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;
    flags |= HNDGCF_EXTRAINFO;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)];
                if (hTable)
                {
                    HndScanHandlesForGC(hTable, UpdateDependentHandle, uintptr_t(sc), uintptr_t(fn), &type, 1, condemned, maxgen, flags );
                }
            }
        }
        walk = walk->pNext;
    }
}

/*
  loop scan version of TraceVariableHandles for single-thread-managed Ref_* functions
  should be kept in sync with the code above
*/
void TraceDependentHandlesBySingleThread(HANDLESCANPROC pfnTrace, uintptr_t lp1, uint32_t condemned, uint32_t maxgen, uint32_t flags)
{
    WRAPPER_NO_CONTRACT;

    // set up to scan variable handles with the specified mask and trace function
    uint32_t type = HNDTYPE_DEPENDENT;

    HandleTableMap *walk = &g_HandleTableMap;
    while (walk) {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
            if (walk->pBuckets[i] != NULL)
            {
                // this is the one of Ref_* function performed by single thread in MULTI_HEAPS case, so we need to loop through all HT of the bucket
                for (int uCPUindex=0; uCPUindex < getNumberOfSlots(); uCPUindex++)
                {
                    HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex];
                    if (hTable)
                        HndScanHandlesForGC(hTable, TraceDependentHandle,
                                            lp1, (uintptr_t)pfnTrace, &type, 1, condemned, maxgen, HNDGCF_EXTRAINFO | flags);
                }
            }
        walk = walk->pNext;
    }
}


// We scan handle tables by their buckets (ie, AD index). We could get into the situation where
// the AD indices are not very compacted (for example if we have just unloaded ADs and their
// indices haven't been reused yet) and we could be scanning them in an unbalanced fashion.
// Consider using an array to represent the compacted form of all AD indices exist for the
// sized ref handles.
void ScanSizedRefByAD(uint32_t maxgen, HANDLESCANPROC scanProc, ScanContext* sc, Ref_promote_func* fn, uint32_t flags)
{
    HandleTableMap *walk = &g_HandleTableMap;
    uint32_t type = HNDTYPE_SIZEDREF;
    int uCPUindex = getSlotNumber(sc);
    int n_slots = GCHeap::GetGCHeap()->GetNumberOfHeaps();

    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                // Distribute buckets across GC threads by AD index modulo the
                // heap count; the owning thread scans all slots of its buckets.
                ADIndex adIndex = HndGetHandleTableADIndex(walk->pBuckets[i]->pTable[0]);
                if ((adIndex.m_dwIndex % n_slots) == (uint32_t)uCPUindex)
                {
                    for (int index = 0; index < n_slots; index++)
                    {
                        HHANDLETABLE hTable = walk->pBuckets[i]->pTable[index];
                        if (hTable)
                        {
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
                            if (g_fEnableARM)
                            {
                                sc->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(adIndex);
                            }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
                            HndScanHandlesForGC(hTable, scanProc, uintptr_t(sc), uintptr_t(fn), &type, 1, maxgen, maxgen, flags);
                        }
                    }
                }
            }
        }
        walk = walk->pNext;
    }
}

// Scans the sized-ref handles of every bucket, each GC thread covering only
// its own per-heap slot.
void ScanSizedRefByCPU(uint32_t maxgen, HANDLESCANPROC scanProc, ScanContext* sc, Ref_promote_func* fn, uint32_t flags)
{
    HandleTableMap *walk = &g_HandleTableMap;
    uint32_t type = HNDTYPE_SIZEDREF;
    int uCPUindex = getSlotNumber(sc);

    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex];
                if (hTable)
                {
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
                    if (g_fEnableARM)
                    {
                        sc->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(HndGetHandleTableADIndex(hTable));
                    }
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING

                    HndScanHandlesForGC(hTable, scanProc, uintptr_t(sc), uintptr_t(fn), &type, 1, maxgen, maxgen, flags);
                }
            }
        }
        walk = walk->pNext;
    }
}

// Promotes sized-ref referents and records the promoted-byte size of each
// (via CalculateSizedRefSize).  Only valid for full GCs (condemned == maxgen).
void Ref_ScanSizedRefHandles(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn)
{
    LOG((LF_GC, LL_INFO10000, "Scanning SizedRef handles to in generation %u\n", condemned));
    UNREFERENCED_PARAMETER(condemned);
    _ASSERTE (condemned == maxgen);
    uint32_t flags = (sc->concurrent ? HNDGCF_ASYNC : HNDGCF_NORMAL) | HNDGCF_EXTRAINFO;

    ScanSizedRefByCPU(maxgen, CalculateSizedRefSize, sc, fn, flags);
}

// Severs short-weak (and WinRT weak) handles whose referents were not
// promoted, plus VHT_WEAK_SHORT variable handles.
void Ref_CheckAlive(uint32_t condemned, uint32_t maxgen, uintptr_t lp1)
{
    WRAPPER_NO_CONTRACT;

    LOG((LF_GC, LL_INFO10000, "Checking liveness of referents of short-weak handles in generation %u\n", condemned));

    // perform a multi-type scan that checks for unreachable objects
    uint32_t types[] =
    {
        HNDTYPE_WEAK_SHORT
#ifdef FEATURE_COMINTEROP
        , HNDTYPE_WEAK_WINRT
#endif // FEATURE_COMINTEROP
    };
    uint32_t flags = (((ScanContext*) lp1)->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL;

    int uCPUindex = getSlotNumber((ScanContext*) lp1);
    HandleTableMap *walk = &g_HandleTableMap;
    while (walk)
    {
        for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++)
        {
            if (walk->pBuckets[i] != NULL)
            {
                HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex];
                if (hTable)
                    HndScanHandlesForGC(hTable, CheckPromoted, lp1, 0, types, _countof(types), condemned, maxgen, flags);
            }
        }
        walk = walk->pNext;
    }
    // check objects pointed to by variable handles whose dynamic type is VHT_WEAK_SHORT
    TraceVariableHandles(CheckPromoted, lp1, 0, VHT_WEAK_SHORT, condemned, maxgen, flags);
}

// Counter used to elect exactly one server-GC thread to scan the syncblock
// cache in Ref_UpdatePointers.
static VOLATILE(int32_t) uCount = 0;

// NOTE: Please: if you update this function, update the very similar profiling function immediately below!!!
+void Ref_UpdatePointers(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn) +{ + WRAPPER_NO_CONTRACT; + + // For now, treat the syncblock as if it were short weak handles. <REVISIT_TODO>Later, get + // the benefits of fast allocation / free & generational awareness by supporting + // the SyncTable as a new block type. + // @TODO cwb: wait for compelling performance measurements.</REVISIT_TODO> + BOOL bDo = TRUE; + + if (GCHeap::IsServerHeap()) + { + bDo = (Interlocked::Increment(&uCount) == 1); + Interlocked::CompareExchange (&uCount, 0, GCHeap::GetGCHeap()->GetNumberOfHeaps()); + _ASSERTE (uCount <= GCHeap::GetGCHeap()->GetNumberOfHeaps()); + } + + if (bDo) + GCToEEInterface::SyncBlockCacheWeakPtrScan(&UpdatePointer, uintptr_t(sc), uintptr_t(fn)); + + LOG((LF_GC, LL_INFO10000, "Updating pointers to referents of non-pinning handles in generation %u\n", condemned)); + + // these are the handle types that need their pointers updated + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + HNDTYPE_STRONG, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK +#ifdef FEATURE_COMINTEROP + HNDTYPE_WEAK_WINRT, +#endif // FEATURE_COMINTEROP + HNDTYPE_SIZEDREF, + }; + + // perform a multi-type scan that updates pointers + uint32_t flags = (sc->concurrent) ? 
HNDGCF_ASYNC : HNDGCF_NORMAL; + + HandleTableMap *walk = &g_HandleTableMap; + while (walk) { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + if (walk->pBuckets[i] != NULL) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)]; + if (hTable) + HndScanHandlesForGC(hTable, UpdatePointer, uintptr_t(sc), uintptr_t(fn), types, _countof(types), condemned, maxgen, flags); + } + walk = walk->pNext; + } + + // update pointers in variable handles whose dynamic type is VHT_WEAK_SHORT, VHT_WEAK_LONG or VHT_STRONG + TraceVariableHandles(UpdatePointer, uintptr_t(sc), uintptr_t(fn), VHT_WEAK_SHORT | VHT_WEAK_LONG | VHT_STRONG, condemned, maxgen, flags); +} + +#if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +// Please update this if you change the Ref_UpdatePointers function above. +void Ref_ScanPointersForProfilerAndETW(uint32_t maxgen, uintptr_t lp1) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC | LF_CORPROF, LL_INFO10000, "Scanning all handle roots for profiler.\n")); + + // Don't scan the sync block because they should not be reported. 
They are weak handles only + + // <REVISIT_TODO>We should change the following to not report weak either + // these are the handle types that need their pointers updated</REVISIT_TODO> + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + HNDTYPE_STRONG, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK +#ifdef FEATURE_COMINTEROP + HNDTYPE_WEAK_WINRT, +#endif // FEATURE_COMINTEROP + HNDTYPE_PINNED, +// HNDTYPE_VARIABLE, + HNDTYPE_ASYNCPINNED, + HNDTYPE_SIZEDREF, + }; + + uint32_t flags = HNDGCF_NORMAL; + + // perform a multi-type scan that updates pointers + HandleTableMap *walk = &g_HandleTableMap; + while (walk) { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + if (walk->pBuckets[i] != NULL) + // this is the one of Ref_* function performed by single thread in MULTI_HEAPS case, so we need to loop through all HT of the bucket + for (int uCPUindex=0; uCPUindex < getNumberOfSlots(); uCPUindex++) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex]; + if (hTable) + HndScanHandlesForGC(hTable, &ScanPointerForProfilerAndETW, lp1, 0, types, _countof(types), maxgen, maxgen, flags); + } + walk = walk->pNext; + } + + // update pointers in variable handles whose dynamic type is VHT_WEAK_SHORT, VHT_WEAK_LONG or VHT_STRONG + TraceVariableHandlesBySingleThread(&ScanPointerForProfilerAndETW, lp1, 0, VHT_WEAK_SHORT | VHT_WEAK_LONG | VHT_STRONG, maxgen, maxgen, flags); +} + +void Ref_ScanDependentHandlesForProfilerAndETW(uint32_t maxgen, ProfilingScanContext * SC) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC | LF_CORPROF, LL_INFO10000, "Scanning dependent handles for profiler.\n")); + + uint32_t flags = HNDGCF_NORMAL; + + uintptr_t lp1 = (uintptr_t)SC; + // we'll re-use pHeapId (which was either unused (0) or freed by EndRootReferences2 + // (-1)), so reset it to NULL + _ASSERTE((*((size_t *)(&SC->pHeapId)) == (size_t)(-1)) || + (*((size_t *)(&SC->pHeapId)) 
== (size_t)(0))); + SC->pHeapId = NULL; + TraceDependentHandlesBySingleThread(&ScanPointerForProfilerAndETW, lp1, maxgen, maxgen, flags); +} + +#endif // defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) + +// Callback to enumerate all object references held in handles. +void CALLBACK ScanPointer(_UNCHECKED_OBJECTREF *pObjRef, uintptr_t *pExtraInfo, uintptr_t lp1, uintptr_t lp2) +{ + WRAPPER_NO_CONTRACT; + UNREFERENCED_PARAMETER(pExtraInfo); + + Object **pRef = (Object **)pObjRef; + _ASSERTE(lp2); + promote_func* callback = (promote_func*)lp2; + callback(pRef, (ScanContext *)lp1, 0); +} + +// Enumerate all object references held by any of the handle tables in the system. +void Ref_ScanPointers(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn) +{ + WRAPPER_NO_CONTRACT; + + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + HNDTYPE_STRONG, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK + HNDTYPE_PINNED, + HNDTYPE_ASYNCPINNED, + HNDTYPE_SIZEDREF, + }; + + uint32_t flags = HNDGCF_NORMAL; + + // perform a multi-type scan that enumerates pointers + for (HandleTableMap * walk = &g_HandleTableMap; + walk != nullptr; + walk = walk->pNext) + { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i++) + { + if (walk->pBuckets[i] != NULL) + { + // this is the one of Ref_* function performed by single thread in MULTI_HEAPS case, so we need to loop through all HT of the bucket + for (int uCPUindex = 0; uCPUindex < getNumberOfSlots(); uCPUindex++) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex]; + if (hTable) + HndScanHandlesForGC(hTable, &ScanPointer, uintptr_t(sc), uintptr_t(fn), types, _countof(types), condemned, maxgen, flags); + } + } + } + } + + // enumerate pointers in variable handles whose dynamic type is VHT_WEAK_SHORT, VHT_WEAK_LONG or VHT_STRONG + TraceVariableHandlesBySingleThread(&ScanPointer, uintptr_t(sc), 
uintptr_t(fn), VHT_WEAK_SHORT | VHT_WEAK_LONG | VHT_STRONG, condemned, maxgen, flags); +} + +void Ref_UpdatePinnedPointers(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC, LL_INFO10000, "Updating pointers to referents of pinning handles in generation %u\n", condemned)); + + // these are the handle types that need their pointers updated + uint32_t types[2] = {HNDTYPE_PINNED, HNDTYPE_ASYNCPINNED}; + uint32_t flags = (sc->concurrent) ? HNDGCF_ASYNC : HNDGCF_NORMAL; + + HandleTableMap *walk = &g_HandleTableMap; + while (walk) { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + if (walk->pBuckets[i] != NULL) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)]; + if (hTable) + HndScanHandlesForGC(hTable, UpdatePointerPinned, uintptr_t(sc), uintptr_t(fn), types, _countof(types), condemned, maxgen, flags); + } + walk = walk->pNext; + } + + // update pointers in variable handles whose dynamic type is VHT_PINNED + TraceVariableHandles(UpdatePointerPinned, uintptr_t(sc), uintptr_t(fn), VHT_PINNED, condemned, maxgen, flags); +} + + +void Ref_AgeHandles(uint32_t condemned, uint32_t maxgen, uintptr_t lp1) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC, LL_INFO10000, "Aging handles in generation %u\n", condemned)); + + // these are the handle types that need their ages updated + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + + HNDTYPE_STRONG, + + HNDTYPE_PINNED, + HNDTYPE_VARIABLE, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK +#ifdef FEATURE_COMINTEROP + HNDTYPE_WEAK_WINRT, +#endif // FEATURE_COMINTEROP + HNDTYPE_ASYNCPINNED, + HNDTYPE_SIZEDREF, + }; + + int uCPUindex = getSlotNumber((ScanContext*) lp1); + // perform a multi-type scan that ages the handles + HandleTableMap *walk = &g_HandleTableMap; + while (walk) { + for (uint32_t i = 0; i < 
INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + if (walk->pBuckets[i] != NULL) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex]; + if (hTable) + HndScanHandlesForGC(hTable, NULL, 0, 0, types, _countof(types), condemned, maxgen, HNDGCF_AGE); + } + walk = walk->pNext; + } +} + + +void Ref_RejuvenateHandles(uint32_t condemned, uint32_t maxgen, uintptr_t lp1) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC, LL_INFO10000, "Rejuvenating handles.\n")); + + // these are the handle types that need their ages updated + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + + + HNDTYPE_STRONG, + + HNDTYPE_PINNED, + HNDTYPE_VARIABLE, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK +#ifdef FEATURE_COMINTEROP + HNDTYPE_WEAK_WINRT, +#endif // FEATURE_COMINTEROP + HNDTYPE_ASYNCPINNED, + HNDTYPE_SIZEDREF, + }; + + int uCPUindex = getSlotNumber((ScanContext*) lp1); + // reset the ages of these handles + HandleTableMap *walk = &g_HandleTableMap; + while (walk) { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + if (walk->pBuckets[i] != NULL) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[uCPUindex]; + if (hTable) + HndResetAgeMap(hTable, types, _countof(types), condemned, maxgen, HNDGCF_NORMAL); + } + walk = walk->pNext; + } +} + +void Ref_VerifyHandleTable(uint32_t condemned, uint32_t maxgen, ScanContext* sc) +{ + WRAPPER_NO_CONTRACT; + + LOG((LF_GC, LL_INFO10000, "Verifying handles.\n")); + + // these are the handle types that need to be verified + uint32_t types[] = + { + HNDTYPE_WEAK_SHORT, + HNDTYPE_WEAK_LONG, + + + HNDTYPE_STRONG, + + HNDTYPE_PINNED, + HNDTYPE_VARIABLE, +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) + HNDTYPE_REFCOUNTED, +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK +#ifdef FEATURE_COMINTEROP + HNDTYPE_WEAK_WINRT, +#endif // FEATURE_COMINTEROP + HNDTYPE_ASYNCPINNED, + HNDTYPE_SIZEDREF, + HNDTYPE_DEPENDENT, + }; + + // 
verify these handles + HandleTableMap *walk = &g_HandleTableMap; + while (walk) + { + for (uint32_t i = 0; i < INITIAL_HANDLE_TABLE_ARRAY_SIZE; i ++) + { + if (walk->pBuckets[i] != NULL) + { + HHANDLETABLE hTable = walk->pBuckets[i]->pTable[getSlotNumber(sc)]; + if (hTable) + HndVerifyTable(hTable, types, _countof(types), condemned, maxgen, HNDGCF_NORMAL); + } + } + walk = walk->pNext; + } +} + +int GetCurrentThreadHomeHeapNumber() +{ + WRAPPER_NO_CONTRACT; + + if (!GCHeap::IsGCHeapInitialized()) + return 0; + return GCHeap::GetGCHeap()->GetHomeHeapNumber(); +} + +bool HandleTableBucket::Contains(OBJECTHANDLE handle) +{ + LIMITED_METHOD_CONTRACT; + + if (NULL == handle) + { + return FALSE; + } + + HHANDLETABLE hTable = HndGetHandleTable(handle); + for (int uCPUindex=0; uCPUindex < GCHeap::GetGCHeap()->GetNumberOfHeaps(); uCPUindex++) + { + if (hTable == this->pTable[uCPUindex]) + { + return TRUE; + } + } + return FALSE; +} + +void DestroySizedRefHandle(OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + MODE_ANY; + } + CONTRACTL_END; + + HHANDLETABLE hTable = HndGetHandleTable(handle); + HndDestroyHandle(hTable , HNDTYPE_SIZEDREF, handle); + AppDomain* pDomain = SystemDomain::GetAppDomainAtIndex(HndGetHandleTableADIndex(hTable)); + pDomain->DecNumSizedRefHandles(); +} + +#ifdef FEATURE_COMINTEROP + +void DestroyWinRTWeakHandle(OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + SO_TOLERANT; + } + CONTRACTL_END; + + // Release the WinRT weak reference if we have one. We're assuming that this will not reenter the + // runtime, since if we are pointing at a managed object, we should not be using a HNDTYPE_WEAK_WINRT + // but rather a HNDTYPE_WEAK_SHORT or HNDTYPE_WEAK_LONG. 
+ IWeakReference* pWinRTWeakReference = reinterpret_cast<IWeakReference*>(HndGetHandleExtraInfo(handle)); + if (pWinRTWeakReference != NULL) + { + pWinRTWeakReference->Release(); + } + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_WINRT, handle); +} + +#endif // FEATURE_COMINTEROP + +#endif // !DACCESS_COMPILE + +OBJECTREF GetDependentHandleSecondary(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + return UNCHECKED_OBJECTREF_TO_OBJECTREF((_UNCHECKED_OBJECTREF)HndGetHandleExtraInfo(handle)); +} diff --git a/src/gc/objecthandle.h b/src/gc/objecthandle.h new file mode 100644 index 0000000000..89365267d6 --- /dev/null +++ b/src/gc/objecthandle.h @@ -0,0 +1,686 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + * Wraps handle table to implement various handle types (Strong, Weak, etc.) + * + + * + */ + +#ifndef _OBJECTHANDLE_H +#define _OBJECTHANDLE_H + +/* + * include handle manager declarations + */ +#include "handletable.h" + +#ifdef FEATURE_COMINTEROP +#include <weakreference.h> +#endif // FEATURE_COMINTEROP + +/* + * Convenience macros for accessing handles. StoreFirstObjectInHandle is like + * StoreObjectInHandle, except it only succeeds if transitioning from NULL to + * non-NULL. In other words, if this handle is being initialized for the first + * time. 
+ */ +#define ObjectFromHandle(handle) HndFetchHandle(handle) +#define StoreObjectInHandle(handle, object) HndAssignHandle(handle, object) +#define InterlockedCompareExchangeObjectInHandle(handle, object, oldObj) HndInterlockedCompareExchangeHandle(handle, object, oldObj) +#define StoreFirstObjectInHandle(handle, object) HndFirstAssignHandle(handle, object) +#define ObjectHandleIsNull(handle) HndIsNull(handle) +#define IsHandleNullUnchecked(handle) HndCheckForNullUnchecked(handle) + + +/* + * HANDLES + * + * The default type of handle is a strong handle. + * + */ +#define HNDTYPE_DEFAULT HNDTYPE_STRONG + + +/* + * WEAK HANDLES + * + * Weak handles are handles that track an object as long as it is alive, + * but do not keep the object alive if there are no strong references to it. + * + * The default type of weak handle is 'long-lived' weak handle. + * + */ +#define HNDTYPE_WEAK_DEFAULT HNDTYPE_WEAK_LONG + + +/* + * SHORT-LIVED WEAK HANDLES + * + * Short-lived weak handles are weak handles that track an object until the + * first time it is detected to be unreachable. At this point, the handle is + * severed, even if the object will be visible from a pending finalization + * graph. This further implies that short weak handles do not track + * across object resurrections. + * + */ +#define HNDTYPE_WEAK_SHORT (0) + + +/* + * LONG-LIVED WEAK HANDLES + * + * Long-lived weak handles are weak handles that track an object until the + * object is actually reclaimed. Unlike short weak handles, long weak handles + * continue to track their referents through finalization and across any + * resurrections that may occur. + * + */ +#define HNDTYPE_WEAK_LONG (1) + + +/* + * STRONG HANDLES + * + * Strong handles are handles which function like a normal object reference. + * The existence of a strong handle for an object will cause the object to + * be promoted (remain alive) through a garbage collection cycle. 
+ * + */ +#define HNDTYPE_STRONG (2) + + +/* + * PINNED HANDLES + * + * Pinned handles are strong handles which have the added property that they + * prevent an object from moving during a garbage collection cycle. This is + * useful when passing a pointer to object innards out of the runtime while GC + * may be enabled. + * + * NOTE: PINNING AN OBJECT IS EXPENSIVE AS IT PREVENTS THE GC FROM ACHIEVING + * OPTIMAL PACKING OF OBJECTS DURING EPHEMERAL COLLECTIONS. THIS TYPE + * OF HANDLE SHOULD BE USED SPARINGLY! + */ +#define HNDTYPE_PINNED (3) + + +/* + * VARIABLE HANDLES + * + * Variable handles are handles whose type can be changed dynamically. They + * are larger than other types of handles, and are scanned a little more often, + * but are useful when the handle owner needs an efficient way to change the + * strength of a handle on the fly. + * + */ +#define HNDTYPE_VARIABLE (4) + +#if defined(FEATURE_COMINTEROP) || defined(FEATURE_REDHAWK) +/* + * REFCOUNTED HANDLES + * + * Refcounted handles are handles that behave as strong handles while the + * refcount on them is greater than 0 and behave as weak handles otherwise. + * + * N.B. These are currently NOT general purpose. + * The implementation is tied to COM Interop. + * + */ +#define HNDTYPE_REFCOUNTED (5) +#endif // FEATURE_COMINTEROP || FEATURE_REDHAWK + + +/* + * DEPENDENT HANDLES + * + * Dependent handles are two handles that need to have the same lifetime. One handle refers to a secondary object + * that needs to have the same lifetime as the primary object. The secondary object should not cause the primary + * object to be referenced, but as long as the primary object is alive, so must be the secondary + * + * They are currently used for EnC for adding new field members to existing instantiations under EnC modes where + * the primary object is the original instantiation and the secondary represents the added field. + * + * They are also used to implement the ConditionalWeakTable class in mscorlib.dll. 
If you want to use + * these from managed code, they are exposed to BCL through the managed DependentHandle class. + * + * + */ +#define HNDTYPE_DEPENDENT (6) + +/* + * PINNED HANDLES for asynchronous operation + * + * Pinned handles are strong handles which have the added property that they + * prevent an object from moving during a garbage collection cycle. This is + * useful when passing a pointer to object innards out of the runtime while GC + * may be enabled. + * + * NOTE: PINNING AN OBJECT IS EXPENSIVE AS IT PREVENTS THE GC FROM ACHIEVING + * OPTIMAL PACKING OF OBJECTS DURING EPHEMERAL COLLECTIONS. THIS TYPE + * OF HANDLE SHOULD BE USED SPARINGLY! + */ +#define HNDTYPE_ASYNCPINNED (7) + + +/* + * SIZEDREF HANDLES + * + * SizedRef handles are strong handles. Each handle has a piece of user data associated + * with it that stores the size of the object this handle refers to. These handles + * are scanned as strong roots during each GC but only during full GCs would the size + * be calculated. + * + */ +#define HNDTYPE_SIZEDREF (8) + +#ifdef FEATURE_COMINTEROP + +/* + * WINRT WEAK HANDLES + * + * WinRT weak reference handles hold two different types of weak handles to any + * RCW with an underlying COM object that implements IWeakReferenceSource. The + * object reference itself is a short weak handle to the RCW. In addition an + * IWeakReference* to the underlying COM object is stored, allowing the handle + * to create a new RCW if the existing RCW is collected. This ensures that any + * code holding onto a WinRT weak reference can always access an RCW to the + * underlying COM object as long as it has not been released by all of its strong + * references. 
+ */ +#define HNDTYPE_WEAK_WINRT (9) + +#endif // FEATURE_COMINTEROP + +typedef DPTR(struct HandleTableMap) PTR_HandleTableMap; +typedef DPTR(struct HandleTableBucket) PTR_HandleTableBucket; +typedef DPTR(PTR_HandleTableBucket) PTR_PTR_HandleTableBucket; + +struct HandleTableMap +{ + PTR_PTR_HandleTableBucket pBuckets; + PTR_HandleTableMap pNext; + uint32_t dwMaxIndex; +}; + +GVAL_DECL(HandleTableMap, g_HandleTableMap); + +#define INITIAL_HANDLE_TABLE_ARRAY_SIZE 10 + +// struct containing g_SystemInfo.dwNumberOfProcessors HHANDLETABLEs and current table index +// instead of just single HHANDLETABLE for on-fly balancing while adding handles on multiproc machines + +struct HandleTableBucket +{ + PTR_HHANDLETABLE pTable; + uint32_t HandleTableIndex; + + bool Contains(OBJECTHANDLE handle); +}; + + +/* + * Type mask definitions for HNDTYPE_VARIABLE handles. + */ +#define VHT_WEAK_SHORT (0x00000100) // avoid using low byte so we don't overlap normal types +#define VHT_WEAK_LONG (0x00000200) // avoid using low byte so we don't overlap normal types +#define VHT_STRONG (0x00000400) // avoid using low byte so we don't overlap normal types +#define VHT_PINNED (0x00000800) // avoid using low byte so we don't overlap normal types + +#define IS_VALID_VHT_VALUE(flag) ((flag == VHT_WEAK_SHORT) || \ + (flag == VHT_WEAK_LONG) || \ + (flag == VHT_STRONG) || \ + (flag == VHT_PINNED)) + +#ifndef DACCESS_COMPILE +/* + * Convenience macros and prototypes for the various handle types we define + */ + +inline OBJECTHANDLE CreateTypedHandle(HHANDLETABLE table, OBJECTREF object, int type) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, type, object); +} + +inline void DestroyTypedHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandleOfUnknownType(HndGetHandleTable(handle), handle); +} + +inline OBJECTHANDLE CreateHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_DEFAULT, object); +} + +inline void 
DestroyHandle(OBJECTHANDLE handle) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + CAN_TAKE_LOCK; + SO_TOLERANT; + } + CONTRACTL_END; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_DEFAULT, handle); +} + +inline OBJECTHANDLE CreateDuplicateHandle(OBJECTHANDLE handle) { + WRAPPER_NO_CONTRACT; + + // Create a new STRONG handle in the same table as an existing handle. + return HndCreateHandle(HndGetHandleTable(handle), HNDTYPE_DEFAULT, ObjectFromHandle(handle)); +} + + +inline OBJECTHANDLE CreateWeakHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_WEAK_DEFAULT, object); +} + +inline void DestroyWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_DEFAULT, handle); +} + +inline OBJECTHANDLE CreateShortWeakHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_WEAK_SHORT, object); +} + +inline void DestroyShortWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_SHORT, handle); +} + + +inline OBJECTHANDLE CreateLongWeakHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_WEAK_LONG, object); +} + +inline void DestroyLongWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_LONG, handle); +} + +#ifndef FEATURE_REDHAWK +typedef Holder<OBJECTHANDLE,DoNothing<OBJECTHANDLE>,DestroyLongWeakHandle> LongWeakHandleHolder; +#endif + +inline OBJECTHANDLE CreateStrongHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_STRONG, object); +} + +inline void DestroyStrongHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_STRONG, handle); +} + +inline OBJECTHANDLE 
CreatePinningHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_PINNED, object); +} + +inline void DestroyPinningHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_PINNED, handle); +} + +#ifndef FEATURE_REDHAWK +typedef Wrapper<OBJECTHANDLE, DoNothing<OBJECTHANDLE>, DestroyPinningHandle, NULL> PinningHandleHolder; +#endif + +inline OBJECTHANDLE CreateAsyncPinningHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_ASYNCPINNED, object); +} + +inline void DestroyAsyncPinningHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_ASYNCPINNED, handle); +} + +#ifndef FEATURE_REDHAWK +typedef Wrapper<OBJECTHANDLE, DoNothing<OBJECTHANDLE>, DestroyAsyncPinningHandle, NULL> AsyncPinningHandleHolder; +#endif + +inline OBJECTHANDLE CreateSizedRefHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_SIZEDREF, object, (uintptr_t)0); +} + +void DestroySizedRefHandle(OBJECTHANDLE handle); + +#ifndef FEATURE_REDHAWK +typedef Wrapper<OBJECTHANDLE, DoNothing<OBJECTHANDLE>, DestroySizedRefHandle, NULL> SizeRefHandleHolder; +#endif + +#ifdef FEATURE_COMINTEROP +inline OBJECTHANDLE CreateRefcountedHandle(HHANDLETABLE table, OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(table, HNDTYPE_REFCOUNTED, object); +} + +inline void DestroyRefcountedHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_REFCOUNTED, handle); +} + +inline OBJECTHANDLE CreateWinRTWeakHandle(HHANDLETABLE table, OBJECTREF object, IWeakReference* pWinRTWeakReference) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE(pWinRTWeakReference != NULL); + return HndCreateHandle(table, HNDTYPE_WEAK_WINRT, object, reinterpret_cast<uintptr_t>(pWinRTWeakReference)); +} + +void 
DestroyWinRTWeakHandle(OBJECTHANDLE handle); + +#endif // FEATURE_COMINTEROP + +#endif // !DACCESS_COMPILE + +OBJECTREF GetDependentHandleSecondary(OBJECTHANDLE handle); + +#ifndef DACCESS_COMPILE +OBJECTHANDLE CreateDependentHandle(HHANDLETABLE table, OBJECTREF primary, OBJECTREF secondary); +void SetDependentHandleSecondary(OBJECTHANDLE handle, OBJECTREF secondary); + +inline void DestroyDependentHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_DEPENDENT, handle); +} +#endif // !DACCESS_COMPILE + +#ifndef DACCESS_COMPILE + +OBJECTHANDLE CreateVariableHandle(HHANDLETABLE hTable, OBJECTREF object, uint32_t type); +uint32_t GetVariableHandleType(OBJECTHANDLE handle); +void UpdateVariableHandleType(OBJECTHANDLE handle, uint32_t type); +uint32_t CompareExchangeVariableHandleType(OBJECTHANDLE handle, uint32_t oldType, uint32_t newType); + +inline void DestroyVariableHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_VARIABLE, handle); +} + +void GCHandleValidatePinnedObject(OBJECTREF obj); + +/* + * Holder for OBJECTHANDLE + */ + +#ifndef FEATURE_REDHAWK +typedef Wrapper<OBJECTHANDLE, DoNothing<OBJECTHANDLE>, DestroyHandle > OHWrapper; + +class OBJECTHANDLEHolder : public OHWrapper +{ +public: + FORCEINLINE OBJECTHANDLEHolder(OBJECTHANDLE p = NULL) : OHWrapper(p) + { + LIMITED_METHOD_CONTRACT; + } + FORCEINLINE void operator=(OBJECTHANDLE p) + { + WRAPPER_NO_CONTRACT; + + OHWrapper::operator=(p); + } +}; +#endif + +#ifdef FEATURE_COMINTEROP + +typedef Wrapper<OBJECTHANDLE, DoNothing<OBJECTHANDLE>, DestroyRefcountedHandle> RefCountedOHWrapper; + +class RCOBJECTHANDLEHolder : public RefCountedOHWrapper +{ +public: + FORCEINLINE RCOBJECTHANDLEHolder(OBJECTHANDLE p = NULL) : RefCountedOHWrapper(p) + { + LIMITED_METHOD_CONTRACT; + } + FORCEINLINE void operator=(OBJECTHANDLE p) + { + WRAPPER_NO_CONTRACT; + + RefCountedOHWrapper::operator=(p); + } +}; + 
+#endif // FEATURE_COMINTEROP +/* + * Convenience prototypes for using the global handles + */ + +int GetCurrentThreadHomeHeapNumber(); + +inline OBJECTHANDLE CreateGlobalTypedHandle(OBJECTREF object, int type) +{ + WRAPPER_NO_CONTRACT; + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], type, object); +} + +inline void DestroyGlobalTypedHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandleOfUnknownType(HndGetHandleTable(handle), handle); +} + +inline OBJECTHANDLE CreateGlobalHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + CONDITIONAL_CONTRACT_VIOLATION(ModeViolation, object == NULL); + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_DEFAULT, object); +} + +inline void DestroyGlobalHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_DEFAULT, handle); +} + +inline OBJECTHANDLE CreateGlobalWeakHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_WEAK_DEFAULT, object); +} + +inline void DestroyGlobalWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_DEFAULT, handle); +} + +inline OBJECTHANDLE CreateGlobalShortWeakHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + CONDITIONAL_CONTRACT_VIOLATION(ModeViolation, object == NULL); + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_WEAK_SHORT, object); +} + +inline void DestroyGlobalShortWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_SHORT, handle); +} + +#ifndef FEATURE_REDHAWK +typedef Holder<OBJECTHANDLE,DoNothing<OBJECTHANDLE>,DestroyGlobalShortWeakHandle> GlobalShortWeakHandleHolder; +#endif + +inline OBJECTHANDLE 
CreateGlobalLongWeakHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_WEAK_LONG, object); +} + +inline void DestroyGlobalLongWeakHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_WEAK_LONG, handle); +} + +inline OBJECTHANDLE CreateGlobalStrongHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + CONDITIONAL_CONTRACT_VIOLATION(ModeViolation, object == NULL); + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_STRONG, object); +} + +inline void DestroyGlobalStrongHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_STRONG, handle); +} + +#ifndef FEATURE_REDHAWK +typedef Holder<OBJECTHANDLE,DoNothing<OBJECTHANDLE>,DestroyGlobalStrongHandle> GlobalStrongHandleHolder; +#endif + +inline OBJECTHANDLE CreateGlobalPinningHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_PINNED, object); +} + +inline void DestroyGlobalPinningHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_PINNED, handle); +} + +#ifdef FEATURE_COMINTEROP +inline OBJECTHANDLE CreateGlobalRefcountedHandle(OBJECTREF object) +{ + WRAPPER_NO_CONTRACT; + + return HndCreateHandle(g_HandleTableMap.pBuckets[0]->pTable[GetCurrentThreadHomeHeapNumber()], HNDTYPE_REFCOUNTED, object); +} + +inline void DestroyGlobalRefcountedHandle(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + HndDestroyHandle(HndGetHandleTable(handle), HNDTYPE_REFCOUNTED, handle); +} +#endif // FEATURE_COMINTEROP + +inline void ResetOBJECTHANDLE(OBJECTHANDLE handle) +{ + WRAPPER_NO_CONTRACT; + + StoreObjectInHandle(handle, NULL); +} + +#ifndef FEATURE_REDHAWK +typedef 
Holder<OBJECTHANDLE,DoNothing<OBJECTHANDLE>,ResetOBJECTHANDLE> ObjectInHandleHolder; +#endif + +/* + * Table maintenance routines + */ +bool Ref_Initialize(); +void Ref_Shutdown(); +HandleTableBucket *Ref_CreateHandleTableBucket(ADIndex uADIndex); +BOOL Ref_HandleAsyncPinHandles(); +void Ref_RelocateAsyncPinHandles(HandleTableBucket *pSource, HandleTableBucket *pTarget); +void Ref_RemoveHandleTableBucket(HandleTableBucket *pBucket); +void Ref_DestroyHandleTableBucket(HandleTableBucket *pBucket); +BOOL Ref_ContainHandle(HandleTableBucket *pBucket, OBJECTHANDLE handle); + +/* + * GC-time scanning entrypoints + */ +struct ScanContext; +struct DhContext; +struct ProfilingScanContext; +void Ref_BeginSynchronousGC (uint32_t uCondemnedGeneration, uint32_t uMaxGeneration); +void Ref_EndSynchronousGC (uint32_t uCondemnedGeneration, uint32_t uMaxGeneration); + +typedef void Ref_promote_func(class Object**, ScanContext*, uint32_t); + +void Ref_TraceRefCountHandles(HANDLESCANPROC callback, uintptr_t lParam1, uintptr_t lParam2); +void Ref_TracePinningRoots(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +void Ref_TraceNormalRoots(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +void Ref_UpdatePointers(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +void Ref_UpdatePinnedPointers(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +DhContext *Ref_GetDependentHandleContext(ScanContext* sc); +bool Ref_ScanDependentHandlesForPromotion(DhContext *pDhContext); +void Ref_ScanDependentHandlesForClearing(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +void Ref_ScanDependentHandlesForRelocation(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +void Ref_ScanSizedRefHandles(uint32_t condemned, uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +#ifdef FEATURE_REDHAWK +void Ref_ScanPointers(uint32_t condemned, 
uint32_t maxgen, ScanContext* sc, Ref_promote_func* fn); +#endif + +void Ref_CheckReachable (uint32_t uCondemnedGeneration, uint32_t uMaxGeneration, uintptr_t lp1); +void Ref_CheckAlive (uint32_t uCondemnedGeneration, uint32_t uMaxGeneration, uintptr_t lp1); +void Ref_ScanPointersForProfilerAndETW(uint32_t uMaxGeneration, uintptr_t lp1); +void Ref_ScanDependentHandlesForProfilerAndETW(uint32_t uMaxGeneration, ProfilingScanContext * SC); +void Ref_AgeHandles (uint32_t uCondemnedGeneration, uint32_t uMaxGeneration, uintptr_t lp1); +void Ref_RejuvenateHandles(uint32_t uCondemnedGeneration, uint32_t uMaxGeneration, uintptr_t lp1); + +void Ref_VerifyHandleTable(uint32_t condemned, uint32_t maxgen, ScanContext* sc); + +#endif // DACCESS_COMPILE + +#endif //_OBJECTHANDLE_H diff --git a/src/gc/sample/CMakeLists.txt b/src/gc/sample/CMakeLists.txt new file mode 100644 index 0000000000..572fba371f --- /dev/null +++ b/src/gc/sample/CMakeLists.txt @@ -0,0 +1,34 @@ +project(clrgcsample) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +include_directories(..) +include_directories(../env) + +set(SOURCES + GCSample.cpp + gcenv.ee.cpp + ../gccommon.cpp + ../gceewks.cpp + ../gcscan.cpp + ../gcwks.cpp + ../handletable.cpp + ../handletablecache.cpp + ../handletablecore.cpp + ../handletablescan.cpp + ../objecthandle.cpp + ../softwarewritewatch.cpp +) + +if(WIN32) + list(APPEND SOURCES + gcenv.windows.cpp) + add_definitions(-DUNICODE=1) +else() + list(APPEND SOURCES + gcenv.unix.cpp) +endif() + +_add_executable(gcsample + ${SOURCES} +) diff --git a/src/gc/sample/GCSample.cpp b/src/gc/sample/GCSample.cpp new file mode 100644 index 0000000000..7e07834ced --- /dev/null +++ b/src/gc/sample/GCSample.cpp @@ -0,0 +1,242 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +// +// GCSample.cpp +// + +// +// This sample demonstrates: +// +// * How to initialize GC without the rest of CoreCLR +// * How to create a type layout information in format that the GC expects +// * How to implement fast object allocator and write barrier +// * How to allocate objects and work with GC handles +// +// An important part of the sample is the GC environment (gcenv.*) that provides methods for GC to interact +// with the OS and execution engine. +// +// The methods to interact with the OS should be no surprise - block memory allocation, synchronization primitives, etc. +// +// The important methods that the execution engine needs to provide to GC are: +// +// * Thread suspend/resume: +// static void SuspendEE(SUSPEND_REASON reason); +// static void RestartEE(bool bFinishedGC); //resume threads. +// +// * Enumeration of thread-local allocators: +// static void GcEnumAllocContexts (enum_alloc_context_func* fn, void* param); +// +// * Scanning of stack roots: +// static void GcScanRoots(promote_func* fn, int condemned, int max_gen, ScanContext* sc); +// +// The sample has trivial implementation for these methods. It is single threaded, and there are no stack roots to +// be reported. There are number of other callbacks that GC calls to optionally allow the execution engine to do its +// own bookkeeping. +// +// For now, the sample GC environment has some cruft in it to decouple the GC from Windows and rest of CoreCLR. +// It is something we would like to clean up. +// + +#include "common.h" + +#include "gcenv.h" + +#include "gc.h" +#include "objecthandle.h" + +#include "gcdesc.h" + +// +// The fast paths for object allocation and write barriers is performance critical. They are often +// hand written in assembly code, etc. 
+// +Object * AllocateObject(MethodTable * pMT) +{ + alloc_context * acontext = GetThread()->GetAllocContext(); + Object * pObject; + + size_t size = pMT->GetBaseSize(); + + uint8_t* result = acontext->alloc_ptr; + uint8_t* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + pObject = (Object *)result; + } + else + { + pObject = GCHeap::GetGCHeap()->Alloc(acontext, size, 0); + if (pObject == NULL) + return NULL; + } + + pObject->RawSetMethodTable(pMT); + + return pObject; +} + +#if defined(BIT64) +// Card byte shift is different on 64bit. +#define card_byte_shift 11 +#else +#define card_byte_shift 10 +#endif + +#define card_byte(addr) (((size_t)(addr)) >> card_byte_shift) + +inline void ErectWriteBarrier(Object ** dst, Object * ref) +{ + // if the dst is outside of the heap (unboxed value classes) then we + // simply exit + if (((uint8_t*)dst < g_lowest_address) || ((uint8_t*)dst >= g_highest_address)) + return; + + if((uint8_t*)ref >= g_ephemeral_low && (uint8_t*)ref < g_ephemeral_high) + { + // volatile is used here to prevent fetch of g_card_table from being reordered + // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables. + uint8_t* pCardByte = (uint8_t *)*(volatile uint8_t **)(&g_card_table) + card_byte((uint8_t *)dst); + if(*pCardByte != 0xFF) + *pCardByte = 0xFF; + } +} + +void WriteBarrier(Object ** dst, Object * ref) +{ + *dst = ref; + ErectWriteBarrier(dst, ref); +} + +int __cdecl main(int argc, char* argv[]) +{ + // + // Initialize system info + // + if (!GCToOSInterface::Initialize()) + { + return -1; + } + + // + // Initialize free object methodtable. The GC uses a special array-like methodtable as placeholder + // for collected free space. 
+ // + static MethodTable freeObjectMT; + freeObjectMT.InitializeFreeObject(); + g_pFreeObjectMethodTable = &freeObjectMT; + + // + // Initialize handle table + // + if (!Ref_Initialize()) + return -1; + + // + // Initialize GC heap + // + GCHeap *pGCHeap = GCHeap::CreateGCHeap(); + if (!pGCHeap) + return -1; + + if (FAILED(pGCHeap->Initialize())) + return -1; + + // + // Initialize current thread + // + ThreadStore::AttachCurrentThread(); + + // + // Create a Methodtable with GCDesc + // + + class My : Object { + public: + Object * m_pOther1; + int dummy_inbetween; + Object * m_pOther2; + }; + + static struct My_MethodTable + { + // GCDesc + CGCDescSeries m_series[2]; + size_t m_numSeries; + + // The actual methodtable + MethodTable m_MT; + } + My_MethodTable; + + // 'My' contains the MethodTable* + uint32_t baseSize = sizeof(My); + // GC expects the size of ObjHeader (extra void*) to be included in the size. + baseSize = baseSize + sizeof(ObjHeader); + // Add padding as necessary. GC requires the object size to be at least MIN_OBJECT_SIZE. + My_MethodTable.m_MT.m_baseSize = max(baseSize, MIN_OBJECT_SIZE); + + My_MethodTable.m_MT.m_componentSize = 0; // Array component size + My_MethodTable.m_MT.m_flags = MTFlag_ContainsPointers; + + My_MethodTable.m_numSeries = 2; + + // The GC walks the series backwards. It expects the offsets to be sorted in descending order. 
+ My_MethodTable.m_series[0].SetSeriesOffset(offsetof(My, m_pOther2)); + My_MethodTable.m_series[0].SetSeriesCount(1); + My_MethodTable.m_series[0].seriessize -= My_MethodTable.m_MT.m_baseSize; + + My_MethodTable.m_series[1].SetSeriesOffset(offsetof(My, m_pOther1)); + My_MethodTable.m_series[1].SetSeriesCount(1); + My_MethodTable.m_series[1].seriessize -= My_MethodTable.m_MT.m_baseSize; + + MethodTable * pMyMethodTable = &My_MethodTable.m_MT; + + // Allocate instance of MyObject + Object * pObj = AllocateObject(pMyMethodTable); + if (pObj == NULL) + return -1; + + // Create strong handle and store the object into it + OBJECTHANDLE oh = CreateGlobalHandle(pObj); + if (oh == NULL) + return -1; + + for (int i = 0; i < 1000000; i++) + { + Object * pBefore = ((My *)ObjectFromHandle(oh))->m_pOther1; + + // Allocate more instances of the same object + Object * p = AllocateObject(pMyMethodTable); + if (p == NULL) + return -1; + + Object * pAfter = ((My *)ObjectFromHandle(oh))->m_pOther1; + + // Uncomment this assert to see how GC triggered inside AllocateObject moved objects around + // assert(pBefore == pAfter); + + // Store the newly allocated object into a field using WriteBarrier + WriteBarrier(&(((My *)ObjectFromHandle(oh))->m_pOther1), p); + } + + // Create weak handle that points to our object + OBJECTHANDLE ohWeak = CreateGlobalWeakHandle(ObjectFromHandle(oh)); + if (ohWeak == NULL) + return -1; + + // Destroy the strong handle so that nothing will be keeping out object alive + DestroyGlobalHandle(oh); + + // Explicitly trigger full GC + pGCHeap->GarbageCollect(); + + // Verify that the weak handle got cleared by the GC + assert(ObjectFromHandle(ohWeak) == NULL); + + printf("Done\n"); + + return 0; +} diff --git a/src/gc/sample/GCSample.vcxproj b/src/gc/sample/GCSample.vcxproj new file mode 100644 index 0000000000..b196e1f34c --- /dev/null +++ b/src/gc/sample/GCSample.vcxproj @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" 
ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGuid>{58D6B7AE-0A12-49F0-BCF7-200ED8BA445A}</ProjectGuid> + <Keyword>Win32Proj</Keyword> + <RootNamespace>GCSample</RootNamespace> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <PlatformToolset>v140</PlatformToolset> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <PlatformToolset>v140</PlatformToolset> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" 
Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <LinkIncremental>true</LinkIncremental> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <LinkIncremental>false</LinkIncremental> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <PrecompiledHeader>Use</PrecompiledHeader> + <WarningLevel>Level3</WarningLevel> + <Optimization>Disabled</Optimization> + <PreprocessorDefinitions>WIN32;_X86_;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <SDLCheck>true</SDLCheck> + <PrecompiledHeaderFile>common.h</PrecompiledHeaderFile> + <AdditionalIncludeDirectories>.;..;..\env</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <PrecompiledHeader>Use</PrecompiledHeader> + <Optimization>MaxSpeed</Optimization> + <FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + <PreprocessorDefinitions>WIN32;_X86_;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <SDLCheck>true</SDLCheck> + <AdditionalIncludeDirectories>.;..;..\env</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + </Link> + </ItemDefinitionGroup> + <ItemGroup> + <ClInclude Include="common.h" /> + <ClInclude Include="gcenv.h" /> + </ItemGroup> + <ItemGroup> + <ClCompile 
Include="gcenv.ee.cpp" /> + <ClCompile Include="gcenv.windows.cpp" /> + <ClCompile Include="GCSample.cpp" /> + <ClCompile Include="..\gccommon.cpp" /> + <ClCompile Include="..\gceewks.cpp" /> + <ClCompile Include="..\gcscan.cpp" /> + <ClCompile Include="..\gcwks.cpp" /> + <ClCompile Include="..\handletable.cpp" /> + <ClCompile Include="..\handletablecache.cpp" /> + <ClCompile Include="..\handletablecore.cpp" /> + <ClCompile Include="..\handletablescan.cpp" /> + <ClCompile Include="..\objecthandle.cpp" /> + <ClCompile Include="..\env\common.cpp"> + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader> + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader> + </ClCompile> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project>
\ No newline at end of file diff --git a/src/gc/sample/GCSample.vcxproj.filters b/src/gc/sample/GCSample.vcxproj.filters new file mode 100644 index 0000000000..e46c054565 --- /dev/null +++ b/src/gc/sample/GCSample.vcxproj.filters @@ -0,0 +1,66 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> + <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> + <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions> + </Filter> + <Filter Include="Resource Files"> + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> + </Filter> + </ItemGroup> + <ItemGroup> + <ClInclude Include="common.h"> + <Filter>Header Files</Filter> + </ClInclude> + <ClInclude Include="gcenv.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + <ClCompile Include="GCSample.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\objecthandle.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\handletable.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\handletablecache.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\handletablescan.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\handletablecore.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\gcwks.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\gcscan.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\gceewks.cpp"> + <Filter>Source 
Files</Filter> + </ClCompile> + <ClCompile Include="..\gccommon.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="..\env\common.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="gcenv.ee.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="gcenv.windows.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> +</Project>
\ No newline at end of file diff --git a/src/gc/sample/gcenv.ee.cpp b/src/gc/sample/gcenv.ee.cpp new file mode 100644 index 0000000000..330564a380 --- /dev/null +++ b/src/gc/sample/gcenv.ee.cpp @@ -0,0 +1,298 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include "common.h" + +#include "windows.h" + +#include "gcenv.h" +#include "gc.h" + +EEConfig * g_pConfig; + +bool CLREventStatic::CreateManualEventNoThrow(bool bInitialState) +{ + m_hEvent = CreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + + return IsValid(); +} + +bool CLREventStatic::CreateAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = CreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + + return IsValid(); +} + +bool CLREventStatic::CreateOSManualEventNoThrow(bool bInitialState) +{ + m_hEvent = CreateEventW(NULL, TRUE, bInitialState, NULL); + m_fInitialized = true; + + return IsValid(); +} + +bool CLREventStatic::CreateOSAutoEventNoThrow(bool bInitialState) +{ + m_hEvent = CreateEventW(NULL, FALSE, bInitialState, NULL); + m_fInitialized = true; + + return IsValid(); +} + +void CLREventStatic::CloseEvent() +{ + if (m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE) + { + CloseHandle(m_hEvent); + m_hEvent = INVALID_HANDLE_VALUE; + } +} + +bool CLREventStatic::IsValid() const +{ + return m_fInitialized && m_hEvent != INVALID_HANDLE_VALUE; +} + +bool CLREventStatic::Set() +{ + if (!m_fInitialized) + return false; + return !!SetEvent(m_hEvent); +} + +bool CLREventStatic::Reset() +{ + if (!m_fInitialized) + return false; + return !!ResetEvent(m_hEvent); +} + +uint32_t CLREventStatic::Wait(uint32_t dwMilliseconds, bool bAlertable) +{ + DWORD result = WAIT_FAILED; + + if (m_fInitialized) + { + bool disablePreemptive = false; + Thread * pCurThread = GetThread(); + + if (NULL != pCurThread) + { + if 
(GCToEEInterface::IsPreemptiveGCDisabled(pCurThread)) + { + GCToEEInterface::EnablePreemptiveGC(pCurThread); + disablePreemptive = true; + } + } + + result = WaitForSingleObjectEx(m_hEvent, dwMilliseconds, bAlertable); + + if (disablePreemptive) + { + GCToEEInterface::DisablePreemptiveGC(pCurThread); + } + } + + return result; +} + +__declspec(thread) Thread * pCurrentThread; + +Thread * GetThread() +{ + return pCurrentThread; +} + +Thread * g_pThreadList = NULL; + +Thread * ThreadStore::GetThreadList(Thread * pThread) +{ + if (pThread == NULL) + return g_pThreadList; + + return pThread->m_pNext; +} + +void ThreadStore::AttachCurrentThread() +{ + // TODO: Locks + + Thread * pThread = new Thread(); + pThread->GetAllocContext()->init(); + pCurrentThread = pThread; + + pThread->m_pNext = g_pThreadList; + g_pThreadList = pThread; +} + +void GCToEEInterface::SuspendEE(GCToEEInterface::SUSPEND_REASON reason) +{ + GCHeap::GetGCHeap()->SetGCInProgress(TRUE); + + // TODO: Implement +} + +void GCToEEInterface::RestartEE(bool bFinishedGC) +{ + // TODO: Implement + + GCHeap::GetGCHeap()->SetGCInProgress(FALSE); +} + +void GCToEEInterface::GcScanRoots(promote_func* fn, int condemned, int max_gen, ScanContext* sc) +{ + // TODO: Implement - Scan stack roots on given thread +} + +void GCToEEInterface::GcStartWork(int condemned, int max_gen) +{ +} + +void GCToEEInterface::AfterGcScanRoots(int condemned, int max_gen, ScanContext* sc) +{ +} + +void GCToEEInterface::GcBeforeBGCSweepWork() +{ +} + +void GCToEEInterface::GcDone(int condemned) +{ +} + +bool GCToEEInterface::RefCountedHandleCallbacks(Object * pObject) +{ + return false; +} + +bool GCToEEInterface::IsPreemptiveGCDisabled(Thread * pThread) +{ + return pThread->PreemptiveGCDisabled(); +} + +void GCToEEInterface::EnablePreemptiveGC(Thread * pThread) +{ + return pThread->EnablePreemptiveGC(); +} + +void GCToEEInterface::DisablePreemptiveGC(Thread * pThread) +{ + pThread->DisablePreemptiveGC(); +} + +alloc_context * 
GCToEEInterface::GetAllocContext(Thread * pThread) +{ + return pThread->GetAllocContext(); +} + +bool GCToEEInterface::CatchAtSafePoint(Thread * pThread) +{ + return pThread->CatchAtSafePoint(); +} + +void GCToEEInterface::GcEnumAllocContexts (enum_alloc_context_func* fn, void* param) +{ + Thread * pThread = NULL; + while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL) + { + fn(pThread->GetAllocContext(), param); + } +} + +void GCToEEInterface::SyncBlockCacheWeakPtrScan(HANDLESCANPROC /*scanProc*/, uintptr_t /*lp1*/, uintptr_t /*lp2*/) +{ +} + +void GCToEEInterface::SyncBlockCacheDemote(int /*max_gen*/) +{ +} + +void GCToEEInterface::SyncBlockCachePromotionsGranted(int /*max_gen*/) +{ +} + +Thread* GCToEEInterface::CreateBackgroundThread(GCBackgroundThreadFunction threadStart, void* arg) +{ + // TODO: Implement for background GC + return NULL; +} + +void FinalizerThread::EnableFinalization() +{ + // Signal to finalizer thread that there are objects to finalize + // TODO: Implement for finalization +} + +bool FinalizerThread::HaveExtraWorkForFinalizer() +{ + return false; +} + +bool IsGCSpecialThread() +{ + // TODO: Implement for background GC + return false; +} + +void StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) +{ +} + +void StompWriteBarrierResize(bool /* isRuntimeSuspended */, bool /*bReqUpperBoundsCheck*/) +{ +} + +bool IsGCThread() +{ + return false; +} + +void SwitchToWriteWatchBarrier() +{ +} + +void SwitchToNonWriteWatchBarrier() +{ +} + +void LogSpewAlways(const char * /*fmt*/, ...) 
+{ +} + +uint32_t CLRConfig::GetConfigValue(ConfigDWORDInfo eType) +{ + switch (eType) + { + case UNSUPPORTED_BGCSpinCount: + return 140; + + case UNSUPPORTED_BGCSpin: + return 2; + + case UNSUPPORTED_GCLogEnabled: + case UNSUPPORTED_GCLogFile: + case UNSUPPORTED_GCLogFileSize: + case EXTERNAL_GCStressStart: + case INTERNAL_GCStressStartAtJit: + case INTERNAL_DbgDACSkipVerifyDlls: + return 0; + + case Config_COUNT: + default: +#ifdef _MSC_VER +#pragma warning(suppress:4127) // Constant conditional expression in ASSERT below +#endif + ASSERT(!"Unknown config value type"); + return 0; + } +} + +HRESULT CLRConfig::GetConfigValue(ConfigStringInfo /*eType*/, TCHAR * * outVal) +{ + *outVal = NULL; + return 0; +} diff --git a/src/gc/sample/gcenv.h b/src/gc/sample/gcenv.h new file mode 100644 index 0000000000..d560789751 --- /dev/null +++ b/src/gc/sample/gcenv.h @@ -0,0 +1,188 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if defined(_DEBUG) +#ifndef _DEBUG_IMPL +#define _DEBUG_IMPL 1 +#endif +#define ASSERT(_expr) assert(_expr) +#else +#define ASSERT(_expr) +#endif + +#ifndef _ASSERTE +#define _ASSERTE(_expr) ASSERT(_expr) +#endif + +#include "gcenv.structs.h" +#include "gcenv.base.h" +#include "gcenv.ee.h" +#include "gcenv.os.h" +#include "gcenv.interlocked.h" +#include "gcenv.interlocked.inl" +#include "gcenv.object.h" +#include "gcenv.sync.h" + +#define MAX_LONGPATH 1024 + +#ifdef _MSC_VER +#define SUPPRESS_WARNING_4127 \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) /* conditional expression is constant*/ +#define POP_WARNING_STATE \ + __pragma(warning(pop)) +#else // _MSC_VER +#define SUPPRESS_WARNING_4127 +#define POP_WARNING_STATE +#endif // _MSC_VER + +#define WHILE_0 \ + SUPPRESS_WARNING_4127 \ + while(0) \ + POP_WARNING_STATE \ + +#define LL_INFO10 4 + +#define STRESS_LOG_VA(msg) do { } WHILE_0 +#define STRESS_LOG0(facility, level, msg) do { } WHILE_0 +#define STRESS_LOG1(facility, level, msg, data1) do { } WHILE_0 +#define STRESS_LOG2(facility, level, msg, data1, data2) do { } WHILE_0 +#define STRESS_LOG3(facility, level, msg, data1, data2, data3) do { } WHILE_0 +#define STRESS_LOG4(facility, level, msg, data1, data2, data3, data4) do { } WHILE_0 +#define STRESS_LOG5(facility, level, msg, data1, data2, data3, data4, data5) do { } WHILE_0 +#define STRESS_LOG6(facility, level, msg, data1, data2, data3, data4, data5, data6) do { } WHILE_0 +#define STRESS_LOG7(facility, level, msg, data1, data2, data3, data4, data5, data6, data7) do { } WHILE_0 +#define STRESS_LOG_PLUG_MOVE(plug_start, plug_end, plug_delta) do { } WHILE_0 +#define STRESS_LOG_ROOT_PROMOTE(root_addr, objPtr, methodTable) do { } WHILE_0 +#define STRESS_LOG_ROOT_RELOCATE(root_addr, old_value, new_value, methodTable) do { } WHILE_0 +#define STRESS_LOG_GC_START(gcCount, Gen, collectClasses) do { } WHILE_0 +#define STRESS_LOG_GC_END(gcCount, Gen, collectClasses) do { } WHILE_0 +#define 
STRESS_LOG_OOM_STACK(size) do { } while(0) +#define STRESS_LOG_RESERVE_MEM(numChunks) do {} while (0) +#define STRESS_LOG_GC_STACK + +#define LOG(x) + +// +// Thread +// + +struct alloc_context; + +class Thread +{ + uint32_t m_fPreemptiveGCDisabled; + uintptr_t m_alloc_context[16]; // Reserve enough space to fix allocation context + + friend class ThreadStore; + Thread * m_pNext; + +public: + Thread() + { + } + + bool PreemptiveGCDisabled() + { + return !!m_fPreemptiveGCDisabled; + } + + void EnablePreemptiveGC() + { + m_fPreemptiveGCDisabled = false; + } + + void DisablePreemptiveGC() + { + m_fPreemptiveGCDisabled = true; + } + + alloc_context* GetAllocContext() + { + return (alloc_context *)&m_alloc_context; + } + + void SetGCSpecial(bool fGCSpecial) + { + } + + bool CatchAtSafePoint() + { + // This is only called by the GC on a background GC worker thread that's explicitly interested in letting + // a foreground GC proceed at that point. So it's always safe to return true. + return true; + } +}; + +Thread * GetThread(); + +class ThreadStore +{ +public: + static Thread * GetThreadList(Thread * pThread); + + static void AttachCurrentThread(); +}; + +// ----------------------------------------------------------------------------------------------------------- +// Config file enumulation +// + +class EEConfig +{ +public: + enum HeapVerifyFlags { + HEAPVERIFY_NONE = 0, + HEAPVERIFY_GC = 1, // Verify the heap at beginning and end of GC + HEAPVERIFY_BARRIERCHECK = 2, // Verify the brick table + HEAPVERIFY_SYNCBLK = 4, // Verify sync block scanning + + // the following options can be used to mitigate some of the overhead introduced + // by heap verification. some options might cause heap verifiction to be less + // effective depending on the scenario. 
+ + HEAPVERIFY_NO_RANGE_CHECKS = 0x10, // Excludes checking if an OBJECTREF is within the bounds of the managed heap + HEAPVERIFY_NO_MEM_FILL = 0x20, // Excludes filling unused segment portions with fill pattern + HEAPVERIFY_POST_GC_ONLY = 0x40, // Performs heap verification post-GCs only (instead of before and after each GC) + HEAPVERIFY_DEEP_ON_COMPACT = 0x80 // Performs deep object verfication only on compacting GCs. + }; + + enum GCStressFlags { + GCSTRESS_NONE = 0, + GCSTRESS_ALLOC = 1, // GC on all allocs and 'easy' places + GCSTRESS_TRANSITION = 2, // GC on transitions to preemptive GC + GCSTRESS_INSTR_JIT = 4, // GC on every allowable JITed instr + GCSTRESS_INSTR_NGEN = 8, // GC on every allowable NGEN instr + GCSTRESS_UNIQUE = 16, // GC only on a unique stack trace + }; + + int GetHeapVerifyLevel() { return 0; } + bool IsHeapVerifyEnabled() { return GetHeapVerifyLevel() != 0; } + + GCStressFlags GetGCStressLevel() const { return GCSTRESS_NONE; } + bool IsGCStressMix() const { return false; } + + int GetGCtraceStart() const { return 0; } + int GetGCtraceEnd() const { return 0; }//1000000000; } + int GetGCtraceFac() const { return 0; } + int GetGCprnLvl() const { return 0; } + bool IsGCBreakOnOOMEnabled() const { return false; } + int GetGCgen0size() const { return 0; } + int GetSegmentSize() const { return 0; } + int GetGCconcurrent() const { return 1; } + int GetGCLatencyMode() const { return 1; } + int GetGCForceCompact() const { return 0; } + int GetGCRetainVM() const { return 0; } + int GetGCTrimCommit() const { return 0; } + int GetGCLOHCompactionMode() const { return 0; } + + bool GetGCAllowVeryLargeObjects() const { return false; } + + bool GetGCConservative() const { return true; } +}; + +extern EEConfig * g_pConfig; + +#include "etmdummy.h" +#define ETW_EVENT_ENABLED(e,f) false diff --git a/src/gc/sample/gcenv.unix.cpp b/src/gc/sample/gcenv.unix.cpp new file mode 100644 index 0000000000..a5e9e83ee2 --- /dev/null +++ b/src/gc/sample/gcenv.unix.cpp 
@@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// +// Implementation of the GC environment +// + +#include "common.h" + +#include "gcenv.h" +#include "gc.h" + +// TODO: Implement diff --git a/src/gc/sample/gcenv.windows.cpp b/src/gc/sample/gcenv.windows.cpp new file mode 100644 index 0000000000..76187f2185 --- /dev/null +++ b/src/gc/sample/gcenv.windows.cpp @@ -0,0 +1,453 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// +// Implementation of the GC environment +// + +#include "common.h" + +#include "windows.h" + +#include "gcenv.h" +#include "gc.h" + +MethodTable * g_pFreeObjectMethodTable; + +int32_t g_TrapReturningThreads; + +bool g_fFinalizerRunOnShutDown; + +GCSystemInfo g_SystemInfo; + +static LARGE_INTEGER g_performanceFrequency; + +// Initialize the interface implementation +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::Initialize() +{ + if (!::QueryPerformanceFrequency(&g_performanceFrequency)) + { + return false; + } + + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + + g_SystemInfo.dwNumberOfProcessors = systemInfo.dwNumberOfProcessors; + g_SystemInfo.dwPageSize = systemInfo.dwPageSize; + g_SystemInfo.dwAllocationGranularity = systemInfo.dwAllocationGranularity; + + return true; +} + +// Shutdown the interface implementation +void GCToOSInterface::Shutdown() +{ +} + +// Get numeric id of the current thread if possible on the +// current platform. It is indended for logging purposes only. 
+// Return: +// Numeric id of the current thread or 0 if the +uint64_t GCToOSInterface::GetCurrentThreadIdForLogging() +{ + return ::GetCurrentThreadId(); +} + +// Get id of the process +// Return: +// Id of the current process +uint32_t GCToOSInterface::GetCurrentProcessId() +{ + return ::GetCurrentProcessId(); +} + +// Set ideal affinity for the current thread +// Parameters: +// affinity - ideal processor affinity for the thread +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::SetCurrentThreadIdealAffinity(GCThreadAffinity* affinity) +{ + bool success = true; + +#if !defined(FEATURE_CORESYSTEM) + SetThreadIdealProcessor(GetCurrentThread(), (DWORD)affinity->Processor); +#else + PROCESSOR_NUMBER proc; + + if (affinity->Group != -1) + { + proc.Group = (WORD)affinity->Group; + proc.Number = (BYTE)affinity->Processor; + proc.Reserved = 0; + + success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL); + } + else + { + if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc)) + { + proc.Number = affinity->Processor; + success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL); + } + } +#endif + + return success; +} + +// Get the number of the current processor +uint32_t GCToOSInterface::GetCurrentProcessorNumber() +{ + _ASSERTE(GCToOSInterface::CanGetCurrentProcessorNumber()); + return ::GetCurrentProcessorNumber(); +} + +// Check if the OS supports getting current processor number +bool GCToOSInterface::CanGetCurrentProcessorNumber() +{ + return true; +} + +// Flush write buffers of processors that are executing threads of the current process +void GCToOSInterface::FlushProcessWriteBuffers() +{ + ::FlushProcessWriteBuffers(); +} + +// Break into a debugger +void GCToOSInterface::DebugBreak() +{ + ::DebugBreak(); +} + +// Get number of logical processors +uint32_t GCToOSInterface::GetLogicalCpuCount() +{ + return g_SystemInfo.dwNumberOfProcessors; +} + +// Causes the calling thread to sleep for the 
specified number of milliseconds +// Parameters: +// sleepMSec - time to sleep before switching to another thread +void GCToOSInterface::Sleep(uint32_t sleepMSec) +{ + ::Sleep(sleepMSec); +} + +// Causes the calling thread to yield execution to another thread that is ready to run on the current processor. +// Parameters: +// switchCount - number of times the YieldThread was called in a loop +void GCToOSInterface::YieldThread(uint32_t switchCount) +{ + SwitchToThread(); +} + +// Reserve virtual memory range. +// Parameters: +// address - starting virtual address, it can be NULL to let the function choose the starting address +// size - size of the virtual memory range +// alignment - requested memory alignment +// flags - flags to control special settings like write watching +// Return: +// Starting virtual address of the reserved range +void* GCToOSInterface::VirtualReserve(void* address, size_t size, size_t alignment, uint32_t flags) +{ + DWORD memFlags = (flags & VirtualReserveFlags::WriteWatch) ? (MEM_RESERVE | MEM_WRITE_WATCH) : MEM_RESERVE; + return ::VirtualAlloc(0, size, memFlags, PAGE_READWRITE); +} + +// Release virtual memory range previously reserved using VirtualReserve +// Parameters: +// address - starting virtual address +// size - size of the virtual memory range +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::VirtualRelease(void* address, size_t size) +{ + UNREFERENCED_PARAMETER(size); + return !!::VirtualFree(address, 0, MEM_RELEASE); +} + +// Commit virtual memory range. It must be part of a range reserved using VirtualReserve. +// Parameters: +// address - starting virtual address +// size - size of the virtual memory range +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::VirtualCommit(void* address, size_t size) +{ + return ::VirtualAlloc(address, size, MEM_COMMIT, PAGE_READWRITE) != NULL; +} + +// Decomit virtual memory range. 
+// Parameters: +// address - starting virtual address +// size - size of the virtual memory range +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::VirtualDecommit(void* address, size_t size) +{ + return !!::VirtualFree(address, size, MEM_DECOMMIT); +} + +// Reset virtual memory range. Indicates that data in the memory range specified by address and size is no +// longer of interest, but it should not be decommitted. +// Parameters: +// address - starting virtual address +// size - size of the virtual memory range +// unlock - true if the memory range should also be unlocked +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::VirtualReset(void * address, size_t size, bool unlock) +{ + bool success = ::VirtualAlloc(address, size, MEM_RESET, PAGE_READWRITE) != NULL; + if (success && unlock) + { + // Remove the page range from the working set + ::VirtualUnlock(address, size); + } + + return success; +} + +// Check if the OS supports write watching +bool GCToOSInterface::SupportsWriteWatch() +{ + return false; +} + +// Reset the write tracking state for the specified virtual memory range. +// Parameters: +// address - starting virtual address +// size - size of the virtual memory range +void GCToOSInterface::ResetWriteWatch(void* address, size_t size) +{ +} + +// Retrieve addresses of the pages that are written to in a region of virtual memory +// Parameters: +// resetState - true indicates to reset the write tracking state +// address - starting virtual address +// size - size of the virtual memory range +// pageAddresses - buffer that receives an array of page addresses in the memory region +// pageAddressesCount - on input, size of the lpAddresses array, in array elements +// on output, the number of page addresses that are returned in the array. 
+// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size, void** pageAddresses, uintptr_t* pageAddressesCount) +{ + return false; +} + +// Get size of the largest cache on the processor die +// Parameters: +// trueSize - true to return true cache size, false to return scaled up size based on +// the processor architecture +// Return: +// Size of the cache +size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize) +{ + // TODO: implement + return 0; +} + +// Get affinity mask of the current process +// Parameters: +// processMask - affinity mask for the specified process +// systemMask - affinity mask for the system +// Return: +// true if it has succeeded, false if it has failed +// Remarks: +// A process affinity mask is a bit vector in which each bit represents the processors that +// a process is allowed to run on. A system affinity mask is a bit vector in which each bit +// represents the processors that are configured into a system. +// A process affinity mask is a subset of the system affinity mask. A process is only allowed +// to run on the processors configured into a system. Therefore, the process affinity mask cannot +// specify a 1 bit for a processor when the system affinity mask specifies a 0 bit for that processor. +bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uintptr_t* systemMask) +{ + return false; +} + +// Get number of processors assigned to the current process +// Return: +// The number of processors +uint32_t GCToOSInterface::GetCurrentProcessCpuCount() +{ + return g_SystemInfo.dwNumberOfProcessors; +} + +// Return the size of the user-mode portion of the virtual address space of this process. 
+// Return: +// non zero if it has succeeded, 0 if it has failed +size_t GCToOSInterface::GetVirtualMemoryLimit() +{ + MEMORYSTATUSEX memStatus; + + memStatus.dwLength = sizeof(MEMORYSTATUSEX); + BOOL fRet = GlobalMemoryStatusEx(&memStatus); + _ASSERTE(fRet); + + return (size_t)memStatus.ullTotalVirtual; +} + +// Get the physical memory that this process can use. +// Return: +// non zero if it has succeeded, 0 if it has failed +uint64_t GCToOSInterface::GetPhysicalMemoryLimit() +{ + MEMORYSTATUSEX memStatus; + + memStatus.dwLength = sizeof(MEMORYSTATUSEX); + BOOL fRet = GlobalMemoryStatusEx(&memStatus); + _ASSERTE(fRet); + + return memStatus.ullTotalPhys; +} + +// Get memory status +// Parameters: +// memory_load - A number between 0 and 100 that specifies the approximate percentage of physical memory +// that is in use (0 indicates no memory use and 100 indicates full memory use). +// available_physical - The amount of physical memory currently available, in bytes. +// available_page_file - The maximum amount of memory the current process can commit, in bytes. +void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available_physical, uint64_t* available_page_file) +{ + MEMORYSTATUSEX memStatus; + + memStatus.dwLength = sizeof(MEMORYSTATUSEX); + BOOL fRet = GlobalMemoryStatusEx(&memStatus); + _ASSERTE (fRet); + + // If the machine has more RAM than virtual address limit, let us cap it. + // The GC can never use more than virtual address limit. 
+ if (memStatus.ullAvailPhys > memStatus.ullTotalVirtual) + { + memStatus.ullAvailPhys = memStatus.ullAvailVirtual; + } + + if (memory_load != NULL) + *memory_load = memStatus.dwMemoryLoad; + if (available_physical != NULL) + *available_physical = memStatus.ullAvailPhys; + if (available_page_file != NULL) + *available_page_file = memStatus.ullAvailPageFile; +} + +// Get a high precision performance counter +// Return: +// The counter value +int64_t GCToOSInterface::QueryPerformanceCounter() +{ + LARGE_INTEGER ts; + if (!::QueryPerformanceCounter(&ts)) + { + _ASSERTE(!"Fatal Error - cannot query performance counter."); + abort(); + } + + return ts.QuadPart; +} + +// Get a frequency of the high precision performance counter +// Return: +// The counter frequency +int64_t GCToOSInterface::QueryPerformanceFrequency() +{ + return g_performanceFrequency.QuadPart; +} + +// Get a time stamp with a low precision +// Return: +// Time stamp in milliseconds +uint32_t GCToOSInterface::GetLowPrecisionTimeStamp() +{ + return ::GetTickCount(); +} + +// Parameters of the GC thread stub +struct GCThreadStubParam +{ + GCThreadFunction GCThreadFunction; + void* GCThreadParam; +}; + +// GC thread stub to convert GC thread function to an OS specific thread function +static DWORD __stdcall GCThreadStub(void* param) +{ + GCThreadStubParam *stubParam = (GCThreadStubParam*)param; + GCThreadFunction function = stubParam->GCThreadFunction; + void* threadParam = stubParam->GCThreadParam; + + delete stubParam; + + function(threadParam); + + return 0; +} + +// Create a new thread +// Parameters: +// function - the function to be executed by the thread +// param - parameters of the thread +// affinity - processor affinity of the thread +// Return: +// true if it has succeeded, false if it has failed +bool GCToOSInterface::CreateThread(GCThreadFunction function, void* param, GCThreadAffinity* affinity) +{ + DWORD thread_id; + + GCThreadStubParam* stubParam = new (nothrow) GCThreadStubParam(); + if 
(stubParam == NULL) + { + return false; + } + + stubParam->GCThreadFunction = function; + stubParam->GCThreadParam = param; + + HANDLE gc_thread = ::CreateThread(NULL, 0, GCThreadStub, stubParam, CREATE_SUSPENDED, &thread_id); + + if (!gc_thread) + { + delete stubParam; + return false; + } + + SetThreadPriority(gc_thread, /* THREAD_PRIORITY_ABOVE_NORMAL );*/ THREAD_PRIORITY_HIGHEST ); + + ResumeThread(gc_thread); + + CloseHandle(gc_thread); + + return true; +} + +// Initialize the critical section +void CLRCriticalSection::Initialize() +{ + ::InitializeCriticalSection(&m_cs); +} + +// Destroy the critical section +void CLRCriticalSection::Destroy() +{ + ::DeleteCriticalSection(&m_cs); +} + +// Enter the critical section. Blocks until the section can be entered. +void CLRCriticalSection::Enter() +{ + ::EnterCriticalSection(&m_cs); +} + +// Leave the critical section +void CLRCriticalSection::Leave() +{ + ::LeaveCriticalSection(&m_cs); +} diff --git a/src/gc/softwarewritewatch.cpp b/src/gc/softwarewritewatch.cpp new file mode 100644 index 0000000000..519744900b --- /dev/null +++ b/src/gc/softwarewritewatch.cpp @@ -0,0 +1,242 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#include "common.h" +#include "softwarewritewatch.h" + +#include "gcenv.h" + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifndef DACCESS_COMPILE + +static_assert((static_cast<size_t>(1) << SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift) == OS_PAGE_SIZE, "Unexpected OS_PAGE_SIZE"); + +extern "C" +{ + uint8_t *g_sw_ww_table = nullptr; + bool g_sw_ww_enabled_for_gc_heap = false; +} + +void SoftwareWriteWatch::StaticClose() +{ + if (GetTable() == nullptr) + { + return; + } + + g_sw_ww_enabled_for_gc_heap = false; + g_sw_ww_table = nullptr; +} + +bool SoftwareWriteWatch::GetDirtyFromBlock( + uint8_t *block, + uint8_t *firstPageAddressInBlock, + size_t startByteIndex, + size_t endByteIndex, + void **dirtyPages, + size_t *dirtyPageIndexRef, + size_t dirtyPageCount, + bool clearDirty) +{ + assert(block != nullptr); + assert(ALIGN_DOWN(block, sizeof(size_t)) == block); + assert(firstPageAddressInBlock == reinterpret_cast<uint8_t *>(GetPageAddress(block - GetTable()))); + assert(startByteIndex < endByteIndex); + assert(endByteIndex <= sizeof(size_t)); + assert(dirtyPages != nullptr); + assert(dirtyPageIndexRef != nullptr); + + size_t &dirtyPageIndex = *dirtyPageIndexRef; + assert(dirtyPageIndex < dirtyPageCount); + + size_t dirtyBytes = *reinterpret_cast<size_t *>(block); + if (dirtyBytes == 0) + { + return true; + } + + if (startByteIndex != 0) + { + size_t numLowBitsToClear = startByteIndex * 8; + dirtyBytes >>= numLowBitsToClear; + dirtyBytes <<= numLowBitsToClear; + } + if (endByteIndex != sizeof(size_t)) + { + size_t numHighBitsToClear = (sizeof(size_t) - endByteIndex) * 8; + dirtyBytes <<= numHighBitsToClear; + dirtyBytes >>= numHighBitsToClear; + } + + while (dirtyBytes != 0) + { + DWORD bitIndex; + static_assert(sizeof(size_t) <= 8, "Unexpected sizeof(size_t)"); + if (sizeof(size_t) == 8) + { + BitScanForward64(&bitIndex, static_cast<DWORD64>(dirtyBytes)); + } + else + { + BitScanForward(&bitIndex, static_cast<DWORD>(dirtyBytes)); + } + + // Each byte 
is only ever set to 0 or 0xff + assert(bitIndex % 8 == 0); + size_t byteMask = static_cast<size_t>(0xff) << bitIndex; + assert((dirtyBytes & byteMask) == byteMask); + dirtyBytes ^= byteMask; + + DWORD byteIndex = bitIndex / 8; + if (clearDirty) + { + // Clear only the bytes for which pages are recorded as dirty + block[byteIndex] = 0; + } + + void *pageAddress = firstPageAddressInBlock + byteIndex * OS_PAGE_SIZE; + assert(pageAddress >= GetHeapStartAddress()); + assert(pageAddress < GetHeapEndAddress()); + assert(dirtyPageIndex < dirtyPageCount); + dirtyPages[dirtyPageIndex] = pageAddress; + ++dirtyPageIndex; + if (dirtyPageIndex == dirtyPageCount) + { + return false; + } + } + return true; +} + +void SoftwareWriteWatch::GetDirty( + void *baseAddress, + size_t regionByteSize, + void **dirtyPages, + size_t *dirtyPageCountRef, + bool clearDirty, + bool isRuntimeSuspended) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + assert(dirtyPages != nullptr); + assert(dirtyPageCountRef != nullptr); + + size_t dirtyPageCount = *dirtyPageCountRef; + if (dirtyPageCount == 0) + { + return; + } + + if (!isRuntimeSuspended) + { + // When a page is marked as dirty, a memory barrier is not issued after the write most of the time. Issue a memory + // barrier on all active threads of the process now to make recent changes to dirty state visible to this thread. 
+ GCToOSInterface::FlushProcessWriteBuffers(); + } + + uint8_t *tableRegionStart; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableRegionStart, &tableRegionByteSize); + uint8_t *tableRegionEnd = tableRegionStart + tableRegionByteSize; + + uint8_t *blockStart = ALIGN_DOWN(tableRegionStart, sizeof(size_t)); + assert(blockStart >= GetUntranslatedTable()); + uint8_t *blockEnd = ALIGN_UP(tableRegionEnd, sizeof(size_t)); + assert(blockEnd <= GetUntranslatedTableEnd()); + uint8_t *fullBlockEnd = ALIGN_DOWN(tableRegionEnd, sizeof(size_t)); + + size_t dirtyPageIndex = 0; + uint8_t *currentBlock = blockStart; + uint8_t *firstPageAddressInCurrentBlock = reinterpret_cast<uint8_t *>(GetPageAddress(currentBlock - GetTable())); + + do + { + if (blockStart == fullBlockEnd) + { + if (GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + tableRegionStart - blockStart, + tableRegionEnd - fullBlockEnd, + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + *dirtyPageCountRef = dirtyPageIndex; + } + break; + } + + if (tableRegionStart != blockStart) + { + if (!GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + tableRegionStart - blockStart, + sizeof(size_t), + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + currentBlock += sizeof(size_t); + firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE; + } + + while (currentBlock < fullBlockEnd) + { + if (!GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + 0, + sizeof(size_t), + dirtyPages, + &dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + currentBlock += sizeof(size_t); + firstPageAddressInCurrentBlock += sizeof(size_t) * OS_PAGE_SIZE; + } + if (currentBlock < fullBlockEnd) + { + break; + } + + if (tableRegionEnd != fullBlockEnd && + !GetDirtyFromBlock( + currentBlock, + firstPageAddressInCurrentBlock, + 0, + tableRegionEnd - fullBlockEnd, + dirtyPages, + 
&dirtyPageIndex, + dirtyPageCount, + clearDirty)) + { + break; + } + + *dirtyPageCountRef = dirtyPageIndex; + } while (false); + + if (!isRuntimeSuspended && clearDirty && dirtyPageIndex != 0) + { + // When dirtying a page, the dirty state of the page is first checked to see if the page is already dirty. If already + // dirty, the write to mark it as dirty is skipped. So, when the dirty state of a page is cleared, we need to make sure + // the cleared state is visible to other threads that may dirty the page, before marking through objects in the page, so + // that the GC will not miss marking through dirtied objects in the page. Issue a memory barrier on all active threads + // of the process now. + MemoryBarrier(); // flush writes from this thread first to guarantee ordering + GCToOSInterface::FlushProcessWriteBuffers(); + } +} + +#endif // !DACCESS_COMPILE +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP diff --git a/src/gc/softwarewritewatch.h b/src/gc/softwarewritewatch.h new file mode 100644 index 0000000000..3c8491cecb --- /dev/null +++ b/src/gc/softwarewritewatch.h @@ -0,0 +1,339 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#ifndef __SOFTWARE_WRITE_WATCH_H__ +#define __SOFTWARE_WRITE_WATCH_H__ + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#ifndef DACCESS_COMPILE + +extern void SwitchToWriteWatchBarrier(bool isRuntimeSuspended); +extern void SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended); + +#define SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift 0xc + +extern "C" +{ + // Table containing the dirty state. This table is translated to exclude the lowest address it represents, see + // TranslateTableToExcludeHeapStartAddress. + extern uint8_t *g_sw_ww_table; + + // Write watch may be disabled when it is not needed (between GCs for instance). 
This indicates whether it is enabled. + extern bool g_sw_ww_enabled_for_gc_heap; + + extern uint8_t *g_lowest_address; // start address of the GC heap + extern uint8_t *g_highest_address; // end address of the GC heap +} + +class SoftwareWriteWatch +{ +private: + // The granularity of dirty state in the table is one page. Dirtiness is tracked per byte of the table so that + // synchronization is not required when changing the dirty state. Shifting-right an address by the following value yields + // the byte index of the address into the write watch table. For instance, + // GetTable()[address >> AddressToTableByteIndexShift] is the byte that represents the region of memory for 'address'. + static const uint8_t AddressToTableByteIndexShift = SOFTWARE_WRITE_WATCH_AddressToTableByteIndexShift; + +private: + static void VerifyCreated(); + static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize); + static void VerifyMemoryRegion(void *baseAddress, size_t regionByteSize, void *heapStartAddress, void *heapEndAddress); + +public: + static uint8_t *GetTable(); +private: + static uint8_t *GetUntranslatedTable(); + static uint8_t *GetUntranslatedTable(uint8_t *table, void *heapStartAddress); + static uint8_t *GetUntranslatedTableEnd(); + static uint8_t *GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress); +public: + static void InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress); +private: + static void SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress); +public: + static void SetResizedUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress, void *heapEndAddress); + static bool IsEnabledForGCHeap(); + static void EnableForGCHeap(); + static void DisableForGCHeap(); +private: + static void *GetHeapStartAddress(); + static void *GetHeapEndAddress(); + +public: + static void StaticClose(); + +private: + static size_t GetTableByteIndex(void *address); + static void *GetPageAddress(size_t 
tableByteIndex); +public: + static size_t GetTableByteSize(void *heapStartAddress, void *heapEndAddress); + static size_t GetTableStartByteOffset(size_t byteSizeBeforeTable); +private: + static uint8_t *TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress); + static void TranslateToTableRegion(void *baseAddress, size_t regionByteSize, uint8_t **tableBaseAddressRef, size_t *tableRegionByteSizeRef); + +public: + static void ClearDirty(void *baseAddress, size_t regionByteSize); + static void SetDirty(void *address, size_t writeByteSize); + static void SetDirtyRegion(void *baseAddress, size_t regionByteSize); +private: + static bool GetDirtyFromBlock(uint8_t *block, uint8_t *firstPageAddressInBlock, size_t startByteIndex, size_t endByteIndex, void **dirtyPages, size_t *dirtyPageIndexRef, size_t dirtyPageCount, bool clearDirty); +public: + static void GetDirty(void *baseAddress, size_t regionByteSize, void **dirtyPages, size_t *dirtyPageCountRef, bool clearDirty, bool isRuntimeSuspended); +}; + +inline void SoftwareWriteWatch::VerifyCreated() +{ + assert(GetTable() != nullptr); + assert(GetHeapStartAddress() != nullptr); + assert(GetHeapEndAddress() != nullptr); + assert(GetHeapStartAddress() < GetHeapEndAddress()); +} + +inline void SoftwareWriteWatch::VerifyMemoryRegion(void *baseAddress, size_t regionByteSize) +{ + VerifyMemoryRegion(baseAddress, regionByteSize, GetHeapStartAddress(), GetHeapEndAddress()); +} + +inline void SoftwareWriteWatch::VerifyMemoryRegion( + void *baseAddress, + size_t regionByteSize, + void *heapStartAddress, + void *heapEndAddress) +{ + VerifyCreated(); + assert(baseAddress != nullptr); + assert(heapStartAddress != nullptr); + assert(heapStartAddress >= GetHeapStartAddress()); + assert(heapEndAddress != nullptr); + assert(heapEndAddress <= GetHeapEndAddress()); + assert(baseAddress >= heapStartAddress); + assert(baseAddress < heapEndAddress); + assert(regionByteSize != 0); + assert(regionByteSize <= 
reinterpret_cast<size_t>(heapEndAddress) - reinterpret_cast<size_t>(baseAddress)); +} + +inline uint8_t *SoftwareWriteWatch::GetTable() +{ + return g_sw_ww_table; +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable() +{ + VerifyCreated(); + return GetUntranslatedTable(GetTable(), GetHeapStartAddress()); +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTable(uint8_t *table, void *heapStartAddress) +{ + assert(table != nullptr); + assert(heapStartAddress != nullptr); + assert(heapStartAddress >= GetHeapStartAddress()); + + uint8_t *untranslatedTable = table + GetTableByteIndex(heapStartAddress); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + return untranslatedTable; +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd() +{ + VerifyCreated(); + return GetUntranslatedTableEnd(GetTable(), GetHeapEndAddress()); +} + +inline uint8_t *SoftwareWriteWatch::GetUntranslatedTableEnd(uint8_t *table, void *heapEndAddress) +{ + assert(table != nullptr); + assert(heapEndAddress != nullptr); + assert(heapEndAddress <= GetHeapEndAddress()); + + return ALIGN_UP(&table[GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1) + 1], sizeof(size_t)); +} + +inline void SoftwareWriteWatch::InitializeUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress) +{ + assert(GetTable() == nullptr); + SetUntranslatedTable(untranslatedTable, heapStartAddress); +} + +inline void SoftwareWriteWatch::SetUntranslatedTable(uint8_t *untranslatedTable, void *heapStartAddress) +{ + assert(untranslatedTable != nullptr); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + assert(heapStartAddress != nullptr); + + g_sw_ww_table = TranslateTableToExcludeHeapStartAddress(untranslatedTable, heapStartAddress); +} + +inline void SoftwareWriteWatch::SetResizedUntranslatedTable( + uint8_t *untranslatedTable, + void *heapStartAddress, + void *heapEndAddress) +{ + // The runtime needs to be suspended 
during this call, and background GC threads need to synchronize calls to ClearDirty() + // and GetDirty() such that they are not called concurrently with this function + + VerifyCreated(); + assert(untranslatedTable != nullptr); + assert(ALIGN_DOWN(untranslatedTable, sizeof(size_t)) == untranslatedTable); + assert(heapStartAddress != nullptr); + assert(heapEndAddress != nullptr); + assert(heapStartAddress <= GetHeapStartAddress()); + assert(heapEndAddress >= GetHeapEndAddress()); + assert(heapStartAddress < GetHeapStartAddress() || heapEndAddress > GetHeapEndAddress()); + + uint8_t *oldUntranslatedTable = GetUntranslatedTable(); + void *oldTableHeapStartAddress = GetHeapStartAddress(); + size_t oldTableByteSize = GetTableByteSize(oldTableHeapStartAddress, GetHeapEndAddress()); + SetUntranslatedTable(untranslatedTable, heapStartAddress); + + uint8_t *tableRegionStart = &GetTable()[GetTableByteIndex(oldTableHeapStartAddress)]; + memcpy(tableRegionStart, oldUntranslatedTable, oldTableByteSize); +} + +inline bool SoftwareWriteWatch::IsEnabledForGCHeap() +{ + return g_sw_ww_enabled_for_gc_heap; +} + +inline void SoftwareWriteWatch::EnableForGCHeap() +{ + // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other + // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked. + + VerifyCreated(); + assert(!IsEnabledForGCHeap()); + + g_sw_ww_enabled_for_gc_heap = true; + SwitchToWriteWatchBarrier(true); +} + +inline void SoftwareWriteWatch::DisableForGCHeap() +{ + // The runtime needs to be suspended during this call. This is how it currently guarantees that GC heap writes from other + // threads between calls to EnableForGCHeap() and DisableForGCHeap() will be tracked. 
+ + VerifyCreated(); + assert(IsEnabledForGCHeap()); + + g_sw_ww_enabled_for_gc_heap = false; + SwitchToNonWriteWatchBarrier(true); +} + +inline void *SoftwareWriteWatch::GetHeapStartAddress() +{ + return g_lowest_address; +} + +inline void *SoftwareWriteWatch::GetHeapEndAddress() +{ + return g_highest_address; +} + +inline size_t SoftwareWriteWatch::GetTableByteIndex(void *address) +{ + assert(address != nullptr); + + size_t tableByteIndex = reinterpret_cast<size_t>(address) >> AddressToTableByteIndexShift; + assert(tableByteIndex != 0); + return tableByteIndex; +} + +inline void *SoftwareWriteWatch::GetPageAddress(size_t tableByteIndex) +{ + assert(tableByteIndex != 0); + + void *pageAddress = reinterpret_cast<void *>(tableByteIndex << AddressToTableByteIndexShift); + assert(pageAddress >= GetHeapStartAddress()); + assert(pageAddress < GetHeapEndAddress()); + assert(ALIGN_DOWN(pageAddress, OS_PAGE_SIZE) == pageAddress); + return pageAddress; +} + +inline size_t SoftwareWriteWatch::GetTableByteSize(void *heapStartAddress, void *heapEndAddress) +{ + assert(heapStartAddress != nullptr); + assert(heapEndAddress != nullptr); + assert(heapStartAddress < heapEndAddress); + + size_t tableByteSize = + GetTableByteIndex(reinterpret_cast<uint8_t *>(heapEndAddress) - 1) - GetTableByteIndex(heapStartAddress) + 1; + tableByteSize = ALIGN_UP(tableByteSize, sizeof(size_t)); + return tableByteSize; +} + +inline size_t SoftwareWriteWatch::GetTableStartByteOffset(size_t byteSizeBeforeTable) +{ + return ALIGN_UP(byteSizeBeforeTable, sizeof(size_t)); // start of the table needs to be aligned to size_t +} + +inline uint8_t *SoftwareWriteWatch::TranslateTableToExcludeHeapStartAddress(uint8_t *table, void *heapStartAddress) +{ + assert(table != nullptr); + assert(heapStartAddress != nullptr); + + // Exclude the table byte index corresponding to the heap start address from the table pointer, so that each lookup in the + // table by address does not have to calculate (address - 
heapStartAddress) + return table - GetTableByteIndex(heapStartAddress); +} + +inline void SoftwareWriteWatch::TranslateToTableRegion( + void *baseAddress, + size_t regionByteSize, + uint8_t **tableBaseAddressRef, + size_t *tableRegionByteSizeRef) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + assert(tableBaseAddressRef != nullptr); + assert(tableRegionByteSizeRef != nullptr); + + size_t baseAddressTableByteIndex = GetTableByteIndex(baseAddress); + *tableBaseAddressRef = &GetTable()[baseAddressTableByteIndex]; + *tableRegionByteSizeRef = + GetTableByteIndex(reinterpret_cast<uint8_t *>(baseAddress) + (regionByteSize - 1)) - baseAddressTableByteIndex + 1; +} + +inline void SoftwareWriteWatch::ClearDirty(void *baseAddress, size_t regionByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + + uint8_t *tableBaseAddress; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableBaseAddress, &tableRegionByteSize); + memset(tableBaseAddress, 0, tableRegionByteSize); +} + +inline void SoftwareWriteWatch::SetDirty(void *address, size_t writeByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(address, writeByteSize); + assert(address != nullptr); + assert(writeByteSize <= sizeof(void *)); + + size_t tableByteIndex = GetTableByteIndex(address); + assert(GetTableByteIndex(reinterpret_cast<uint8_t *>(address) + (writeByteSize - 1)) == tableByteIndex); + + uint8_t *tableByteAddress = &GetTable()[tableByteIndex]; + if (*tableByteAddress == 0) + { + *tableByteAddress = 0xff; + } +} + +inline void SoftwareWriteWatch::SetDirtyRegion(void *baseAddress, size_t regionByteSize) +{ + VerifyCreated(); + VerifyMemoryRegion(baseAddress, regionByteSize); + + uint8_t *tableBaseAddress; + size_t tableRegionByteSize; + TranslateToTableRegion(baseAddress, regionByteSize, &tableBaseAddress, &tableRegionByteSize); + memset(tableBaseAddress, ~0, tableRegionByteSize); +} + +#endif // !DACCESS_COMPILE +#endif // 
FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#endif // !__SOFTWARE_WRITE_WATCH_H__ diff --git a/src/gc/wks/CMakeLists.txt b/src/gc/wks/CMakeLists.txt new file mode 100644 index 0000000000..fcb95a385e --- /dev/null +++ b/src/gc/wks/CMakeLists.txt @@ -0,0 +1 @@ +add_library_clr(gc_wks STATIC ${GC_SOURCES_WKS}) |