summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
authorDavid Mason <davmason@microsoft.com>2019-02-15 14:02:31 -0800
committerGitHub <noreply@github.com>2019-02-15 14:02:31 -0800
commit342c80a0c400e85e0507adaefae47f737c48f06f (patch)
treec6ca697b63ae22233d2678e72ee73397944a9ae5 /Documentation
parent51d033897eb5663ea8bab53704406d9fd82af98f (diff)
downloadcoreclr-342c80a0c400e85e0507adaefae47f737c48f06f.tar.gz
coreclr-342c80a0c400e85e0507adaefae47f737c48f06f.tar.bz2
coreclr-342c80a0c400e85e0507adaefae47f737c48f06f.zip
Port profiler howtos from David Broman's blog to the documentation folder (#22363)
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/Profiling/Profiler Loading.md23
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Attach.md116
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Attach2.md156
-rw-r--r--Documentation/Profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md34
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Creating an IL-rewriting profiler.md54
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Debugging - Activation.md65
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Debugging - SOS and IDs.md142
-rw-r--r--Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md13
-rw-r--r--Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md44
-rw-r--r--Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md35
-rw-r--r--Documentation/Profiling/davbr-blog-archive/ELT Hooks - The Basics.md131
-rw-r--r--Documentation/Profiling/davbr-blog-archive/ELT Hooks - tail calls.md390
-rw-r--r--Documentation/Profiling/davbr-blog-archive/GC Heap and Alignment Padding.md12
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Generics and Your Profiler.md133
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md114
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Profiler Detach.md75
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md341
-rw-r--r--Documentation/Profiling/davbr-blog-archive/README.md6
-rw-r--r--Documentation/Profiling/davbr-blog-archive/ReJIT - Limitations.md60
-rw-r--r--Documentation/Profiling/davbr-blog-archive/ReJIT - The Basics.md125
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md63
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Tail call JIT conditions.md49
-rw-r--r--Documentation/Profiling/davbr-blog-archive/Type Forwarding.md203
-rw-r--r--Documentation/Profiling/davbr-blog-archive/When is it safe to use ObjectIDs.md14
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/2110.image_051F632D.pngbin0 -> 55569 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/4276.image1_31CAADB7.pngbin0 -> 56952 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/8715.image1_thumb_38118445.pngbin0 -> 41014 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/8715.image_thumb_01A0D243.pngbin0 -> 39212 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/Attach.jpgbin0 -> 42600 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/NoBirthAnnouncement.JPGbin0 -> 909506 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/deadlock.jpgbin0 -> 41486 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/media/gccycle.jpgbin0 -> 38406 bytes
-rw-r--r--Documentation/Profiling/davbr-blog-archive/samples/Add local to LocalVarSig.cpp137
-rw-r--r--Documentation/Profiling/davbr-blog-archive/samples/PlugInToYourProfiler.cpp1
-rw-r--r--Documentation/Profiling/davbr-blog-archive/samples/sigformat.cpp449
-rw-r--r--Documentation/Profiling/davbr-blog-archive/samples/sigparse.cpp1
36 files changed, 2986 insertions, 0 deletions
diff --git a/Documentation/Profiling/Profiler Loading.md b/Documentation/Profiling/Profiler Loading.md
new file mode 100644
index 0000000000..73286298da
--- /dev/null
+++ b/Documentation/Profiling/Profiler Loading.md
@@ -0,0 +1,23 @@
+
+To enable profiling set the following environment variables:
+- `CORECLR_ENABLE_PROFILING=1`
+- `CORECLR_PROFILER={_CLSID of profiler_}`
+
+# Finding the profiler library
+Once profiling is enabled there are two ways we load your profiler, with environment variables (cross-plat) or through the registry (Windows only)
+
+## Environment Variable (cross-plat)
+Set one of the following (if all are set, the bitness-specific variables take precedence). The 32/64 ones specify which bitness of profiler is loaded
+- `CORECLR_PROFILER_PATH=full path to your profiler's DLL`
+- `CORECLR_PROFILER_PATH_32=full path to your profiler's DLL`
+- `CORECLR_PROFILER_PATH_64=full path to your profiler's DLL`
+
+If any of these environment variables are present, we skip the registry lookup altogether, and just use the path from `CORECLR_PROFILER_PATH` to load your DLL.
+
+A couple things to note about this:
+- If you specify `CORECLR_PROFILER_PATH` _and_ register your profiler, then `CORECLR_PROFILER_PATH` always wins. Even if `CORECLR_PROFILER_PATH` points to an invalid path, we will still use `CORECLR_PROFILER_PATH`, and just fail to load your profiler.
+- `CORECLR_PROFILER` is _always required_. If you specify `CORECLR_PROFILER_PATH`, we skip the registry lookup. We still need to know your profiler's CLSID, so we can pass it to your class factory's CreateInstance call.
+
+
+## Through the registry (Windows Only)
+If the environment variables above are not set (and you're running on Windows) then coreclr will look up the CLSID from `CORECLR_PROFILER` in the registry to find the full path to your profiler's DLL. Just like with any COM server DLL, we look for your profiler's CLSID under HKEY_CLASSES_ROOT, which merges the classes from HKLM and HKCU.
diff --git a/Documentation/Profiling/davbr-blog-archive/Attach.md b/Documentation/Profiling/davbr-blog-archive/Attach.md
new file mode 100644
index 0000000000..25da0486fe
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Attach.md
@@ -0,0 +1,116 @@
+*This blog post originally appeared on David Broman's blog on 11/4/2009*
+
+
+Profiler attach is a feature that allows you to attach a profiler to an already running process. The usefulness of this is fairly obvious to anyone who's ever attached a debugger to a running-process: It's helpful when diagnosing hard-to-reproduce problems, and particularly useful when encountering issues in production.
+
+Please note! You can't just take any profiler you bought and suddenly be able to attach it to a running application. The profiler must be built with "attachability" in mind. So if you're a profiler developer looking to pump some attachability into your product, read on--this article is for you. Everyone else, this article will probably be less useful--but just as riveting.
+
+#
+
+# The Players
+
+So how do you get your profiler attached to a running process? The process has already started, and the CLR code which interrogates the environment to determine whether to load a profiler has already run. So how do you kick the process into loading your profiler? The answer: Another process!
+
+![](media/Attach.jpg)
+
+In order to force your profiler DLL to load into the target profilee process, you'll need to create a "trigger" process to send the attach message to the target profilee. Many profilers already ship with a GUI shell to control launching processes to profile. That shell will typically act as your trigger process as well.
+
+# Inside the Trigger Process
+
+Your trigger uses a simple API method, AttachProfiler, to request the target process to load your profiler. Where is this method defined? Well, it doesn't make much sense to put it on ICorProfilerInfo, since that interface is only available to a profiler after it's been loaded. You could imagine a C export from mscoree.dll. But because of in-process side-by-side CLR instances, we're moving away from mscoree.dll exports to a COM-based interface model called "metahost".
+
+## Meta-whos-its?
+
+Whereas the "hosting" interfaces enable one to host and manage a CLR in a process, the "metahost" interfaces allow one to manage multiple CLRs that may be installed onto a machine or loaded into a single process. Here's a high-level view of how you navigate your way through metahost to find AttachProfiler() (there’s a pointer to actual sample code below).
+
+- Get ICLRMetaHost
+- Enumerate the CLRs loaded into the target process
+- Get ICLRRuntimeInfo for the particular CLR in the target process you want to profile
+- Get the corresponding ICLRProfiling
+- Call ICLRProfiling::AttachProfiler
+
+## Users and Integrity
+
+The permissions required to attach a profiler are similar to those required to attach a debugger. First, the trigger process must run as the same user as the target profilee OR as an administrator. Second, on OS's that support process integrity levels, the trigger process must be running at an integrity level higher than or equal to that of the target profilee process. For more information about integrity and mandatory labels, [here's](http://msdn.microsoft.com/en-us/library/bb625964.aspx) some reference from MSDN.
+
+## Sample Trigger Source Code
+
+For some sample code to attach a profiler to a process, take a look at the sample uploaded to the MSDN Code Gallery [here](http://code.msdn.microsoft.com/ProfilerAttacher/).
+
+You'll notice the code attempts to enable the SE\_DEBUG\_NAME privilege, as this is required to open a process running as another user with PROCESS\_ALL\_ACCESS. Again, cross-user attach (i.e., trigger runs as a different user than the target profilee process) is only supported when the trigger is run as an administrator. Otherwise, only same-user attach is supported, and would not need to enable the SE\_DEBUG\_NAME privilege.
+
+# Inside the Profilee Process
+
+Once your trigger has called AttachProfiler(), a message is sent to the target profilee process to load your actual profiler DLL, containing info such as your profiler's GUID. At this point, it's business as usual in the profilee. The profilee locates and loads your profiler DLL, the CLR then calls your class factory object to create an instance of your profiler's ICorProfilerCallback implementation.
+
+Note that, instead of the CLR calling your Initialize() method, the CLR will call your ICorProfilerCallback3::InitializeForAttach() method. There are two reasons for this difference. First, this ensures that only profilers that have been upgraded to work with CLR V4 and opt into attaching will actually be attached. All other profilers can simply return an error from their InitializeForAttach() method (or won't have an ICorProfilerCallback3 implementation to begin with).
+
+The second reason is that InitializeForAttach allows for some extra data to be passed from the trigger to your profilee via a blob of binary data. What's that for? Many profiler products pass configuration information from their shell to their startup-loaded profiler DLL via environment variables. After all, the shell has to set COR\_PROFILER & COR\_ENABLE\_PROFILING in the environment anyway, so why not set some more values there to be read by their profiler? This scheme doesn't work for an attaching profiler, since the shell cannot affect the environment of the already-running profilee. Instead, the AttachProfiler API allows the caller to specify a pointer to a buffer containing whatever data the caller wishes. The CLR makes a copy of the data, sends it to the profilee, and then passes a pointer to this data to the profiler DLL via InitializeForAttach.
+
+The management of the memory containing this binary data follows the usual COM rules. In the trigger process, your trigger code allocates memory for the blob, passes it to AttachProfiler (which will make its own copy of the data), and then your trigger code frees it once AttachProfiler returns. Stack allocation is perfect here; your trigger could just push your own custom structure of data onto the stack and pass a pointer to it in your call to AttachProfiler. Inside the profilee process, your profiler gets access to the blob of data from its InitializeForAttach method. Inside InitializeForAttach, your profiler accesses that memory. If your profiler will need to use that memory later on, your profiler should make a copy of the memory now. After InitializeForAttach returns, the CLR will free the memory.
+
+From your InitializeForAttach implementation, your profiler will call SetEventMask as usual to announce your intentions, and you're off to the races.
+
+# Limitations
+
+It was impossible to enable all profiling scenarios for attach in the time we had for the V4 release. So only profilers that do **sampling** and **memory** analysis will function properly after attaching to a live process. Attempts to use other profiling APIs after attach will be met with CORPROF\_E\_UNSUPPORTED\_FOR\_ATTACHING\_PROFILER.
+
+###
+
+## Specific Callback Limitations
+
+When your attaching profiler calls SetEventMask, you will be limited to only those event mask flags present in the COR\_PRF\_ALLOWABLE\_AFTER\_ATTACH bitmask (you'll find it in corprof.idl). Any other flags, and SetEventMask will return CORPROF\_E\_UNSUPPORTED\_FOR\_ATTACHING\_PROFILER.
+
+## Specific Info Limitations
+
+Most of the ICorProfilerInfo\* methods are available to your attaching profiler, however some are not--particularly those involved in **IL rewriting**. Here's a list of all ICorProfilerInfo\* methods NOT supported for attaching profilers:
+
+- GetILFunctionBody
+- GetILFunctionBodyAllocator
+- SetILFunctionBody
+- SetILInstrumentedCodeMap
+- SetEnterLeaveFunctionHooks\*
+- SetFunctionIDMapper\*
+- GetNotifiedExceptionClauseInfo
+- All methods related to Enter/Leave/Tailcall
+
+It's expected that future releases of the CLR will enable more API methods for use by attaching profilers.
+
+## GC Limitations
+
+### GC Modes
+
+To understand limitations around the GC modes, here's a quick review of the GC modes an app can run under:
+
+- **Workstation Blocking mode**. The thread that triggered the GC performs the GC while all other threads executing managed code must wait.
+- **Workstation Concurrent / Background mode (the default)**. Concurrent GC (V1 & V2) allows portions of a full GC to execute while other threads are allowed to run. Background GC (its replacement in V4) takes it one step further, and also allows an ephemeral GC (i.e., gen 0 or gen 1) to execute while a gen 2 GC is executing.
+- **Server mode**. Hosts like ASP.NET may choose to enable server mode which creates a heap + dedicated GC thread per CPU. This allows GCs to be fanned out to multiple threads.
+
+Of course, [Maoni's blog](http://blogs.msdn.com/maoni/) is required reading for anyone who wants to understand how the GC works.
+
+The profiling API is able to work against workstation blocking mode and server mode, but not concurrent / background mode. This has been the case in V1 & V2, and remains the case in V4. When the app starts up, if a profiler is configured to load, then the CLR forcibly turns off concurrent / background mode, and you end up in workstation blocking mode (or you end up in server mode if the host requested that instead). Again, this has been the case in V1 & V2, and remains true in V4.
+
+So here's the catch. What if a V4 app starts up in background GC mode _without_ a profiler loading on startup, and you later attach a profiler to the process? If the profiler specifies COR\_PRF\_MONITOR\_GC in its call to SetEventMask, then the CLR returns the error CORPROF\_E\_CONCURRENT\_GC\_NOT\_PROFILABLE. In other words, if the profiler is late to the party, then it simply won't work if background GC is on. Since this is the default for client apps, the bottom line is that you can generally successfully attach your memory profiler to server apps (e.g., ASP.NET), but probably not to client apps.
+
+Of course, you could forcibly turn off concurrent / background mode every time the app starts up via a config file:
+
+```xml
+<configuration>
+  <runtime>
+    <gcConcurrent enabled="false"/>
+  </runtime>
+</configuration>
+```
+
+
+
+But you don't really want to be running your apps with a sub-optimal GC mode all the time, just on the off-chance you might need to attach a memory profiler to it. If you suspect you might need to do some memory profiling of a client app, you should just start up your app with the memory profiler to begin with.
+
+### ObjectAllocated
+
+The ObjectAllocated callback is disallowed for attaching profilers (i.e., COR\_PRF\_ENABLE\_OBJECT\_ALLOCATED is not part of the COR\_PRF\_ALLOWABLE\_AFTER\_ATTACH mask).
+
+# Go Forth and Attach
+
+All right, dig through that sample trigger code, and see if you can add "attach" to your list of features. In later posts, I'll talk about how to catch up on application state once your profiler attaches, and also how to detach your profiler when it's done with its business.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Attach2.md b/Documentation/Profiling/davbr-blog-archive/Attach2.md
new file mode 100644
index 0000000000..2f41733f7c
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Attach2.md
@@ -0,0 +1,156 @@
+*This blog post originally appeared on David Broman's blog on 1/18/2010*
+
+
+In a previous [post](Attach.md), I outlined to all you profiler writers how to modify your profiler so it can attach to running processes, and what sorts of limitations your profiler will have when it attaches. In this post, I answer the question, “My profiler is attached. What should it do next?”
+
+# Catch Up
+
+A profiler that loads on startup of an application has the option to know the entire history of that application. By requesting the appropriate callback events, the profiler can know all the classes and modules that have loaded, functions that have JITted, objects that have been allocated, etc. However, a profiler that loads by attaching to an already-running application is a bit like Dorothy who lands in the middle of Oz and has no idea what’s going on. She doesn’t have the luxury of arriving at the beginning of time, and watching everyone from the moment of their birth. She runs into people after they’re fully grown, and is expected to deal gracefully—often by making friends with them. It would not be socially acceptable for Dorothy to encounter an access violation upon meeting someone new.
+
+![NoBirthAnnouncement](media/NoBirthAnnouncement.JPG)
+
+Drawing by Magdalena Hermawan
+
+
+There are two fundamental ways your profiler can catch up on the current state of an application:
+
+- Lazy catch-up—as the profiler encounters new IDs, the profiler queries information about those IDs as it needs them, rather than assuming it has a full cache that’s always built up as the IDs are first created. This is analogous to Dorothy meeting a new grown-up, and gracefully accepting the fact that that person exists.
+- Enumeration—for certain kinds of IDs, the profiler can (at attach time) request a complete list of the currently active IDs and query information about them at that time. Sort of like Dorothy first going to the Oz City Hall and looking up the birth records for everyone.
+
+Lazy catch-up is fairly self-explanatory. For example, if your sampling profiler encounters an IP in a FunctionID you’ve never seen before, just look up whatever info you need about that FunctionID the first time you encounter it, rather than assuming you’d already built up a cache when the function was first JITted. And if you discover that FunctionID resides in a module you’ve never seen before, then just look up whatever info you need about that ModuleID at that point, rather than assuming you already have a complete cache of all modules. Many of you are already doing something like this today if you support sampling against regular NGENd images (since you don’t get JIT notifications of those functions anyway).
+
+Enumeration, on the other hand, has some caveats and is worthwhile to describe in more detail.
+
+# Enumeration via Enum\* APIs
+
+Some kinds of IDs have new enumerator methods as part of the profiling API. In particular:
+
+- ICorProfilerInfo3::EnumModules
+- ICorProfilerInfo3::EnumJITedFunctions
+
+Your profiler calls these methods, and they return a standard enumerator you use to iterate through all of the currently-loaded IDs of that type. It’s worth noting that EnumJITedFunctions only enumerates FunctionIDs for which you would receive JITCompilationStarted/Finished events, and will not include FunctionIDs from NGENd modules.
+
+The primary caveat with using these enumerators is that you’re iterating through a snapshot of the IDs while the process is active and running. (Imagine Dorothy looking through a copy of birth records while babies are still getting born in Oz, who weren’t yet in the copy of records Dorothy is reading.) This means there are races your profiler needs to be resilient to.
+
+## Race #1: When to enumerate? ProfilerAttachComplete()
+
+As you may recall, once your profiler is attached to the process, the CLR calls InitializeForAttach() on your profiler. After your profiler returns from InitializeForAttach(), the CLR turns on callbacks into your profiler. So if your profiler requested COR\_PRF\_MONITOR\_MODULE\_LOADS (by calling SetEventMask() at some point inside your implementation of InitializeForAttach), then as modules start loading and unloading after InitializeForAttach() returns, your profiler will receive the corresponding events. The thing is, “after InitializeForAttach() returns” is a vague phrase. And modules can load or unload at totally arbitrary times with respect to the timing of when your profiler attaches and calls EnumModules(). The thing to avoid here is a hole: a ModuleID your profiler does not find in the enumeration, and for which your profiler receives no ModuleLoad event. This can happen if your profiler calls the enumeration API too soon (i.e., before CLR has enabled event callbacks for your profiler).
+
+Bad timeline (loading; enumerating too soon):
+
+1. Profiler attaches
+2. Profiler calls EnumModules
+3. Module starts to load
+4. ModuleID is now enumerable
+5. ModuleLoadFinished event would fire here if events were enabled (but they’re not yet!)
+6. CLR enables events
+
+The problem is that the profiler calls EnumModules too early. If your profiler only calls EnumModules after CLR enables events, then you’re assured of either seeing a ModuleID via EnumModules or via a ModuleLoad event. In the above scenario, your profiler might as well have never done enumeration at all, since it will still not be notified of the ModuleID before it comes across that ModuleID in action later on. It gets even worse for modules that unload:
+
+Bad timeline (unloading; enumerating too soon):
+
+1. Module loads
+2. ModuleID is now enumerable
+3. Profiler attaches
+4. Profiler calls EnumModules (includes the ModuleID)
+5. Module starts to unload
+6. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet!)
+7. CLR enables events
+
+In the above case, the profiler discovers a ModuleID via EnumModules, but has no idea that the module is now in the process of unloading. So the profiler might query information about the stale ModuleID, potentially causing an AV. Again, this is caused because the profiler called the enumeration API too soon (i.e., before the CLR enabled event callbacks).
+
+The solution is for the profiler to call enumeration APIs only after events have been enabled. Since events are enabled at some point “after InitializeForAttach() returns”, it was necessary for the CLR to provide a new API to notify the profiler that event callbacks have actually been enabled: ICorProfilerCallback3::ProfilerAttachComplete(). **The best place for your profiler to call the enumeration APIs is inside its implementation of ProfilerAttachComplete.** Since events are enabled _just before_ the CLR calls ProfilerAttachComplete, your profiler is assured that events are enabled by the time it calls the enumeration API (from inside ProfilerAttachComplete). This eliminates any potential holes in catch-up information your profiler queries.
+
+## Race #2: Duplicates
+
+When your profiler calls the Enum\* methods, the CLR creates a snapshot of all “enumerable” IDs of the specified type, and gives your profiler an enumerator over those. In the CLR we had a choice. We could either consider an ID to be “enumerable” before or after the corresponding load finished event (or JITCompilationFinished event) would normally be issued. Consider for a moment what we _didn’t_ do. We didn’t consider IDs to be enumerable after the event. If so, that would have led to holes. A profiler could have attached and grabbed an enumeration in the middle and never been notified about the ID.
+
+Bad timeline (loading):
+
+1. Module starts to load
+2. ModuleLoadFinished event would fire here if events were enabled (but they’re not yet—no profiler is attached!)
+3. Profiler attaches
+4. CLR enables events, calls ProfilerAttachComplete()
+5. Profiler calls EnumModules
+6. ModuleID is now enumerable
+
+Because 2 comes before 6, it’s possible for a profiler to attach and grab an enumeration in the middle, and thus never hear about a ModuleID (even though the profiler avoided Race #1 from the previous section). Again, an even worse problem occurs for module unloading. Suppose the CLR were to change an ID’s enumerable status to false after sending the unload event. That would also lead to holes:
+
+Bad timeline (unloading):
+
+1. Module loads, event would fire if profiler were attached (but it’s not), then ModuleID becomes enumerable
+2. Module starts to unload
+3. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet—no profiler is attached!)
+4. Profiler attaches
+5. CLR enables events, calls ProfilerAttachComplete()
+6. Profiler calls EnumModules (ModuleID is still enumerable, so profiler discovers ModuleID at this point)
+7. ModuleID is no longer enumerable
+
+Because 3 comes before 7, a profiler could attach in the middle, grab an enumeration, discover the ModuleID via the enumeration, and have no idea that module was in the process of unloading. If the profiler were to use that ModuleID later on, an AV could result. The above led to the following golden rule:
+
+| **Golden rule: An ID’s enumerability status shall change _before_ the corresponding load/unload event is fired.** |
+
+In other words, an ID becomes enumerable _before_ the LoadFinished (or JITCompilationFinished) event. And an ID ceases to be enumerable _before_ the UnloadStarted event. Or you can think of it as, “The event is always last”. This eliminates any potential holes. So to be even more explicit, here’s the enumerability vs. event ordering:
+
+1. ID available in enumerations snapped now
+2. LoadFinished
+3. ID no longer in enumerations snapped now
+4. UnloadStarted
+
+If an ID is present, the profiler will discover the ID via the enumerator or a LoadFinished event (or both). If an ID is not present, the profiler will either not see the ID via the enumerator or will see an UnloadStarted event (or both). In all cases, the event is more recent, and so the profiler should always trust an event over an enumeration that was generated prior. (More on that last point later.)
+
+The astute reader will notice that what we’ve done here is trade one race for another. We’ve eliminated holes, but the cost is that the profiler must deal with duplicates. For example:
+
+Good timeline (loading with duplicate):
+
+1. Module starts to load
+2. ModuleID is now enumerable
+3. Profiler attaches
+4. CLR enables events, calls ProfilerAttachComplete()
+5. Profiler calls EnumModules
+6. Profiler receives ModuleLoadFinished
+
+At first it might seem a little strange. The enumerator contains the ModuleID, so the profiler sees that the module is loaded. But then the profiler receives a ModuleLoadFinished event, which might seem odd, since the enumerator implied the module was already loaded. This is what I mean by “duplicate”—the profiler is notified of a ModuleID twice (once via the enumeration, and once via the event). The profiler will need to be resilient to this. Although it’s a bit awkward, it’s better than the alternative of a hole, since the profiler would have no way to know the hole occurred. Unloading has a similar situation:
+
+Good timeline (unloading with duplicate):
+
+1. Module loads, event would have fired if profiler were attached (but it’s not), ModuleID becomes enumerable
+2. Module starts to unload
+3. ModuleID is no longer enumerable
+4. Profiler attaches
+5. CLR enables events, calls ProfilerAttachComplete()
+6. Profiler calls EnumModules
+7. Profiler receives ModuleUnloadStarted event
+
+In step 6, the profiler does not see the unloading ModuleID (since it’s no longer enumerable). But in step 7 the profiler is notified that the ModuleID is unloading. Perhaps it’s a bit awkward that the profiler would be told that a seemingly nonexistent ModuleID is unloading. But again, this is better than the alternative, where a profiler finds an unloading ID in the enumeration, and is never told that the ModuleID got unloaded. One more case that’s worthwhile to bring out occurs when we move the profiler attach a bit earlier in the sequence.
+
+Good timeline (unloading without duplicate):
+
+1. Module loads, event would fire if profiler were attached, ModuleID becomes enumerable
+2. Module starts to unload
+3. Profiler attaches
+4. CLR enables events, calls ProfilerAttachComplete()
+5. Profiler calls EnumModules (ModuleID is still present in the enumeration)
+6. ModuleID is no longer enumerable
+7. Profiler receives ModuleUnloadStarted event
+
+Here the profiler discovers the ModuleID exists in step 5 (as the ModuleID is still enumerable at that point), but the profiler almost immediately after discovers that the module is unloading in step 7. As stated above, events are more recent, and should always take precedence over enumerations that were generated prior. This could get a bit tricky, though, as the profiler generates an enumeration before it iterates over the enumeration. In the above sequence, the enumeration is generated in step 5. However, the profiler could be iterating through the generated enumeration for quite some time, and might not come across the unloading ModuleID until after step 7 (multiple threads means fun for everyone!). For this reason, it’s important for the profiler to give precedence to events that occur after the enumeration was _generated_, even though iteration over that enumeration might occur later.
+
+# Catching Up on the State of GC Heap
+
+If you’re writing a memory profiler, you likely have code that responds to the various GC events. In order for your memory profiler to attach to a running process, it needs to deal gracefully with the fact that it does not yet have a cache of objects on the heap. One straightforward way for the profiler to deal with this is to force a GC at attach time.
+
+## GC Already in Progress
+
+Remember that your profiler attaches at a completely arbitrary point during process execution, possibly while a GC is already in progress. This means that, once the profiler has enabled callback events, the profiler may start seeing GC callbacks (e.g., MovedReferences, ObjectReferences) from the middle of that GC, without seeing a GarbageCollectionStarted() first. Your profiler should be resilient to this situation, preferably by ignoring GC callbacks until the first full GC and / or profiler-induced GC begins. The profiler can do this by ignoring all GC callbacks until it sees the first GarbageCollectionStarted() callback OR by ignoring all GC callbacks until it sees the first GarbageCollectionStarted() callback after calling ForceGC().
+
+## Inducing Your First GC
+
+It may be beneficial to program your profiler such that, upon attaching to the process, the profiler induces a first full GC automatically—call this the “catch-up” GC. This will allow your profiler to use events like RootReferences2 and ObjectReferences during that initial “catch-up” GC, in order to build up its cache of objects from scratch. After that initial catch-up GC, your profiler should then be able to deal with successive GCs the usual way.
+
+It’s worth reiterating a limitation I stated in the first attach post (linked above): the ObjectAllocated() callback is unavailable to profilers that attach to running processes. Therefore, any logic your profiler has that assumes it gets all the ObjectAllocated() callbacks will need to be addressed. Any objects newly allocated since the last GC may still be unknown to your profiler until it comes across their references via GC callbacks during the next GC (unless your profiler comes across those objects in other ways—example: as parameters to methods you hook with the Enter/Leave/Tailcall probes).
+
+
+
+OK, that about covers the first steps your profiler should take once it attaches to a running process. It will either need to use lazy catch-up or the catch-up enumerations (or, quite likely, a combination of both). When using the enumerations, be careful to avoid holes (by calling the enumeration methods from inside ProfilerAttachComplete()), and be resilient to receiving information duplicated across the enumeration and the load / unload events. For memory profilers, be wary of GCs already in progress at the time your profiler attaches, and consider inducing your own GC at attach-time to build your initial cache of GC objects.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md b/Documentation/Profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md
new file mode 100644
index 0000000000..c6feb97c77
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md
@@ -0,0 +1,34 @@
+*This blog post originally appeared on David Broman's blog on 12/23/2008*
+
+
+_What follows is a long-lost blog entry that_ [_Jonathan Keljo_](http://blogs.msdn.com/jkeljo) _had been working on. I brushed off some of the dust and am posting it here for your enjoyment. Thank you, Jonathan!_
+
+In CLR 2.0 we added a new HRESULT, CORPROF\_E\_UNSUPPORTED\_CALL\_SEQUENCE. This HRESULT is returned from ICorProfilerInfo methods when called in an "unsupported way". This "unsupported way" is primarily an issue with those nasty beasts, hijacking profilers (though read on for cases where non-hijacking profilers can see this HRESULT, too). Hijacking profilers are those profilers that forcibly reset a thread's register context at completely arbitrary times to enter profiler code, and then usually to re-enter the CLR via ICorProfilerInfo. Why is that so bad? Well, for the sake of performance, lots of the IDs the profiling API gives out are just pointers to relevant data structures within the CLR. So lots of ICorProfilerInfo calls just rip information out of those data structures and pass them back. Of course, the CLR might be changing things in those structures as it runs, maybe (or maybe not) taking locks to do so. Imagine the CLR was already holding (or attempting to acquire) such locks at the time the profiler hijacked the thread. Now, the thread re-enters the CLR, trying to take more locks or inspect structures that were in the process of being modified, and are thus in an inconsistent state. Deadlocks and AVs are easy to come by in such situations.
+
+In general, if you're a non-hijacking profiler sitting inside an ICorProfilerCallback method and you're calling into ICorProfilerInfo, you're fine. For example, you get a ClassLoadFinished and you start asking for information about the class. You might be told that information isn't available yet (CORPROF\_E\_DATAINCOMPLETE) but the program won't deadlock or AV. This class of calls into ICorProfilerInfo are called "synchronous", because they are made from within an ICorProfilerCallback method.
+
+On the other hand, if you're hijacking or otherwise calling ICorProfilerInfo functions on a managed thread but **not** from within an ICorProfilerCallback method, that is considered an "asynchronous" call. In v1.x you never knew what would happen in an asynchronous call. It might deadlock, it might crash, it might give a bogus answer, or it might give the right answer.
+
+In 2.0 we've added some simple checks to help you avoid this problem. If you call an unsafe ICorProfilerInfo function asynchronously, instead of crossing its fingers and trying, it will fail with CORPROF\_E\_UNSUPPORTED\_CALL\_SEQUENCE. The general rule of thumb is, nothing is safe to call asynchronously. But here are the exceptions that are safe, and that we specifically allow to be called asynchronously:
+
+- GetEventMask/SetEventMask
+- GetCurrentThreadID
+- GetThreadContext
+- GetThreadAppDomain
+- GetFunctionFromIP
+- GetFunctionInfo/GetFunctionInfo2
+- GetCodeInfo/GetCodeInfo2
+- GetModuleInfo
+- GetClassIDInfo/GetClassIDInfo2
+- IsArrayClass
+- SetFunctionIDMapper
+- DoStackSnapshot
+
+There are also a few things to keep in mind:
+
+1. ICorProfilerInfo calls made from within the fast-path Enter/Leave callbacks are considered asynchronous. (Though ICorProfilerInfo calls made from within the _slow_-path Enter/Leave callbacks are considered synchronous.) See the blog entries [here](ELT Hooks - The Basics.md) and [here](http://blogs.msdn.com/jkeljo/archive/2005/08/11/450506.aspx) for more info on fast / slow path.
+2. ICorProfilerInfo calls made from within instrumented code (i.e., IL you've rewritten to call into your profiler and then into ICorProfilerInfo) are considered asynchronous.
+3. Calls made inside your FunctionIDMapper hook are considered to be synchronous.
+4. Calls made on threads created by your profiler, are always considered to be synchronous. (This is because there's no danger of conflicts resulting from interrupting and then re-entering the CLR on that thread, since a profiler-created thread was not in the CLR to begin with.)
+5. Calls made inside a StackSnapshotCallback are considered to be synchronous iff the call to DoStackSnapshot was synchronous.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Creating an IL-rewriting profiler.md b/Documentation/Profiling/davbr-blog-archive/Creating an IL-rewriting profiler.md
new file mode 100644
index 0000000000..21678cad4e
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Creating an IL-rewriting profiler.md
@@ -0,0 +1,54 @@
+*This blog post originally appeared on David Broman's blog on 3/6/2007*
+
+
+A frequent topic of discussion between those of us on the CLR Profiling API team at Microsoft and our customers is how to write a profiler that rewrites IL to do cool stuff. Unfortunately, there still is very little documentation on how to do this, and what documentation there is, is rather scattered. I'm not going to say anything new here. But I will try to bring together the scattered info into one place.
+
+**Q: Why do I care?**
+
+You may want to instrument managed code to insert custom calls into your profiler to measure timings or code coverage, or record execution flow. Maybe you want to perform custom actions, like taking note whenever thread synchronization is invoked (e.g., Monitor.Enter,Leave). One way to do this is to take the original IL of the application you're profiling, and rewrite that IL to contain extra code or hooks into your profiler or managed code you ship alongside your profiler.
+
+**Q: I can ship managed code alongside my profiler? Are you saying I can write my profiler in managed code?**
+
+No, sorry to get your hopes up. Your profiler DLL must be unmanaged. Your ICorProfilerCallback implementations must be unmanaged (and should not call managed code). However, if you rewrite IL, it's perfectly fine for that IL to call into managed code that you've written and shipped alongside your profiler.
+
+**Q: In a nutshell, what's involved?**
+
+Well, first off, you're making a profiler. That means you create an unmanaged in-proc COM server DLL. If this much is already new to you, you should probably stop reading this, search MSDN for "ICorProfilerCallback", and grope through the table of contents for background info on how to write a profiler in general.
+
+Keep in mind there are many ways to do this. I'll outline one of the more straightforward approaches here, and the adventurous should feel free to substitute their own ingredients:
+
+- In your **ICorProfilerCallback2::ModuleLoadFinished** callback, you call **ICorProfilerInfo2::GetModuleMetadata** to get a pointer to a metadata interface on that module.
+- QI for the metadata interface you want. Search MSDN for "IMetaDataImport", and grope through the table of contents to find topics on the metadata interfaces.
+- Once you're in metadata-land, you have access to all the types in the module, including their fields and function prototypes. You may need to parse metadata signatures and [this signature parser](samples/sigparser.cpp) may be of use to you.
+- In your **ICorProfilerCallback2::JITCompilationStarted** callback, you may use **ICorProfilerInfo2::GetILFunctionBody** to inspect the original IL, and **ICorProfilerInfo2::GetILFunctionBodyAllocator** and then **ICorProfilerInfo2::SetILFunctionBody** to replace that IL with your own.
+
+**Q: What about NGEN?**
+
+If you want to rewrite IL of NGENd modules, well, it's kind of too late because the original IL has already been compiled into native code. However, you do have some options. If your profiler sets the **COR\_PRF\_USE\_PROFILE\_IMAGES** monitor event flag, that will force the "NGEN /Profile" version of the modules to load if they're available. (I've already blogged a little about "NGEN /Profile", including how to generate those modules, [here](ELT Hooks - The Basics.md).) So, at run-time, one of two things will happen for any given module.
+
+1) If you set **COR\_PRF\_USE\_PROFILE\_IMAGES** and the NGEN /Profile version is available, it will load. You will then have the opportunity to respond to the **JITCachedFunctionSearchStarted** callback. When a function from an NGEN /Profile module is about to be executed for the first time, your profiler receives the **JITCachedFunctionSearchStarted** callback. You may then set the \*pbUseCachedFunction [out] parameter to FALSE, and that will force the CLR to JIT the function instead of using the version that was already compiled into the NGEN /Profile module. Then, when the CLR goes to JIT the function, your profiler receives the **JITCompilationStarted** callback and can perform IL rewriting just as it does above for functions that exist in non-NGENd modules. What's nice about this approach is that, if you only need to instrument a few functions here and there, it can be faster not to have to JIT everything, just so you get the **JITCompilationStarted** callback for the few functions you're interested in. This approach can therefore improve startup performance of the application while it's being profiled. The disadvantage, though, is that your profiler must ensure the NGEN /Profile versions of all the modules get generated beforehand and get installed onto the user's machine. Depending on your scenarios and customers, this may be too cumbersome to ensure.
+
+2) If you set **COR\_PRF\_USE\_PROFILE\_IMAGES** and the NGEN /Profile version is _not_ available, the CLR will refuse to load the regular NGENd version of that module, and will instead JIT everything from the module. Thus, it's ensured that you have the opportunity to intercept **JITCompilationStarted** , and can replace the IL as described above.
+
+**Q: Any examples?**
+
+Here is an MSDN article that talks about making an IL rewriting profiler:
+[http://msdn.microsoft.com/en-us/magazine/cc188743.aspx](http://msdn.microsoft.com/en-us/magazine/cc188743.aspx)
+
+**Q: Any caveats?**
+
+Rewriting IL in mscorlib.dll functions can be dangerous, particularly in functions that are executed during startup initialization of the managed app or any of its AppDomains. The app may not be initialized enough to handle executing some of the managed code that might get called (directly or indirectly) from your rewritten IL.
+
+If you're going to modify the IL to call into some of your own managed code, be careful about which functions you choose to modify. If you're not careful, you might accidentally modify the IL belonging to your own assembly and cause infinite recursion.
+
+And then there's the worst of both worlds: when you need to rewrite IL to call into your own assemblies _and_ you happen to be rewriting IL in mscorlib. Note that it's simply unsupported to force mscorlib.dll to reference any other assembly. The CLR loader treats mscorlib.dll pretty specially. The loader expects that, while everyone in the universe may reference mscorlib.dll, mscorlib.dll had better not reference any other assembly. If you absolutely must instrument mscorlib.dll by modifying IL, and you must have that IL reference some nifty new function of yours, you had better put that function into mscorlib.dll by dynamically modifying mscorlib.dll's metadata when it is loaded. In this case you no longer have the option of creating a separate assembly to house your custom code.
+
+**Q: Has anyone else tried making an IL-rewriting profiler?**
+
+Sure. If you want to learn from other people's experiences, read through the [Building Development and Diagnostic Tools for .Net Forum](http://forums.microsoft.com/MSDN/ShowForum.aspx?ForumID=868&SiteID=1). Here are some interesting threads:
+
+[http://social.msdn.microsoft.com/Forums/en-NZ/netfxtoolsdev/thread/5f30596b-e7b7-4b1f-b8e1-8172aa8dde31](http://social.msdn.microsoft.com/Forums/en-NZ/netfxtoolsdev/thread/5f30596b-e7b7-4b1f-b8e1-8172aa8dde31)
+[http://social.msdn.microsoft.com/Forums/en-GB/netfxtoolsdev/thread/c352266f-ded3-4ee2-b2f9-fbeb41a70c27](http://social.msdn.microsoft.com/Forums/en-GB/netfxtoolsdev/thread/c352266f-ded3-4ee2-b2f9-fbeb41a70c27)
+
+
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Debugging - Activation.md b/Documentation/Profiling/davbr-blog-archive/Debugging - Activation.md
new file mode 100644
index 0000000000..04b943e98b
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Debugging - Activation.md
@@ -0,0 +1,65 @@
+*This blog post originally appeared on David Broman's blog on 12/11/2007*
+
+
+This is the first of some tips to help you debug your profiler. Note that these tips assume you're using CLR 2.x (see [this entry](http://blogs.msdn.com/davbr/archive/2007/12/06/versions-of-microsoft-net-framework-clr-and-your-profiler.aspx) for info on how CLR version numbers map to .NET Framework version numbers). In today's post, I address a frequent question from profiler developers and users: "Why didn't my profiler load?".
+
+## Event log (Windows only)
+
+In the Application event log, you'll see entries if the CLR attempts, but fails, to load and initialize your profiler. So this is a nice and easy place to look first, as the message may well make it obvious what went wrong.
+
+## Weak link in the chain?
+
+The next step is to carefully retrace this chain to make sure everything is registered properly:
+
+Environment variables --\> Registry --\> Profiler DLL on File system.
+
+The first link in this chain is to check the environment variables inside the process that should be profiled. If you're running the process from a command-prompt, you can just try a "set co" from the command prompt:
+
+|
+```
+**C:\>** set co
+ (blah blah, other vars beginning with "co")
+```
+
+```
+Cor_Enable_Profiling=0x1
+ COR_PROFILER={C5F90153-B93E-4138-9DB7-EB7156B07C4C}
+```
+ |
+
+If your scenario doesn't allow you to just run the process from a command prompt, like say an asp.net scenario, you may want to attach a debugger to the process that's supposed to be profiled, or use IFEO (HKEY\_LOCAL\_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution Options) to force a debugger to start when the worker process starts. In the debugger, you can then use "!peb" to view the environment block, which will include the environment variables.
+
+Once you verify Cor\_Enable\_Profiling and COR\_PROFILER are ok, it's time to search the registry for the very same GUID set in your COR\_PROFILER environment variable. You should find it at a path like this:
+
+HKEY\_LOCAL\_MACHINE\SOFTWARE\Classes\CLSID\{C5F90153-B93E-4138-9DB7-EB7156B07C4C}
+
+If the registry has the GUID value, it's finally time to check out your file system. Go under the InprocServer32 subkey under the GUID:
+
+HKEY\_LOCAL\_MACHINE\SOFTWARE\Classes\CLSID\{C5F90153-B93E-4138-9DB7-EB7156B07C4C}\InprocServer32
+
+and look at the default value data. It should be a full path to your profiler's DLL. Verify it's accurate. If not, perhaps you didn't properly run regsvr32 against your profiler, or maybe your profiler's **DllRegisterServer** had problems.
+
+## Time for a debugger
+
+If the above investigation indicates everything's ok, then your profiler is properly registered and your environment is properly set up, but something bad must be happening at run time. You'll want symbols for the CLR, which are freely available via Microsoft's symbol server. If you set this environment variable, you can ensure windbg will always use the symbol server:
+
+set \_NT\_SYMBOL\_PATH=srv\*C:\MySymbolCache\*http://msdl.microsoft.com/download/symbols
+
+Feel free to add more paths (separate them via ";") so you can include your profiler's symbols as well. Now, from a command-prompt that has your Cor\_Enable\_Profiling and COR\_PROFILER variables set, run windbg against the executable you want profiled. The debuggee will inherit the environment, so the profiling environment variables will be propagated to the debuggee.
+
+Note: The following contains implementation details of the runtime. While these details are useful as a debugging aid, your profiler code cannot make assumptions about them. These implementation details are subject to change at whim.
+
+Once windbg is running, try setting this breakpoint:
+
+bu mscordbc!EEToProfInterfaceImpl::CreateProfiler
+
+Now go! If you hit that breakpoint, that verifies the CLR has determined that a profiler has been requested to load from the environment variables, but the CLR has yet to read the registry. Let's see if your DLL actually gets loaded. You can use
+
+sxe ld _NameOfYourProfiler_.dll
+
+or even set a breakpoint inside your Profiler DLL's **DllMain.** Now go, and see if your profiler is getting loaded. If you can verify your profiler's DLL is getting loaded, then you now know your registry is pointing to the proper path, and any static dependencies your profiler has on other DLLs have been resolved. But will your profiler COM object get instantiated properly? Set breakpoints in your class factory ( **DllGetClassObject** ) and your profiler COM object's **QueryInterface** to see if you can spot problems there. For example, if your profiler only works against CLR 1.x, then the CLR's call into your QueryInterface will fail, since you don't implement ICorProfilerCallback2.
+
+If you're still going strong, set a breakpoint in your profiler's **Initialize** () callback. Failures here are actually a popular cause for activation problems. Inside your Initialize() callback, your profiler is likely calling QueryInterface for the ICorProfilerInfoX interface of your choice, and then calling SetEventMask, and doing other initialization-related tasks, like calling SetEnterLeaveFunctionHooks(2). Do any of these fail? Is your Initialize() callback returning a failure HRESULT?
+
+Hopefully by now you've isolated the failure point. If not, and your Initialize() is happily returning S\_OK, then your profiler is apparently loading just fine. At least it is when you're debugging it. :-)
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Debugging - SOS and IDs.md b/Documentation/Profiling/davbr-blog-archive/Debugging - SOS and IDs.md
new file mode 100644
index 0000000000..8f03224492
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Debugging - SOS and IDs.md
@@ -0,0 +1,142 @@
+*This blog post originally appeared on David Broman's blog on 12/18/2007*
+
+
+In this debugging post, I'll talk about the various IDs the profiling API exposes to your profiler, and how you can use SOS to give you more information about the IDs. As usual, this post assumes you're using CLR 2.x.
+
+## S.O.What Now?
+
+SOS.DLL is a debugger extension DLL that ships with the CLR. You'll find it sitting alongside mscorwks.dll. While originally written as an extension to the windbg family of debuggers, Visual Studio can also load and use SOS. If you search the MSDN blogs for "SOS" you'll find lots of info on it. I'm not going to repeat all that's out there, but I'll give you a quick primer on getting it loaded.
+
+In windbg, you'll need mscorwks.dll to load first, and then you can load SOS. Often, I don't need SOS until well into my debugging session, at which point mscorwks.dll has already been loaded anyway. However, there are some cases where you'd like SOS loaded at the first possible moment, so you can use some of its commands early (like !bpmd to set a breakpoint on a managed method). So a surefire way to get SOS loaded ASAP is to have the debugger break when mscorwks gets loaded (e.g., "sxe ld mscorwks"). Once mscorwks is loaded, you can load SOS using the .loadby command:
+
+|
+```
+0:000\> **sxe ld mscorwks**
+ 0:000\> g
+ ModLoad: 79e70000 7a3ff000 C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorwks.dll
+ eax=00000000 ebx=00000000 ecx=00000000 edx=00000000 esi=7efdd000 edi=20000000
+ eip=77a1a9fa esp=002fea38 ebp=002fea78 iopl=0 nv up ei pl nz na po nc
+ cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00000202
+ ntdll!NtMapViewOfSection+0x12:
+ 77a1a9fa c22800 ret 28h
+ 0:000\> **.loadby sos mscorwks**
+```
+ |
+
+With SOS loaded, you can now use its commands to inspect the various IDs that the profiling API passes to your profiler.
+
+Note: The following contains implementation details of the runtime. While these details are useful as a debugging aid, your profiler code cannot make assumptions about them. These implementation details are subject to change at whim.
+
+## FunctionID Walkthrough
+
+For starters, take a look at FunctionIDs. Your profiler receives a FunctionID anytime you hit a callback that needs to, well, identify a function! For example, when it's time to JIT, the CLR issues JITCompilationStarted (assuming your profiler subscribed to that callback), and one of the parameters to the callback is a FunctionID. You can then use that FunctionID in later calls your profiler makes back into the CLR, such as GetFunctionInfo2.
+
+As far as your profiler is concerned, a FunctionID is just an opaque number. It has no meaning in itself; it's merely a handle you can pass back into the CLR to refer to the function. Under the covers, however, a FunctionID is actually a pointer to an internal CLR data structure called a MethodDesc. I must warn you again that you cannot rely on this information when coding your profiler. The CLR team reserves the right to change the underlying meaning of a FunctionID to be something radically different in later versions. This info is for entertainment and debugging purposes only!
+
+Ok, so FunctionID = (MethodDesc \*). How does that help you? SOS just so happens to have a command to inspect MethodDescs: !dumpmd. So if you're in a debugger looking at your profiler code that's operating on a FunctionID, it can be beneficial to you to find out which function that FunctionID actually refers to. In the example below, the debugger will break in my profiler's JITCompilationStarted callback and look at the FunctionID. It's assumed that you've already loaded SOS as per above.
+
+|
+```
+0:000\> bu UnitTestSampleProfiler!SampleCallbackImpl::JITCompilationStarted
+ 0:000\> g
+ ...
+```
+
+```
+Breakpoint 0 hit
+ eax=00c133f8 ebx=00000000 ecx=10001218 edx=00000001 esi=002fec74 edi=00000000
+ eip=10003fc0 esp=002fec64 ebp=002feca4 iopl=0 nv up ei pl nz na po nc
+ cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00000202
+ UnitTestSampleProfiler!SampleCallbackImpl::JITCompilationStarted:
+ 10003fc0 55 push ebp
+```
+ |
+
+The debugger is now sitting at the beginning of my profiler's JITCompilationStarted callback. Let's take a look at the parameters.
+
+|
+```
+0:000\> dv
+ this = 0x00c133f8
+ **functionID = 0x1e3170**
+ fIsSafeToBlock = 1
+```
+ |
+
+Aha, that's the FunctionID about to get JITted. Now use SOS to see what that function really is.
+
+|
+```
+0:000\> !dumpmd 0x1e3170
+ Method Name: test.Class1.Main(System.String[])
+ Class: 001e1288
+**MethodTable: 001e3180** mdToken: 06000001
+ Module: 001e2d8c
+ IsJitted: no
+ m\_CodeOrIL: ffffffff
+```
+ |
+
+Lots of juicy info here, though the Method Name typically is what helps me the most in my debugging sessions. mdToken tells us the metadata token for this method. MethodTable tells us where another internal CLR data structure is stored that contains information about the class containing the function. In fact, the profiling API's ClassID is simply a MethodTable \*. [Note: the "Class: 001e1288" in the output above is very different from the MethodTable, and thus different from the profiling API's ClassID. Don't let the name fool you!] So we could go and inspect a bit further by dumping information about the MethodTable:
+
+|
+```
+0:000\> !dumpmt 0x001e3180
+ EEClass: 001e1288
+ Module: 001e2d8c
+ Name: test.Class1
+ mdToken: 02000002 (C:\proj\HelloWorld\Class1.exe)
+ BaseSize: 0xc
+ ComponentSize: 0x0
+ Number of IFaces in IFaceMap: 0
+ Slots in VTable: 6
+```
+ |
+
+And of course, !dumpmt can be used anytime you come across a ClassID and want more info on it.
+
+[
+
+Update 12/29/2011
+
+In the original posting, I neglected to mention that there are cases where ClassIDs are not actually MethodTable \*'s, and thus cannot be inspected via !dumpmt. The most common case is some kinds of arrays, though there are other cases as well, such as function pointers, byrefs, and others. In these cases, if you look at the ClassID value in a debugger, you'll see that it's not pointer-aligned. Some of the low-order bits may be intentionally set by the CLR to distinguish these ClassIDs from MethodTable pointers. Although !dumpmt cannot be used on these ClassIDs, you can safely call profiling API methods such as IsArrayClass or GetClassIDInfo(2) on them.
+
+]
+
+## IDs and their Dumpers
+
+Now that you see how this works, you'll need to know how the profiling IDs relate to the various SOS commands that dump info on them:
+
+| **ID** | **Internal CLR Structure** | **SOS command** |
+| AssemblyID | Assembly \* | !DumpAssembly |
+| AppDomainID | AppDomain \* | !DumpDomain |
+| ModuleID | Module \* | !DumpModule |
+| ClassID | MethodTable \* | !DumpMT |
+| ThreadID | Thread \* | !Threads (see note) |
+| FunctionID | MethodDesc \* | !DumpMD |
+| ObjectID | Object \* (i.e., a managed object) | !DumpObject |
+
+Note: !Threads takes no arguments, but simply dumps info on all threads that have ever run managed code. If you use "!Threads -special" you get to see other special threads separated out explicitly, including threads that perform GC in server-mode, the finalizer thread, and the debugger helper thread.
+
+## More Useful SOS Commands
+
+It would probably be quicker to list what _isn't_ useful! I encourage you to do a !help to see what's included. Here's a sampling of what I commonly use:
+
+!u is a nice SOS analog to the windbg command "u". While the latter gives you a no-frills disassembly, !u works nicely for managed code, including spanning the disassembly from start to finish, and converting metadata tokens to names.
+
+!bpmd lets you place a breakpoint on a managed method. Just specify the module name and the fully-qualified method name. For example:
+
+|
+```
+!bpmd MyModule.exe MyNamespace.MyClass.Foo
+```
+ |
+
+If the method hasn't jitted yet, no worries. A "pending" breakpoint is placed. If your profiler performs IL rewriting, then using !bpmd on startup to set a managed breakpoint can be a handy way to break into the debugger just before your instrumented code will run (which, in turn, is typically just after your instrumented code has been jitted). This can help you in reproducing and diagnosing issues your profiler may run into when instrumenting particular functions (due to something interesting about the signature, generics, etc.).
+
+!PrintException: If you use this without arguments you get to see a pretty-printing of the last outstanding managed exception on the thread; or specify a particular Exception object's address.
+
+
+
+Ok, that about does it for SOS. Hopefully this info can help you track down problems a little faster, or better yet, perhaps this can help you step through and verify your code before problems arise.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md
new file mode 100644
index 0000000000..2f65767082
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md
@@ -0,0 +1,13 @@
+*This blog post originally appeared on David Broman's blog on 10/24/2005*
+
+
+In my initial [post](DoStackSnapshot - Exception Filters.md) about DoStackSnapshot, I touched on how and when your profiler can "fill in the holes" by walking the unmanaged parts of the stack itself. Doing this requires that your profiler have access to a register context at the top of the unmanaged block that you can use to begin your walk. So it's quite reasonable for you to ask, "What registers will be valid in the context I receive in my StackSnapshotCallback call?"
+
+The quick answer is that **nonvolatile (i.e., preserved), integer registers** should be valid. You don't really need many registers to walk the stack anyway. Obviously, you want a good stack pointer and instruction pointer. And hey, a frame pointer is handy when you come across an EBP-based frame in x86 (RBP on x64). These are all included in the set, of course. Specifically by architecture, you can trust these fields in your context:
+
+x86: Edi, Esi, Ebx, Ebp, Esp, Eip
+x64: Rdi, Rsi, Rbx, Rbp, Rsp, Rip, R12:R15
+ia64: IntS0:IntS3, RsBSP, StIFS, RsPFS, IntSp, StIIP, StIPSR
+
+
+
diff --git a/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md
new file mode 100644
index 0000000000..60ce221b46
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md
@@ -0,0 +1,44 @@
+*This blog post originally appeared on David Broman's blog on 10/10/2005*
+
+
+Believe it or not, my last (rather large) post on stack walking actually left out several miscellaneous details about using DoStackSnapshot. I'll be posting those details separately. We'll start off with some light reading on exception filters. No deadlocks this time, I promise.
+
+For those of you diehard C# fans, you might be unaware of the existence of exception filters in managed code. While VB.NET makes them explicitly available to the programmer, C# does not. Filters are important to understand when you call DoStackSnapshot, as your results might look a little weird if you don't know how to interpret them.
+
+First, a little background. For the full deal, check out the MSDN Library topic on VB.NET's [try/catch/finally statements](http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vblr7/html/vastmTryCatchFinally.asp). But here's an appetizer. In VB.NET you can do this:
+
+```
+Function Negative() As Boolean
+ Return False
+End Function
+
+Function Positive() As Boolean
+ Return True
+End Function
+
+Sub Thrower
+ Throw New Exception
+End Sub
+
+Sub Main()
+ Try
+ Thrower()
+ Catch ex As Exception When Negative()
+ MsgBox("Negative")
+ Catch ex As Exception When Positive()
+ MsgBox("Positive")
+ End Try
+End Sub
+```
+
+The filters are the things that come after "When". We all know that, when an exception is thrown, its type must match the type specified in a Catch clause in order for that Catch clause to be executed. "When" is a way to further restrict whether a Catch clause will be executed. Now, not only must the exception's type match, but also the When clause must evaluate to True for that Catch clause to be chosen. In the example above, when we run, we'll skip the first Catch clause (because its filter returned False), and execute the second, thus showing a message box with "Positive" in it.
+
+The thing you need to realize about DoStackSnapshot's behavior (indeed, CLR in general) is that the execution of a When clause is really a separate function call. In the above example, imagine we take a stack snapshot while inside Positive(). Our managed-only stack trace, as reported by DoStackSnapshot, would then look like this (stack grows up):
+
+Positive
+Main
+Thrower
+Main
+
+It's that highlighted Main that seems odd at first. While the exception is thrown inside Thrower(), the CLR needs to execute the filter clauses to figure out which Catch wins. These filter executions are actually _function calls_. Since filter clauses don't have their own names, we just use the name of the function containing the filter clause for stack reporting purposes. Thus, the highlighted Main above is the execution of a filter clause located inside Main (in this case, "When Positive()"). When each filter clause completes, we "return" back to Thrower() to continue our search for the filter that returns True. Since this is how the call stack is built up, that's what DoStackSnapshot will report.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md
new file mode 100644
index 0000000000..7d9952ff75
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md
@@ -0,0 +1,35 @@
+*This blog post originally appeared on David Broman's blog on 10/17/2005*
+
+
+Generally, corerror.h tells you all you need to know about what kinds of HRESULTs to expect back from DoStackSnapshot. However, there are some fringe cases where you can get back an HRESULT that's not as descriptive as you might like.
+
+### E\_FAIL
+
+I don't much like E\_FAIL. If DoStackSnapshot fails, you will typically see a more descriptive, custom HRESULT. However, there are regrettably a few ways DoStackSnapshot can fail where you'll see the dreaded E\_FAIL instead. From your code's point of view, you shouldn't assume E\_FAIL will always imply one of the cases below (or conversely that each of these cases will always result in E\_FAIL). But this is just good stuff to know as you develop and debug your profiler, so you don't get blindsided.
+
+1) No managed frames on stack
+
+If you call DoStackSnapshot when there are no managed functions on your target thread's stack, you can get E\_FAIL. For example, if you try to walk the stack of a target thread very early on in its execution, there simply might not be any managed frames there yet. Or, if you try to walk the stack of the finalizer thread while it's waiting to do work, there will certainly be no managed frames on its stack. It's also possible that walking a stack with no managed frames on it will yield S\_OK instead of E\_FAIL (e.g., if the target thread is jit-compiling the first managed function to be called on that thread). Again, your code probably doesn't need to worry about all these cases. If we call your StackSnapshotCallback for a managed frame, you can trust that frame is there. If we don't call your StackSnapshotCallback, you can assume there are no managed frames on the stack.
+
+2) OS kernel handling a hardware exception
+
+This one is less likely to happen, but it certainly can. When an app throws a hardware exception (e.g., divide by 0), the offending thread enters the Windows kernel. The kernel spends some time recording the thread's current user-mode register context, modifying some registers, and moving the instruction pointer to the user-mode exception dispatch routine. At this point the thread is ready to reenter user-mode. But if you are unlucky enough to call DoStackSnapshot while the target thread is still in the kernel doing this stuff, you will get E\_FAIL.
+
+3) Detectably bad seed
+
+If you seed the stack walk with a bogus seed register context, we try to be nice. Before reading memory pointed to by the registers we run some heuristics to ensure all is on the up and up. If we find discrepancies, we will fail the stack walk and return E\_FAIL. If we don't find discrepancies until it's too late and we AV (first-chance), then we'll catch the AV and return E\_UNEXPECTED.
+
+### CORPROF\_E\_STACKSNAPSHOT\_ABORTED
+
+Generally, this HRESULT means that your profiler requested to abort the stack walk in its StackSnapshotCallback. However, you can also see this HRESULT if the CLR aborted the stack walk on your behalf due to a rare scenario on 64 bit architectures.
+
+One of the beautiful things about running 64-bit Windows is that you can get the Windows OS to perform (native) stack walks for you. Read up on [RtlVirtualUnwind](http://msdn.microsoft.com/library/default.asp?url=/library/en-us/debug/base/rtlvirtualunwind.asp) if you're unfamiliar with this. The Windows OS has a critical section to protect a block of memory used to help perform this stack walk. So what would happen if:
+
+- The OS's exception handling code causes a thread to walk its own stack
+- The thread therefore enters this critical section
+- Your profiler (via DoStackSnapshot) suspends this thread while the thread is still inside the critical section
+- DoStackSnapshot uses RtlVirtualUnwind to help walk this suspended thread
+- RtlVirtualUnwind (executing on the current thread) tries to enter the critical section (already owned by suspended target thread)
+
+If your answer was "deadlock", congratulations! DoStackSnapshot has some code that tries to avoid this scenario, by aborting the stack walk before the deadlock can occur. When this happens, DoStackSnapshot will return CORPROF\_E\_STACKSNAPSHOT\_ABORTED. Note that this whole scenario is pretty rare, and only happens on WIN64.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/ELT Hooks - The Basics.md b/Documentation/Profiling/davbr-blog-archive/ELT Hooks - The Basics.md
new file mode 100644
index 0000000000..af5d867ee4
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/ELT Hooks - The Basics.md
@@ -0,0 +1,131 @@
+*This blog post originally appeared on David Broman's blog on 3/22/2007*
+
+
+The CLR Profiling API allows you to hook managed functions so that your profiler is called when a function is entered, returns, or exits via tailcall. We refer to these as Enter/Leave/Tailcall hooks, or “ELT” hooks. In this special multi-part investigative series, I will uncover the truth behind ELT. Today I'll write about some of the basics, NGEN, and a word on what we call "slow-path" vs. "fast-path".
+
+### Setting up the hooks
+
+1. On initialization, your profiler must call SetEnterLeaveFunctionHooks(2) to specify which functions inside your profiler should be called whenever a managed function is entered, returns, or exits via tail call, respectively.
+ _(Profiler calls this…)_
+ ```
+ HRESULT SetEnterLeaveFunctionHooks(
+ [in] FunctionEnter *pFuncEnter,
+ [in] FunctionLeave *pFuncLeave,
+ [in] FunctionTailcall *pFuncTailcall);
+ ```
+
+ _(Profiler implements these…)_
+ ```
+ typedef void FunctionEnter(FunctionID funcID);
+ typedef void FunctionLeave(FunctionID funcID);
+ typedef void FunctionTailcall(FunctionID funcID);
+ ```
+
+ **OR**
+
+ _(Profiler calls this…)_
+ ```
+ HRESULT SetEnterLeaveFunctionHooks2(
+ [in] FunctionEnter2 *pFuncEnter,
+ [in] FunctionLeave2 *pFuncLeave,
+ [in] FunctionTailcall2 *pFuncTailcall);
+ ```
+
+
+ _(Profiler implements these…)_
+ ```
+ typedef void FunctionEnter2(
+ FunctionID funcId,
+ UINT_PTR clientData,
+ COR_PRF_FRAME_INFO func,
+ COR_PRF_FUNCTION_ARGUMENT_INFO *argumentInfo);
+
+ typedef void FunctionLeave2(
+ FunctionID funcId,
+ UINT_PTR clientData,
+ COR_PRF_FRAME_INFO func,
+ COR_PRF_FUNCTION_ARGUMENT_RANGE *retvalRange);
+
+ typedef void FunctionTailcall2(
+ FunctionID funcId,
+ UINT_PTR clientData,
+ COR_PRF_FRAME_INFO func);
+ ```
+
+ This step alone does not cause the enter/leave/tailcall (ELT) hooks to be called. But you must do this on startup to get things rolling.
+
+2. At any time during the run, your profiler calls SetEventMask specifying COR\_PRF\_MONITOR\_ENTERLEAVE in the bitmask. Your profiler may set or reset this flag at any time to cause ELT hooks to be called or ignored, respectively.
+
+### FunctionIDMapper
+
+In addition to the above two steps, your profiler may specify more granularly which managed functions should have ELT hooks compiled into them:
+
+1. At any time, your profiler may call ICorProfilerInfo2::SetFunctionIDMapper to specify a special hook to be called when a function is JITted.
+
+_(Profiler calls this…)_
+```
+ HRESULT SetFunctionIDMapper([in] FunctionIDMapper *pFunc);
+```
+
+
+ _(Profiler implements this…)_
+```
+typedef UINT_PTR __stdcall FunctionIDMapper(
+ FunctionID funcId,
+ BOOL *pbHookFunction);
+```
+
+
+2. When FunctionIDMapper is called:
+ a. Your profiler sets the pbHookFunction [out] parameter appropriately to determine whether the function identified by funcId should have ELT hooks compiled into it.
+ b. Of course, the primary purpose of FunctionIDMapper is to allow your profiler to specify an alternate ID for that function. Your profiler does this by returning that ID from FunctionIDMapper. The CLR will pass this alternate ID to your ELT hooks (as funcID if you're using the 1.x ELT, and as clientData if you're using the 2.x ELT).
+
+### Writing your ELT hooks
+
+You may have noticed that corprof.idl warns that your implementations of these hooks must be \_\_declspec(naked), and that you've got to save registers you use. Yikes! This keeps things nice and efficient on the CLR code generation side, but at the expense of making life a little more difficult for profilers. For great low-level details of writing the hooks (including yummy sample code!) visit Jonathan Keljo's blog entry [here](http://blogs.msdn.com/jkeljo/archive/2005/08/11/450506.aspx).
+
+### NGEN /Profile
+
+The profiling API makes use of the fact that it can control the JITting of functions to enable features like ELT hooks. When managed code is NGENd, however, this assumption goes out the door. Managed code is already compiled before the process is run, so there’s no opportunity for the CLR to bake in calls to ELT hooks.
+
+The solution is “NGEN /Profile”. For example, if you run this command against your assembly:
+
+`ngen install MyAssembly.dll /Profile`
+
+
+
+it will NGEN MyAssembly.dll with the “Profile” flavor (also called “profiler-enhanced”). This flavor causes extra hooks to be baked in to enable features like ELT hooks, loader callbacks, managed/unmanaged code transition callbacks, and the JITCachedFunctionSearchStarted/Finished callbacks.
+
+The original NGENd versions of all your assemblies still stay around in your NGEN cache. NGEN /Profile simply causes a new set of NGENd assemblies to be generated as well, marked as the “profiler-enhanced” set of NGENd assemblies. At run-time, the CLR determines which flavor should be loaded. If a profiler is attached and enables certain features that only work with profiler-enhanced (not regular) NGENd assemblies (such as ELT via a call to SetEnterLeaveFunctionHooks(2), or any of several other features that are requested by setting particular event flags via SetEventMask), then the CLR will only load profiler-enhanced NGENd images--and if none exist then the CLR degrades to JIT in order to support the features requested by the profiler. In contrast, if the profiler does not specify such event flags, or there is no profiler to begin with, then the CLR loads the regular-flavored NGENd assemblies.
+
+So how does NGEN /Profile make ELT hooks work? Well, in a profiler-enhanced NGEN module, each function gets compiled with calls at enter, leave, and tailcall time to a thunk. At run-time, the CLR decides what this thunk does. Either nothing (if no profiler requested ELT hooks), or jmp to the profiler's ELT hook. For example, if a profiler is loaded, requesting ELT notifications, and the CPU is executing near the top of a function inside a profiler-enhanced NGEN module, the disassembly will look something like this:
+
+ `5bcfb8b0 call mscorwks!JIT_Writeable_Thunks_Buf+0x1b8 (5d8401d8)`
+
+And where's the target of that call? Right here:
+
+ `5d8401d8 jmp UnitTestSampleProfiler!Enter2Naked (023136b0)`
+
+As you may have guessed, I happen to have a profiler named "UnitTestSampleProfiler" loaded and responding to ELT notifications, so that thunk will jmp right into my Enter2 hook. When I return from my hook, control goes right back to the managed function that called the thunk.
+
+### Fast-path vs. Slow-path
+
+There are two paths the CLR might take to get to your ELT hooks: fast & slow. Fast means the JIT inserts a call from the JITted function directly into the profiler. (In profiler-enhanced NGEN modules, this translates to the thunk jumping directly to your ELT hook.) Slow means that some fixup must be done before control can be passed to your profiler, so the JIT inserts a call from the JITted function into helper functions in the CLR to do the fixup and finally forward the call to your profiler. (Or, in NGEN-land, the thunks jmp to those CLR helper functions.)
+
+There are also two supported signatures for the ELT hooks: CLR 1.x (set via SetEnterLeaveFunctionHooks) and CLR 2.x-style (set via SetEnterLeaveFunctionHooks **2** ).
+
+If your profiler requests 1.x ELT hooks, then slow-path is used for them all, end of story.
+
+If your profiler requests 2.x ELT hooks, then slow-path is used for them all if any of the following event flags were set by your profiler:
+
+- COR\_PRF\_ENABLE\_STACK\_SNAPSHOT: “Slow” ensures that the CLR has an opportunity to do some housekeeping on the stack before your profiler is called so that if your profiler calls DoStackSnapshot from within the ELT hook, then the stack walk will have a marker to begin from.
+- COR\_PRF\_ENABLE\_FUNCTION\_ARGS: “Slow” gives the CLR an opportunity to gather the function’s arguments on the stack for passing to the profiler’s enter hook.
+- COR\_PRF\_ENABLE\_FUNCTION\_RETVAL: “Slow” gives the CLR an opportunity to gather the function’s return value on the stack for passing to your profiler’s leave hook.
+- COR\_PRF\_ENABLE\_FRAME\_INFO: “Slow” gives the CLR an opportunity to gather generics information into a COR\_PRF\_FRAME\_INFO parameter to pass to your profiler.
+
+Why do you care? Well, it's always good to know what price you're paying. If you don't need any of the features above, then you're best off not specifying those flags. Because then you'll see better performance as the managed code may call directly into your profiler without any gunk going on in the middle. Also, this information gives you some incentive to upgrade your profiler's old 1.x ELT hooks to the hip, new 2.x ELT style. Since 1.x ELT hooks always go through the slow path (so the CLR has an opportunity to rearrange the parameters to fit the old 1.x prototype before calling your profiler), you're better off using the 2.x style.
+
+### Next time...
+
+That about covers it for the ELT basics. Next installment of this riveting series will talk about that enigma known as tailcall.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/ELT Hooks - tail calls.md b/Documentation/Profiling/davbr-blog-archive/ELT Hooks - tail calls.md
new file mode 100644
index 0000000000..ef41fad34a
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/ELT Hooks - tail calls.md
@@ -0,0 +1,390 @@
+*This blog post originally appeared on David Broman's blog on 6/20/2007*
+
+
+For most people the idea of entering or returning from a function seems straightforward. Your profiler's Enter hook is called at the beginning of a function, and its Leave hook is called just before the function returns. But the idea of a tail call and exactly what that means for the Profiling API is less straightforward.
+
+In [Part 1](ELT Hooks - The Basics.md) I talked about the basics of the Enter / Leave / Tailcall hooks and generally how they work. You may want to review that post first if you haven't seen it yet. This post builds on that one by talking exclusively about the Tailcall hook, how it works, and what profilers should do inside their Tailcall hooks.
+
+## Tail calling in general
+
+Tail calling is a compiler optimization that saves execution of instructions and saves reads and writes of stack memory. When the last thing a function does is call another function (and other conditions are favorable), the compiler may consider implementing that call as a tail call, instead of a regular call.
+
+```
+static public void Main()
+{
+ Helper();
+}
+
+static public void Helper()
+{
+ One();
+ Two();
+ Three();
+}
+
+static public void Three()
+{
+ ...
+}
+```
+
+In the code above, the compiler may consider implementing the call from Helper() to Three() as a tail call. What does that mean, and why would that optimize anything? Well, imagine this is compiled without a tail call optimization. By the time Three() is called, the stack looks like this (my stacks grow UP):
+
+```
+Three
+Helper
+Main
+```
+
+Each of those functions causes a separate frame to be allocated on the stack. All the usual contents of a frame, including locals, parameters, the return address, saved registers, etc., get stored in each of those frames. And when each of those functions returns, the return address is read from the frame, and the stack pointer is adjusted to collapse the frame of the returning function. That's just the usual overhead associated with making a function call.
+
+Now, if the call from Helper() to Three() were implemented as a tail call, we'd avoid that overhead, and Three() would just "reuse" the stack frame that had been set up for Helper(). While Three() is executing, the call stack would look like this:
+
+```
+Three
+Main
+```
+
+And when Three() returns, it returns directly to Main() without popping back through Helper() first.
+
+Folks who live in functional programming languages like Scheme use recursion at least as often as C++ or C# folks use while and for loops. Such functional programming languages depend on tail call optimizations (in particular tail recursion) to avoid overflowing the stack. While imperative languages like C++ or C# don't have such a vital need for tail call optimizations, it's still pretty handy as it reduces the number of instructions executed and the writes to the stack. Also, it's worth noting that the amount of stack space used for a single frame can be more than you'd expect. For example, in CLR x64, each regular call (without the tail call optimization) uses a minimum of 48 bytes of stack space, even if it takes no arguments, has no locals, and returns nothing. So for small functions, the tail call optimization can provide a significant overhead reduction in terms of stack space.
+
+## The CLR and tail calls
+
+When you're dealing with languages managed by the CLR, there are two kinds of compilers in play. There's the compiler that goes from your language's source code down to IL (C# developers know this as csc.exe), and then there's the compiler that goes from IL to native code (the JIT 32/64 bit compilers that are invoked at run time or NGEN time). Both the source-\>IL and IL-\>native compilers understand the tail call optimization. But the IL-\>native compiler--which I'll just refer to as JIT--has the final say on whether the tail call optimization will ultimately be used. The source-\>IL compiler can help to generate IL that is conducive to making tail calls, including the use of the "tail." IL prefix (more on that later). In this way, the source-\>IL compiler can structure the IL it generates to persuade the JIT into making a tail call. But the JIT always has the option to do whatever it wants.
+
+### When does the JIT make tail calls?
+
+I asked Fei Chen and [Grant Richins](http://blogs.msdn.com/grantri), neighbors down the hall from me who happen to work on the JIT, under what conditions the various JITs will employ the tail call optimization. The full answer is rather detailed. The quick summary is that the JITs try to use the tail call optimization whenever they can, but there are lots of reasons why the tail call optimization can't be used. Some reasons why tail calling is a non-option:
+
+- Caller doesn't return immediately after the call (duh :-))
+- Stack arguments between caller and callee are incompatible in a way that would require shifting things around in the caller's frame before the callee could execute
+- Caller and callee return different types
+- We inline the call instead (inlining is way better than tail calling, and opens the door to many more optimizations)
+- Security gets in the way
+- The debugger / profiler turned off JIT optimizations
+
+[Here](Tail call JIT conditions.md) are their full, detailed answers.
+
+_Note that how the JIT decides whether to use the tail calling optimization is an implementation detail that is prone to change at whim. **You must not take dependencies on this behavior**. Use this information for your own personal entertainment only._
+
+## Your Profiler's Tailcall hook
+
+I'm assuming you've already read through [Part 1](ELT Hooks - The Basics.md) and are familiar with how your profiler sets up its Enter/Leave/Tailcall hooks, so I'm not repeating any of those details here. I will focus on what kind of code you will typically want to place inside your Tailcall hook:
+
+```
+typedef void FunctionTailcall2(
+ FunctionID funcId,
+ UINT_PTR clientData,
+ COR_PRF_FRAME_INFO func);
+```
+
+**Tip** : More than once I've seen profiler writers make the following mistake. They will take their naked assembly-language wrapper for their Enter2 and Leave2 hooks, and paste it again to use as the Tailcall2 assembly-language wrapper. The problem is they forget that the Tailcall2 hook takes a different number of parameters than the Enter2 / Leave2 hooks (or, more to the point, a different number of _bytes_ is passed on the stack to invoke the Tailcall2 hook). So, they'll take the "ret 16" at the end of their Enter2/Leave2 hook wrappers and stick that into their Tailcall2 hook wrapper, forgetting to change it to a "ret 12". Don't make the same mistake!
+
+It's worth noting what these parameters mean. With the Enter and Leave hooks it's pretty obvious that the parameters your hook is given (e.g., funcId) apply to the function being Entered or Left. But what about the Tailcall hook? Do the Tailcall hook's parameters describe the caller (function making the tail call) or the callee (function being tail called into)?
+
+Answer: the parameters refer to tail call **er**.
+
+The way I remember it is that the Tailcall hook is like an "Alternative Leave" hook. A function ends either by returning (in which case the CLR invokes your Leave hook) or a function ends by tail calling out to somewhere else (in which case the CLR invokes your Tailcall hook). In either case (Leave hook or Tailcall hook) the hook's parameters tell you about the function that's _ending_. If a function happens to end by making a tail call, your profiler is not told the target of that tail call. (The astute reader will realize that actually your profiler _is_ told what the target of the tail call is--you need only wait until your Enter hook is called next, and that function will be the tail call target, or "tail callee". (Well, actually, this is true most of the time, but not all! (More on that later, but consider this confusing, nested series of afterthoughts a hint to a question I pose further down in this post.)))
+
+Did you just count the number of closing parentheses to ensure I got it right? If so, I'd like to make fun of you but I won't--I'd have counted the parentheses, too. My house is glass.
+
+Ok, enough dilly-dallying. What should your profiler do in its Tailcall hook? Two of the more common reasons profilers use Enter/Leave/Tailcall hooks in the first place is to keep **shadow stacks** or to maintain **call traces** (sometimes with timing information).
+
+### Shadow stacks
+
+The [CLRProfiler](http://www.microsoft.com/downloads/details.aspx?FamilyID=a362781c-3870-43be-8926-862b40aa0cd0&DisplayLang=en) is a great example of using Enter/Leave/Tailcall hooks to maintain shadow stacks. A shadow stack is your profiler's own copy of the current stack of function calls on a given thread at any given time. Upon Enter of a function, you push that FunctionID (and whatever other info interests you, such as arguments) onto your data structure that represents that thread's stack. Upon Leave of a function, you pop that FunctionID. This gives you a live list of managed calls in play on the thread. The CLRProfiler uses shadow stacks so that whenever the managed app being profiled chooses to allocate a new object, the CLRProfiler can know the managed call stack that led to the allocation. (Note that an alternate way of accomplishing this would be to call DoStackSnapshot at every allocation point instead of maintaining a shadow stack. Since objects are allocated so frequently, however, you'd end up calling DoStackSnapshot extremely frequently and will often see worse performance than if you had been maintaining shadow stacks in the first place.)
+
+
+
+OK, so when your profiler maintains a shadow stack, it's clear what your profiler should do on Enter or Leave, but what should it do on Tailcall? There are a couple ways one could imagine answering that question, but only one of them will work! Taking the example from the top of this post, imagine the stack looks like this:
+
+```
+Helper
+Main
+```
+
+and Helper is about to make a tail call into Three(). What should your profiler do?
+
+Method 1: On tailcall, pop the last FunctionID. (In other words, treat Tailcall just like Leave.)
+
+So, in this example, when Helper() calls Three(), we'd pop Helper(). As soon as Three() is called, our profiler would receive an Enter for Three(), and our shadow stack would look like this:
+
+```
+Three
+Main
+```
+
+This approach mirrors reality, because this is what the actual physical stack will look like. Indeed, if one attaches a debugger to a live process, and breaks in while the process is inside a tail call, the debugger will show a call stack just like this, where you see the tail callee, but not the tail caller. However, it might be a little confusing to a user of your profiler who looks at his source code and sees that Helper() (not Main()) calls Three(). He may have no idea that when Helper() called Three(), the JIT chose to turn that into a tail call. In fact, your user may not even know what a tail call is. You might therefore be tempted to try this instead:
+
+Method 2: On tailcall, "mark" the FunctionID at the top of your stack as needing a "deferred pop" when its callee is popped, but don't pop yet.
+
+With this strategy, for the duration of the call to Three(), the shadow stack will look like this:
+
+Three
+Helper (marked for deferred pop)
+Main
+
+which some might consider more user-friendly. And as soon as Three() returns, your profiler will sneakily do a double-pop leaving just this:
+
+```
+Main
+```
+
+So which should your profiler use: Method 1 or Method 2? Before I answer, take some time to think about this, invoking that hint I cryptically placed above in nested parentheses. And no, the fact that the parentheses were nested is not part of the actual hint.
+
+The answer: Method 1. In principle, either method should be fine. However, the behavior of the CLR under certain circumstances will break Method 2. Those "certain circumstances" are what I alluded to when I mentioned "this is true most of the time, but not all" above. These mysterious "certain circumstances" involve a managed function tail calling into a native helper function inside the runtime. Here's an example:
+```
+static public void Main()
+{
+ Thread.Sleep(44);
+ Helper();
+}
+```
+
+It just so happens that the implementation of Thread.Sleep makes a call into a native helper function in the bowels of the runtime. And that call happens to be the last thing Thread.Sleep does. So the JIT may helpfully optimize that call into a tail call. Here are the hook calls your profiler will see in this case:
+
+```
+(1) Enter (into Main)
+(2) Enter (into Thread.Sleep)
+(3) Tailcall (from Thread.Sleep)
+(4) Enter (into Helper)
+(5) Leave (from Helper)
+(6) Leave (from Main)
+```
+
+Note that after you get a Tailcall telling you that Thread.Sleep is done, (in (3)), the very next Enter you get (in (4)) is NOT the Enter for the function being tail called. This is because the CLR only provides Enter/Leave/Tailcall hooks for _managed_ functions, and the very next managed function being entered is Helper(). So, how will Method 1 and Method 2 fare in this example?
+
+Method 1: Shadow stack works
+
+By popping on every Tailcall hook, your shadow stack stays up to date.
+
+Method 2: Shadow stack fails
+
+At stage (4), the shadow stack looks like this:
+
+Helper
+Thread.Sleep (marked for "deferred pop")
+Main
+
+If you think it might be complicated to explain tail calls to your users so they can understand the Method 1 form of shadow stack presentation, just try explaining why it makes sense to present to them that Thread.Sleep() is calling Helper()!
+
+And of course, this can get arbitrarily nasty:
+
+```
+static public void Main()
+{
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+ Helper();
+}
+```
+
+would yield:
+```
+Helper
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Main
+```
+
+And things get more complicated if you start to think about when you actually pop a frame marked for "deferred pop". In all the above examples, you would do so as soon as the frame above it gets popped. So once Helper() is popped (due to Leave()), you'd cascade-pop all the Thread.Sleeps. But what if there is no frame above the frames marked for "deferred pop"? To wit:
+
+```
+static public void Main()
+{
+ Helper();
+}
+
+static public void Helper()
+{
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+ Thread.Sleep(44);
+}
+```
+
+would yield:
+```
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Thread.Sleep (marked for "deferred pop")
+Helper
+Main
+```
+
+until you get a Leave hook for Helper(). At this point, you need to pop Helper() from your shadow stack, but he's not at the top-- he's buried under all your "deferred pop" frames. So your profiler would need to perform the deferred pops if a frame above OR below them gets popped. Hopefully, the yuckiness of this implementation will scare you straight. But the confusion of presenting crazy stacks to the user is the real reason to abandon Method 2 and go with Method 1.
+
+### Call tracing
+
+The important lesson to learn from the above section is that sometimes a Tailcall hook will match up with the next Enter hook (i.e., the tail call you're notified of in your Tailcall hook will have as its callee the very function you're notified of in the next Enter hook), and sometimes the Tailcall hook will NOT match with the next Enter hook (in particular when the Tailcall hook refers to a tail call into a native helper in the runtime). And the sad fact is that the Enter/Leave/Tailcall hook design does not currently allow you to predict whether a Tailcall will match the next Enter.
+
+As an illustration, consider two simple tail call examples:
+
+**Matching Example**
+
+```
+static public void Main()
+{
+ One();
+ ...(other code here)...
+}
+
+static public void One()
+{
+ Two();
+}
+```
+
+**Non-matching Example**
+
+```
+static public void Main()
+{
+ Thread.Sleep(44);
+ Two();
+}
+```
+
+In either case, your profiler will see the following hook calls
+
+```
+(1) Enter (into Main)
+(2) Enter (into One / Thread.Sleep)
+(3) Tailcall (from One / Thread.Sleep)
+(4) Enter (into Two)
+...
+```
+
+In the first example, (3) and (4) match (i.e., the tail call really does call into Two()). But in the second example, they do not (the tail call does NOT call into Two()).
+
+Since you don't know when Tailcall will match the next Enter, your implementation of call tracing, like shadow stack maintenance, must treat a Tailcall hook just like a Leave. If you're logging when functions begin and end, potentially with the amount of time spent inside the function, then your Tailcall hook should basically do the same thing as your Leave hook. A call to your Tailcall hook indicates that the specified function is over and done with, just like a call to your Leave hook.
+
+As with shadow stacks, this will sometimes lead to call graphs that could be confusing. "Matching Example" had One tail call Two, but your graph will look like this:
+
+```
+Main
+|
+|-- One
+|-- Two
+```
+
+But at least this effect is explainable to your users, and is self-correcting after the tail call is complete, while yielding graphs that are consistent with your timing measurements. If instead you try to outsmart this situation and assume Tailcalls match the following Enter, the errors can snowball into incomprehensible graphs (see the nasty examples from the shadow stack section above).
+
+### How often does this happen?
+
+So when does a managed function in the .NET Framework tail call into a native helper function inside the CLR? In the grand scheme of things, not a lot. But it's a pretty random and fragile list that depends on which JIT is in use (x86, x64, ia64), and can easily change as parts of the runtime are rev'd, or even as JIT compilation flags are modified by debuggers, profilers, and other environmental factors while a process is active. So you should not try to guess this list and make dependencies on it.
+
+### Can't I just turn tail calling off?!
+
+If all this confusion is getting you down, you might be tempted to just avoid the problem in the first place. And yes, there is a way to do so, but I wouldn't recommend it in general. If you call SetEventMask, specifying COR\_PRF\_DISABLE\_OPTIMIZATIONS inside your mask, that will tell the JIT to turn off the tail call optimization. But the JIT will also turn off ALL optimizations. Profilers that shouldn't perturb the behavior of the app should definitely _not_ do this, as the code generation will be very different.
+
+## Watching CLR tail calls in action
+
+If you're writing a profiler with Enter/Leave/Tailcall hooks, you'll want to make sure you exercise all your hooks so they're properly tested. It's easy enough to make sure your Enter/Leave hooks are called--just make sure the test app your profiler runs against has a Main()! But how to make sure your Tailcall hook is called?
+
+The surest way is to have a simple managed app that includes an obvious tail call candidate, and make sure the "tail." IL prefix is in place. You can use ilasm / ildasm to help build such an assembly. Here's an example I tried on x86 using C#.
+
+Start with some simple code that makes a call that should easily be optimized into a tail call:
+
+```
+using System;
+class Class1
+{
+ static int Main(string[] args)
+ {
+ return Helper(4);
+ }
+
+ static int Helper(int i)
+ {
+ Random r = new Random();
+ i = (i / 1000) + r.Next();
+ i = (i / 1000) + r.Next();
+ return MakeThisATailcall(i);
+ }
+
+ static int MakeThisATailcall(int i)
+ {
+ Random r = new Random();
+ i = (i / 1000) + r.Next();
+ i = (i / 1000) + r.Next();
+ return i;
+ }
+}
+```
+
+You'll notice there's some extra gunk, like calls to Random.Next(), to make the functions big enough that the JIT won't inline them. There are other ways to avoid inlining (including from the profiling API itself), but padding your test functions is one of the easier ways to get started without impacting the code generation of the entire process. Now, compile that C# code into an IL assembly:
+
+```
+csc /o+ Class1.cs
+```
+
+(If you're wondering why I specified /o+, I've found that if I _don't_, then suboptimal IL gets generated, and some extraneous instructions appear inside Helper between the call to MakeThisATailcall and the return from Helper. Those extra instructions would prevent the JIT from making a tail call.)
+
+Run ildasm to get at the generated IL
+
+```
+ildasm Class1.exe
+```
+
+Inside ildasm, use File.Dump to generate a text file that contains a textual representation of the IL from Class1.exe. Call it Class1WithTail.il. Open up that file and add the tail. prefix just before the call you want optimized into a tail call (in the listing below, lines prefixed with ~~ show the original code that was replaced, followed by the updated lines):
+
+```
+.method private hidebysig static int32
+ Helper(int32 i) cil managed
+ {
+~~// Code size 45 (0x2d)
+~~ // Code size 46 (0x2e)
+ .maxstack 2
+ .locals init (class [mscorlib]System.Random V_0)
+ IL_0000: newobj instance void [mscorlib]System.Random::.ctor()
+ IL_0005: stloc.0
+ IL_0006: ldarg.0
+ IL_0007: ldc.i4 0x3e8
+ IL_000c: div
+ IL_000d: ldloc.0
+ IL_000e: callvirt instance int32 [mscorlib]System.Random::Next()
+ IL_0013: add
+ IL_0014: starg.s i
+ IL_0016: ldarg.0
+ IL_0017: ldc.i4 0x3e8
+ IL_001c: div
+ IL_001d: ldloc.0
+ IL_001e: callvirt instance int32 [mscorlib]System.Random::Next()
+ IL_0023: add
+ IL_0024: starg.s i
+ IL_0026: ldarg.0
+~~IL_0027: call int32 Class1::MakeThisATailcall(int32)
+ IL_002c: ret
+~~ IL_0027: tail.
+ IL_0028: call int32 Class1::MakeThisATailcall(int32)
+ IL_002d: ret
+ } // end of method Class1::Helper
+```
+
+Now you can use ilasm to recompile your modified textual IL back into an executable assembly.
+
+```
+ilasm /debug=opt Class1WithTail.il
+```
+
+You now have Class1WithTail.exe that you can run! Hook up your profiler and step through your Tailcall hook.
+
+## You Can Wake Up Now
+
+If you didn't learn anything, I hope you at least got some refreshing sleep thanks to this post. Here's a quick recap of what I wrote while you were napping:
+
+- If the last thing a function does is call another function, that call may be optimized into a simple jump (i.e., "tail call"). Tail calling is an optimization to save the time of stack manipulation and the space of generating an extra call frame.
+- In the CLR, the JIT has the final say on when it employs the tail call optimization. The JIT does this whenever it can, except for a huge list of exceptions. Note that the x86, x64, and ia64 JITs are all different, and you'll see different behavior on when they'll use the tail call optimizations.
+- Since some managed functions may tail call into native helper functions inside the CLR (for which you won't get an Enter hook notification), your Tailcall hook should treat the tail call as if it were a Leave, and not depend on the next Enter hook correlating to the target of the last tail call. With shadow stacks, for example, this means you should simply pop the calling function off your shadow stack in your Tailcall hook.
+- Since tail calls can be elusive to find in practice, it's well worth your while to use ildasm/ilasm to manufacture explicit tail calls so you can step through your Tailcall hook and test its logic.
+
+_David has been a developer at Microsoft for over 70 years (allowing for his upcoming time-displacement correction). He joined Microsoft in 2079, first starting in the experimental time-travel group. His current assignment is to apply his knowledge of the future to eliminate the "Wait for V3" effect customers commonly experience in his source universe. By using Retroactive Hindsight-ellisenseTM his goal is to "get it right the first time, this time" in a variety of product groups._
+
diff --git a/Documentation/Profiling/davbr-blog-archive/GC Heap and Alignment Padding.md b/Documentation/Profiling/davbr-blog-archive/GC Heap and Alignment Padding.md
new file mode 100644
index 0000000000..79d4527104
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/GC Heap and Alignment Padding.md
@@ -0,0 +1,12 @@
+*This blog post originally appeared on David Broman's blog on 12/29/2011*
+
+
+The docs for [GetObjectSize](http://msdn.microsoft.com/en-US/library/ms231885(v=VS.100).aspx) have recently been updated with this info, but I wanted to mention it here, too, to ensure you were aware of this information.
+
+Some profilers manually advance through objects on the heap, inspecting their field values, by starting at an ObjectID and moving forward by its size to the next ObjectID, repeating this process, for all the reported generation ranges (via GetGenerationBounds or MovedReferences/SurvivingReferences). If your profiler doesn’t do this, then this blog entry will be of no interest to you, and you can skip it. But if your profiler does do this, you need to be aware of the alignment rules that the CLR employs as it allocates and moves objects around on the GC heap.
+
+- **On x86** : All objects are 4-byte aligned, except for objects on the large-object-heap, which are always 8-byte aligned.
+- **On x64** : All objects are always 8-byte aligned, in all generations.
+
+And the important point to note is that GetObjectSize does NOT include alignment padding in the size that it reports. Thus, as your profiler manually skips from object to object by using GetObjectSize() to determine how far to skip, your profiler must manually add in any alignment padding necessary to achieve the alignment rules listed above.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Generics and Your Profiler.md b/Documentation/Profiling/davbr-blog-archive/Generics and Your Profiler.md
new file mode 100644
index 0000000000..21c748d986
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Generics and Your Profiler.md
@@ -0,0 +1,133 @@
+*This blog post originally appeared on David Broman's blog on 1/28/2010*
+
+
+If you’re writing a profiler that you expect to run against CLR 2.0 or greater, you probably care about generics. Whether you're reporting call stacks or instrumenting code, it's possible the users of your profiler wrote some of that code using generic types. And if not, it's still quite likely they used generic types from libraries they depend on, such as those that ship with the .NET Framework. Reporting as much detail as you can, such as which type arguments were used to instantiate a generic type that appears in a call stack, can help your users diagnose their problems more effectively.
+
+## Terminology
+
+Let's say a C# developer writes code like this:
+
+
+```
+class MyClass<S>
+{
+ static string Foo<T>(S instanceOfS, T instanceOfT)
+ {
+ return instanceOfS.ToString() + instanceOfT.ToString();
+ }
+}
+```
+
+Here we have a generic function, MyClass\<S\>.Foo\<T\>. Let's say the developer instantiated MyClass & Foo by making the following function call:
+
+```
+MyClass<int>.Foo<float>(4, 8.8f);
+```
+
+It's important to distinguish between **function** arguments and **type** arguments. The function arguments are the dudes inside the parentheses—4 and 8.8 in the example above. Type arguments are the things you find inside the angle brackets \<\>. Foo is given one type argument, float. Foo belongs to class MyClass, which itself is given the type argument, int.
+
+It’s worth spending a bit of time thinking about this. When one sees the term “type arguments”, one might mistake that for “argument types”, or “types of the function arguments”, which in the above case would be int _and_ float, since the function takes two function arguments. But this is not what I mean by “type argument”. A “type argument” is what the developer provides in place of a generic type parameter that sits inside the angle brackets. This is irrespective of what function arguments are passed to the function. For example the generic function Alloc\<U\>:
+
+```
+U Alloc<U>() { return new U(); }
+```
+
+takes no function arguments at all, but it still requires a type argument (for the “U”) in order to be instantiated.
+
+## GetFunctionInfo2
+
+So if you were to get the FunctionID for MyClass\<int\>.Foo\<float\>, and you passed that FunctionID to GetFunctionInfo2, what should you get back in the [out] parameters?
+
+```
+HRESULT GetFunctionInfo2([in] FunctionID funcId,
+ [in] COR_PRF_FRAME_INFO frameInfo,
+ [out] ClassID *pClassId,
+ [out] ModuleID *pModuleId,
+ [out] mdToken *pToken,
+ [in] ULONG32 cTypeArgs,
+ [out] ULONG32 *pcTypeArgs,
+ [out] ClassID typeArgs[]);
+```
+
+\*pClassId: This will be the ClassID for the instantiated MyClass\<int\>. More on this later.
+
+\*pModuleId: module defining the mdMethodDef token returned (see next parameter). If funcId is a generic function defined in one module, its instantiating type arguments are defined in other modules, and the function is instantiated and called from yet another module, this parameter will always tell you that first module—the one containing the original definition of the generic function (i.e., funcId’s mdMethodDef).
+
+\*pToken: This is the metadata token (mdMethodDef) for MyClass\<S\>.Foo\<T\>. Note that you get the same mdMethodDef for any conceivable instantiation of a generic method.
+
+typeArgs[]: This is the array of **type arguments** to MyClass\<int\>.Foo\<float\>. So this will be an array of only one element: the ClassID for float. (The int in MyClass\<int\> is a type argument to MyClass, not to Foo, and you would only see that when you call GetClassIDInfo2 with MyClass\<int\>.)
+
+##
+
+## GetClassIDInfo2
+
+OK, someone in parentheses said something about calling GetClassIDInfo2, so let’s do that. Since we got the ClassID for MyClass\<int\> above, let’s pass it to GetClassIDInfo2 to see what we get:
+
+```
+HRESULT GetClassIDInfo2([in] ClassID classId,
+ [out] ModuleID *pModuleId,
+ [out] mdTypeDef *pTypeDefToken,
+ [out] ClassID *pParentClassId,
+ [in] ULONG32 cNumTypeArgs,
+ [out] ULONG32 *pcNumTypeArgs,
+ [out] ClassID typeArgs[]);
+```
+
+\*pModuleId: module defining the mdTypeDef token returned (see next parameter). If classId is a generic class defined in one module, its instantiating type arguments are defined in other modules, and the class is instantiated in yet another module, this parameter will always tell you that first module—the one containing the definition of the generic class (i.e., classId’s mdTypeDef).
+
+\*pTypeDefToken: This is the metadata token (mdTypeDef) for MyClass\<S\>. As with the mdMethodDef in the previous section, you’ll get the same mdTypeDef for any conceivable instantiation of MyClass\<S\>.
+
+\*pParentClassId: As with any class, this [out] parameter will tell you the base class. If the base class itself were a generic class, then this would be the ClassID for the fully instantiated base class. You could then use GetClassIDInfo2 on \*pParentClassId to determine its generic type arguments.
+
+typeArgs: This is the array of type arguments used to instantiate classId, which in the above example is MyClass\<int\>. So in this example, typeArgs will be an array of only one element: the ClassID for int.
+
+## COR\_PRF\_FRAME\_INFO
+
+You may have noticed I ignored this parameter in my description of GetFunctionInfo2. You can pass NULL if you want, and nothing really bad will happen to you, but you’ll often get some incomplete results: you won’t get very useful typeArgs coming back, and you’ll often see NULL returned in \*pClassId.
+
+To understand why, it’s necessary to understand an internal optimization the CLR uses around sharing code for generics: If two instantiations of the same generic function would result in identical JITted code, then why not have them share one copy of that code? The CLR chooses to share code if all of the type parameters are instantiated with reference types. If you want to read more about this, [here’s](http://blogs.msdn.com/carlos/archive/2009/11/09/net-generics-and-code-bloat-or-its-lack-thereof.aspx) a place to go.
+
+For now, the important point is that, once we’re inside JITted code that is shared across different generic instantiations, how can one know which instantiation is the actual one that caused the current invocation? Well, in many cases, the CLR may not have that data readily lying around. However, as a profiler, you can capture this information and pass it back to the CLR when it needs it. This is done through a COR\_PRF\_FRAME\_INFO. There are two ways your profiler can get a COR\_PRF\_FRAME\_INFO:
+
+1. Via slow-path Enter/Leave/Tailcall probes
+2. Via your DoStackSnapshot callback
+
+I lied. #1 is really the only way for your profiler to get a COR\_PRF\_FRAME\_INFO. #2 may seem like a way—at least the profiling API suggests that the CLR gives your profiler a COR\_PRF\_FRAME\_INFO in the DSS callback—but unfortunately the COR\_PRF\_FRAME\_INFO you get there is pretty useless. I suspect the COR\_PRF\_FRAME\_INFO parameter was added to the signature of the profiler’s DSS callback function so that it could “light up” at some point in the future when we could work on finding out how to create a sufficiently helpful COR\_PRF\_FRAME\_INFO during stack walks. However, that day has not yet arrived. So if you want a COR\_PRF\_FRAME\_INFO, you’ll need to grab it—and use it from—your slow-path Enter/Leave/Tailcall probe.
+
+With a valid COR\_PRF\_FRAME\_INFO, GetFunctionInfo2 will give you helpful, specific ClassIDs in the typeArgs [out] array and pClassId [out] parameter. If the profiler passes NULL for COR\_PRF\_FRAME\_INFO, here’s what you can expect:
+
+- If you’re using CLR V2, pClassId will point to NULL if the function sits on _any_ generic class (shared or not). In CLR V4 this got a little better, and you’ll generally only see pClassId point to NULL if the function sits on a “shared” generic class (instantiated with reference types).
+ - Note: If it’s impossible for the profiler to have a COR\_PRF\_FRAME\_INFO handy to pass to GetFunctionInfo2, and that results in a NULL \*pClassID, the profiler can always use the metadata interfaces to find the mdTypeDef token of the class on which the function resides for the purposes of pretty-printing the class name to the user. Of course, the profiler will not know the specific instantiating type arguments that were used on the class in that case.
+- The typeArgs [out] array will contain the ClassID for **System.\_\_Canon** , rather than the actual instantiating type(s), if the function itself is generic and is instantiated with reference type argument(s).
+
+It’s worth noting here that there is a bug in GetFunctionInfo2, in that the [out] pClassId you get for the class containing the function can be wrong with generic virtual functions. Take a look at [this forum post](http://social.msdn.microsoft.com/Forums/en-US/netfxtoolsdev/thread/ed6f972f-712a-48df-8cce-74f8951503fa/) for more information and a workaround.
+
+##
+
+## ClassIDs & FunctionIDs vs. Metadata Tokens
+
+Although you can infer this from the above, let’s take a breather and review. When you have multiple generic instantiations of a generic type, that type is defined with one mdTypeDef (metadata token), but you’ll see multiple ClassIDs (one per instantiation). When you have multiple generic instantiations of a generic method, it’s defined with one mdMethodDef (metadata token), but you’ll see multiple FunctionIDs (one per instantiation).
+
+For example, if we have code that uses MyClass\<int\>.Foo\<float\> and MyClass\<int\>.Foo\<long\>, you will see two JITCompilationStarted/JITCompilationFinished pairs, with two different FunctionIDs (one for each instantiation). But when you look up the metadata token for those two FunctionIDs via GetFunctionInfo2, you’ll get the same mdMethodDef.
+
+CLR’s generics sharing optimization complicates this somewhat. You’ll really only see separate JIT notifications and separate FunctionIDs for different _unshared_ instantiations, and not necessarily for every different instantiation. So if instead we have code that uses MyClass\<object\>.Foo\<string\> and MyClass\<SomeClassICreated\>.Foo\<AnotherClassICreated\>, you may only see one JITCompilationStarted/JITCompilationFinished pair, with only one FunctionID (representing the instantiation using System.\_\_Canon for the type arguments). I say “may”, because generics sharing is an internal CLR optimization that can change at any time without affecting the correctness of managed code. So your profiler cannot rely on a particular scheme the CLR may use to share generic code. But it would be wise to be aware that sharing _can_ happen, so your profiler can deal with it appropriately.
+
+So that covers JIT notifications—what about ClassLoad\* notifications in the same example? Although the CLR shares _JITted code_ across reference-type instantiations, the CLR still maintains separate loaded _types_ for each generic instantiation of a generic class. So in the example from the paragraph above you will see separate ClassLoad\* notifications with different ClassIDs for MyClass\<object\> and MyClass\<SomeClassICreated\>. In fact, you will also see a separate ClassLoad\* notification (with yet another ClassID) for MyClass\<System.\_\_Canon\>.
+
+If you got curious, and ran such a profiler under the debugger, you could use the SOS !dumpmt command with those different ClassIDs to see what you get. By doing so, you’ll notice something interesting. !dumpmt shows many values, including “Name”, which will correctly be the specific, fully-instantiated name of the type (different for all three ClassIDs). !dumpmt also shows a thing called “EEClass”. And you’ll notice this “EEClass” value is actually the _same_ for all 3 types. (Remember from this [post](http://blogs.msdn.com/davbr/archive/2007/12/18/debugging-your-profiler-ii-sos-and-ids.aspx) that EEClass is NOT the same thing as ClassID!) That gives you a little window into some additional data sharing optimizations the CLR uses. Stuff that remains the same across different generic instantiations of a class can be stored in a single place (the EEClass) and that single place can be referenced by the different generic instantiations of the class. Note that if you also use a value type as the type argument when instantiating MyClass\<T\> (e.g., MyClass\<int\>), and then run !dumpmt on that ClassID, you’ll see an entirely different EEClass value in the output, as the CLR will not be sharing that subset of type data across generic instantiations that use type arguments that are value types.
+
+## Instrumenting Generic Functions
+
+If your profiler performs IL rewriting, it’s important to understand that it must NOT do instantiation-specific IL rewriting. Huh? Let’s take an example. Suppose you’re profiling code that uses MyClass\<int\>.Foo\<float\> and MyClass\<int\>.Foo\<long\>. Your profiler will see two JITCompilationStarted callbacks, and will have two opportunities to rewrite the IL. Your profiler may call GetFunctionInfo2 on those two FunctionIDs and determine that they’re two different instantiations of the same generic function. You may then be tempted to make use of the fact that one is instantiated with float, and the other with long, and provide different IL for the two different JIT compilations. The problem with this is that the IL stored in metadata, as well as the IL provided to SetILFunctionBody, is always specified relative to the mdMethodDef. (Remember, SetILFunctionBody doesn’t take a FunctionID as input; it takes an mdMethodDef.) And it’s the profiler’s responsibility always to specify the same rewritten IL for any given mdMethodDef no matter how many times it’s JITted. And a given mdMethodDef can be JITted multiple times due to a number of reasons:
+
+- Two threads simultaneously trying to call the same function for the first time (and thus both trying to JIT that function)
+- Strange dependency chains involving class constructors (more on this in the MSDN [reference topic](http://msdn.microsoft.com/en-us/library/ms230586.aspx))
+- Multiple AppDomains using the same (non-domain-neutral) function
+- And of course multiple generic instantiations!
+
+Regardless of the reason, the profiler must always rewrite with exactly the same IL. Otherwise, an invariant in the CLR will have been broken by the profiler, and you will get strange, undefined behavior as a result. And no one wants that.
+
+
+
+That’s it! Hopefully this gives you a good idea of how the CLR Profiling API will behave in the face of generic classes and functions, and what is expected of your profiler.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md b/Documentation/Profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md
new file mode 100644
index 0000000000..1b1e2f74d7
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md
@@ -0,0 +1,114 @@
+*This blog post originally appeared on David Broman's blog on 10/17/2011*
+
+
+# Overview
+
+In this post, I write about the two primary kinds of IDs your profiler deals with, when each kind is appropriate to use, how to convert between those two types of IDs, and some gotchas with those conversions—particularly in how they may invoke the type loader.
+
+# The two kinds of IDs
+
+Profilers have to deal with two kinds of IDs. The first kind are IDs from metadata, a.k.a., **metadata tokens**. These are the mdToken values, like mdMethodDef or mdTypeDef, which are read straight out of the metadata of managed modules. These values do not change for a given module from process to process. They are placed in the module by the language compiler that generates the IL (e.g., csc.exe). Profilers typically use metadata tokens in order to look up symbolic information from the metadata (e.g., for pretty-printing names of methods or classes), and for performing IL rewriting. Metadata tokens are also fantastic for deferring symbolic lookup to a post-processing phase. For example, a sampling profiler could log metadata tokens for classes and functions encountered on a sample at run-time and defer looking up the names of those classes and functions to a post-processing phase that occurs after the profiled process has exited. This keeps the profiler’s data collection lightweight, and is only possible because metadata tokens don’t change so long as the managed modules defining those tokens don’t change.
+
+The second kind of IDs are **run-time IDs** , such as FunctionID or ClassID which are defined in corprof.idl. These values do change from process to process, and they represent internal data structures that the CLR builds up at run-time as it loads modules, loads types, JIT compiles functions, etc. Profilers use these values as its main currency between ICorProfilerInfo\* and ICorProfilerCallback\* methods. The CLR uses these values when it notifies profilers of various events (ICorProfilerCallback\* methods), and the profiler passes these values back into the CLR (ICorProfilerInfo\* methods) in order to get further information about them. These IDs are handy because they are your profiler’s key to unlocking class layout, generated code, object addresses, and everything else that the CLR maintains about the actively executing managed code at run-time. See [this post](Debugging - SOS and IDs.md) for more info about what these IDs really are.
+
+# Converting between metadata tokens and run-time IDs
+
+Since metadata tokens are good for some things and run-time IDs are good for others, you will inevitably find yourself in situations where you have one kind of ID handy, but you really need the other kind of ID. Can you convert from one kind of ID to another? Yes, but there are some caveats!
+
+It’s always safe to go this direction: run-time ID –\> metadata token. Just use methods such as GetFunctionInfo2 and GetClassIDInfo2, which take run-time IDs as input, and provide their module + metadata token as (part of) the output.
+
+However, it is problematic going the opposite direction: metadata token –\> run-time ID. Why? Because a given type may not be loaded yet, and thus the run-time ID may not exist. There exist methods on the ICorProfilerInfo\* interfaces that go this direction, namely GetFunctionFromToken(AndTypeArgs) and GetClassFromToken(AndTypeArgs). However, they are dangerous to use (see below), and should be avoided. Instead, it’s preferable that your profiler build up its own run-time ID –\> metadata token map as it encounters run-time IDs, and then perform reverse lookups in that map as necessary. For example, as your profiler encounters ClassIDs via callbacks like ClassLoadFinished, it goes the “safe” direction (run-time ID –\> metadata token), to build up its map. When it later encounters an mdTypeDef for a class, it checks to see if that mdTypeDef exists yet in its map—if so, your profiler uses that map to find the corresponding ClassID. Safe and easy.
+
+“Dave, stop telling us to do impossible things. You know full well that profilers which attach to a process after it has started up don’t have the benefit of seeing all the ClassLoad\* notifications. Also, if regular NGEN’d images are used, ClassLoad\* notifications are not reliably sent.”
+
+True. Though you will come across ClassIDs other ways. Memory profilers will encounter ObjectIDs on the heap, and can call GetClassFromObject to start filling up its map of ClassIDs and thus mdTypeDefs. Similarly, sampling profilers encounter FunctionIDs during stack walks, and can then get the ClassIDs containing those FunctionIDs and thus build up its map that way.
+
+“You’re a dreamer, man. There will still be cases where I have a metadata token, but have not yet encountered the ClassID. Think about deep inspection of embedded structs!”
+
+Yes, that is a good example. You are an astute reader. Memory profilers that wish to deeply inspect values of classes and structures on the heap need to know the ClassIDs in order to call GetClassLayout. This works great when you’re dealing with reference types whose fields point to other reference types: as you bounce from object to object, you can take the ObjectID (i.e., the location in memory where the object starts), pass it to GetClassFromObject, and there’s your ClassID. But what happens when a struct is embedded inside an object? Sure, you can get the layout of the object, and determine the offset into the object where the embedded struct lives. But then what? How to inspect and report on the values of fields _inside the embedded struct_? At this point, all you can get is the mdTypeDef for the struct (from the metadata of the containing class), but you may never have seen the ClassID for that struct.
+
+“Told you so.”
+
+# Going from metadata token to run-time ID
+
+#
+
+#
+
+As I mentioned above, the safest way to do this is to build up your own map and do reverse-lookups as necessary. If that scheme meets your needs, then by all means do that, and stop reading! But in the cases where this is insufficient, you may need to resort to using GetFunctionFromToken(AndTypeArgs) and GetClassFromToken(AndTypeArgs). There is no simple, foolproof way to use these APIs safely, but here is your guideline:
+
+**Never call GetFunctionFromToken(AndTypeArgs) and GetClassFromToken(AndTypeArgs) unless you’re certain the relevant types have been loaded.** (“Relevant types” include the ClassID containing the FunctionID whose mdMethodDef you pass to GetFunctionFromToken(AndTypeArgs), and the ClassID whose mdTypeDef you pass to GetClassFromToken(AndTypeArgs).) If these types have not been loaded, _you may cause them to be loaded now_! This is bad because:
+
+- This is an easy way to crash the app. Trying to load a type at the wrong time could cause cycles, causing infinite loops (depending on what your profiler does in response to class load notifications) or outright crashes. For example, trying to load a type while its containing assembly is still in an early phase of loading is a great and fun way to crash the CLR.
+- You will impact the behavior of the app. If you’re lucky enough not to crash the app, you’ve still impacted its behavior, by causing types to get loaded in a different order than they normally would. Any impact to app behavior like this makes it difficult for your users to reproduce problems that they are trying to use your tool to diagnose, or may hide problems that they don’t discover until they run their application outside of your tool.
+
+## Determining whether a class was loaded
+
+So how do you know a class has been fully loaded?
+
+Unfortunately, receiving the **ClassLoadFinished** callback does not necessarily mean that ClassID has been fully loaded yet, as the MSDN [documentation](http://msdn.microsoft.com/en-us/library/ms230794.aspx) warns us.
+
+Basically, the CLR type loader is one of the laziest things on this planet. It doesn’t want to do anything unless it really, really has to. The best guideline I can give you is this: If the app is currently executing managed code that uses a type, then the type is loaded. For example, if you do a stackwalk, and determine that the app is executing inside of
+
+MyRetType MyClass::MyFunction(MyArgumentType myArgumentType)
+
+then you can be reasonably assured that the following are loaded:
+
+- MyClass
+- MyArgumentType (if it’s a value-type)
+- MyRetType (if it’s a value-type)
+- For any class you know is loaded, so should be:
+ - its base class
+ - its value-type fields (not necessarily reference-type fields!)
+ - implemented interfaces
+ - value-type generic type arguments (and even reference-type generic type arguments in the case of MyClass)
+
+So much for stacks. What if you encounter an instance of a class on the heap? Surely the class is loaded then, right? Well, probably. If you encounter an object on the heap just after GC (inside **GarbageCollectionFinished** , before you return), it should be safe to inspect the class’s layout, and then peek through ObjectIDs to see the values of their fields.
+
+But what if you encounter an object earlier than that? For example, if you receive an **ObjectAllocated** callback, and call **GetClassFromObject** on the allocated ObjectID, can you be certain the ClassID has been fully loaded? Well, usually. But I have seen cases in the past, with types stored in NGENd images, where the CLR may issue an ObjectAllocated callback _just before_ the type has been fully loaded from the NGENd image. I’ve recently tried to get this to happen again but couldn’t, which probably means this is rather unlikely, but not necessarily impossible. Ugh.
+
+In general, a lot of the uncertainty above comes from types stored in NGENd modules. If we actually JIT-compile a function at run-time and load the types it uses from non-NGENd modules, then you can have much greater certainty about the above types being loaded. You can even make further assumptions about locals and types from signatures of direct callees being loaded.
+
+## Interlude: Remember the Unloads!
+
+Now is a good time to remind you that, not only is it dangerous to inspect run-time IDs too early (i.e., before they load); it’s also dangerous to inspect run-time IDs too late (i.e., after they **unload** ). For example, if you store ClassIDs and FunctionIDs for later use, and use them “too late”, you can easily crash the CLR. The profiling API does pretty much no validation of anything (in many cases, it’s incapable of doing so without using up significant amounts of memory to maintain lookup tables for everything). So we generally take any run-time ID that you pass to ICorProfilerInfo\* methods, cast it to an internal CLR structure ptr, and go boom if the ID is bad.
+
+There is no way to just ask the CLR if a FunctionID or ClassID is valid. Indeed, classes could get unloaded, and new classes loaded, and your ClassID may now refer to a totally different (valid) class.
+
+You need to keep track of the unloads yourself. You are notified when run-time IDs go out of scope (today, this happens at the level of an AppDomain unloading or a collectible assembly unloading—in both cases all IDs “contained” in the unloading thing are now invalid). Once a run-time ID is out of scope, you are not allowed to pass that run-time ID back to the CLR. In fact, you should consider whether thread synchronization will be necessary in your profiler to maintain this invariant. For example, if a run-time ID gets unloaded on thread A, you’re still not allowed to pass that run-time ID back to the CLR on thread B. So you may need to block on a critical section in thread A during the \*UnloadStarted / AppDomainShutdown\* callbacks, to prevent them from returning to the CLR until any uses of the contained IDs in thread B are finished.
+
+Take a look at the [docs](http://msdn.microsoft.com/en-us/library/bb384619.aspx) for more info.
+
+# TypeRefs
+
+So far I’ve been talking about how to go from a typeDef to its run-time ID, and by now that should seem hard enough that we don’t need to throw a monkey wrench into the works. But the sad fact is we’re rarely lucky enough even to have a typeDef. A class’s fields or even base type, might have their types defined in _other modules_, in which case the metadata tells us the fields or base type might actually be typeRefs, and not typeDefs. Ugh. Whaddya do with that?!
+
+I’ll tell you what you _don’t_ do. You don’t call the enticingly-named IMetaDataImport::ResolveTypeRef. On the surface, it seems like ResolveTypeRef would do exactly what you want: starting from a typeRef, please find the referenced module and return an IMetaDataImport on that module, along with the typeDef in that target module to which the typeRef refers. But the problem lies with how ResolveTypeRef determines the module to which a typeRef refers.
+
+I think ResolveTypeRef was originally designed for use at build-time (by language compilers), though I don’t know if it’s even used in that scenario anymore. It is certainly not good for use at run-time, where the loader’s decision on how to locate a referenced assembly can be arbitrarily complex. Different AppDomains in the same process may have different rules on how to locate the referenced assembly due to varying permission sets, host settings, or assembly versions. In the limit, the CLR may even _call into the user’s managed code_ to dynamically influence the decision of where the referenced assembly exists (see [AppDomain.AssemblyResolve Event](http://msdn.microsoft.com/en-us/library/system.appdomain.assemblyresolve.aspx)).
+
+ResolveTypeRef doesn’t know about any of this—it was never designed to be used in a running application with all these environmental factors. It has an extremely simple (and inaccurate) algorithm to iterate through a set of “known modules”, in an arbitrary order, looking for the first one that matches the reference. What does “known modules” mean? It’s a set of modules that have been opened into the metadata system, which is NOT the same as the list of modules already loaded by the assembly loader (and thus notified to your profiler). And it’s certainly not the same as the set of modules installed onto the disk.
+
+If you absolutely need to resolve refs to defs, your best bet may be to use your own algorithm which will be as accurate as you can make it, under the circumstances, and which will never try to locate a module that hasn’t been loaded yet. That means that you shouldn’t try to resolve a ref to a def if that def hasn’t actually been loaded into a type by the CLR. Consider using an algorithm similar to the following:
+
+1. Get the AssemblyRef from the TypeRef to get to the name, public key token and version of the assembly where the type should reside.
+2. Enumerate all loaded modules that the Profiling API has notified you of (or via [EnumModules](http://msdn.microsoft.com/en-us/library/dd490890)) (you can filter out a specific AppDomain at this point if you want).
+3. In each enumerated module, search for a TypeDef with the same name and namespace as the TypeRef (IMetaDataImport::FindTypeDefByName)
+4. Pay attention to **type forwarding**! Once you find the TypeDef, it may actually be an “exported” type, in which case you will need to follow the trail to the next module. Read toward the bottom of [this post](Type Forwarding.md) for more info.
+
+The above can be a little bit smarter by paying attention to what order you choose to search through the modules:
+
+- First search for the TypeDef in assemblies which exactly match the name, public key token and version for the AssemblyRef.
+- If that fails, then search through assemblies matching name and public key token (where the version is higher than the one supplied – this can happen for Framework assemblies).
+- If that fails, then search through all the other assemblies
+
+I must warn you that the above scheme is **not tested and not supported. Use at your own risk!**
+
+# Future
+
+Although I cannot comment on what will or will not be in any particular future version of the CLR, I can tell you that it is clear to us on the CLR team that we have work to do, to make dealing with metadata tokens and their corresponding run-time type information easier from the profiling API. After all, it doesn’t take a rocket scientist to read the above and conclude that it does take a rocket scientist to actually follow all this advice. So for now, enjoy the fact that what you do is really hard, making you difficult to replace, and thus your job all the more secure. You’re welcome.
+
+
+
+Special thanks to David Wrighton and Karel Zikmund, who have helped considerably with all content in this entry around the type system and metadata.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Profiler Detach.md b/Documentation/Profiling/davbr-blog-archive/Profiler Detach.md
new file mode 100644
index 0000000000..987ee24507
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Profiler Detach.md
@@ -0,0 +1,75 @@
+*This blog post originally appeared on David Broman's blog on 2/3/2010*
+
+
+I described how profilers may attach to already-running processes in other posts ([#1](Attach.md) and [#2](Attach2.md)). In this post I’m writing about how profilers that are already loaded may detach from a running process before that process exits. Like Profiler Attach, this is a new feature available starting with CLR V4.
+
+The Detach feature allows a profiler that the user is finished with to be unloaded. That means the application may return to its usual behavior and performance characteristics, without a profiler loaded and doing stuff. Also, since only one profiler may be loaded at a time, detaching a profiler makes room for a different (or the same) profiler to be loaded later on when the user wishes to do more diagnostics.
+
+## Limitations
+
+Not every V4 profiler is allowed to detach from a running process. The general rule is that a profiler which has caused an irreversible impact in the process it’s profiling should _not_ attempt to detach. The CLR catches the following cases:
+
+- Profiler set immutable flags (COR\_PRF\_MONITOR\_IMMUTABLE) via SetEventMask.
+- Profiler performed IL rewriting via SetILFunctionBody
+- Profiler used the Enter/Leave/Tailcall methods to add callouts to its probes
+
+If the profiler attempts to detach after doing any of the above, the CLR will disallow the attempt (see below for details).
+
+That said, there are still other irreversible things the profiler might do to a process (which would also make detaching a bad idea). Imagine a profiler that allocates memory without cleaning up after itself, creates threads without waiting for them to exit, uses metadata APIs to modify aspects of the running managed code, etc. Profiler writers need to use good judgment when considering whether to allow their profilers to detach from running processes. You don’t want to give your customers the experience of noticing the app they profile always behaves weirdly after detaching your profiler. So do not use the detach feature unless you’ve thought through the ramifications and can ensure the profiler does not leave the application in a noticeably different state.
+
+By the way, you may notice I said nothing about a profiler needing to load via attach in order for it to be able to use the detach feature. In fact, any profiler that loads on startup of the application (i.e., via environment variables and not via the AttachProfiler API) is perfectly welcome to use the detach feature—so long as it does not leave an impact on the process as per above.
+
+## How Detaching Works
+
+There’s one, deceptively simple-looking method the profiler calls to detach itself from the running process. However, detaching is a big responsibility, and profiler writers need to give thoughtful consideration to doing it properly. The CLR does its part to ensure it doesn’t accidentally call into the profiler via Profiling API methods after the CLR unloads the profiler DLL. However, if the profiler has set into motion extra threads, Windows callbacks, timer interrupts, etc., then the profiler must “undo” all of these things before it attempts to detach from the running process. Basically, any way for control to re-enter the profiler DLL must be disabled before detaching, or else your users will experience crashes after trying to detach your profiler.
+
+So, the sequence works like this:
+
+1. The profiler **deactivates all the ways control could enter the profiler** (aside from the CLR Profiling API itself). This means removing any Windows callbacks, timer interrupts, hijacking, disabling any other components that may try to call into the profiler DLL, etc. The profiler must also wait for all threads that it has created (e.g., a sampling thread, inter-process communication threads, a ForceGC thread, etc.) to exit, except for the one thread the profiler will use to call RequestProfilerDetach(). Any threads created by the CLR, of course, should not be tampered with.
+ - Your profiler must block here until all those ways control can enter your profiler DLL have truly been deactivated (e.g., just setting a flag to disable sampling may not be enough if your sampling thread is currently performing a sample already in progress). You must coordinate with all components of your profiler so that your profiler DLL knows that everything is verifiably deactivated, and all profiler-created threads have exited (except for the one thread the profiler will use to call RequestProfilerDetach()).
+2. If the profiler will use a thread of its own creation to call RequestProfilerDetach() (which is the typical way this API will be called), that thread must own a reference onto the profiler’s DLL, via its own **LoadLibrary()** call that it makes on the profiler DLL. This can either be done when the thread starts up, or now, or sometime in between. But that reference must be added at some point before calling RequestProfilerDetach().
+3. Profiler calls ICorProfilerInfo3:: **RequestProfilerDetach** ().
+ - (A) This causes the CLR to (synchronously) set internal state to avoid making any further calls into the profiler via the ICorProfilerCallback\* interfaces, and to refuse any calls from the profiler into ICorProfilerInfo\* interfaces (such calls will now fail early with CORPROF\_E\_PROFILER\_DETACHING).
+	- (B) The CLR also (asynchronously) begins a periodic safety check on another thread to determine when all pre-existing calls into the profiler via the ICorProfilerCallback\* interfaces have returned.
+ - Note: It is expected that your profiler will not make any more “unsolicited” calls back into the CLR via any interfaces (ICorProfilerInfo\*, hosting, metahost, metadata, etc.). By “unsolicited”, I’m referring to calls that didn’t originate from the CLR via ICorProfilerCallback\*. In other words, it’s ok for the profiler to continue to do its usual stuff in its implementation of ICorProfilerCallback methods (which may include calling into the CLR via ICorProfilerInfo\*), as the CLR will wait for those outer ICorProfilerCallback methods to return as per 3B. But the profiler must not make any other calls into the CLR (i.e., that are not sandwiched inside an ICorProfilerCallback call). You should already have deactivated any component of your profiler that would make such unsolicited calls in step 1.
+4. Assuming the above RequestProfilerDetach call was made on a profiler-created thread, that thread must now call [**FreeLibraryAndExitThread**](http://msdn.microsoft.com/en-us/library/ms683153(VS.85).aspx)**()**. (Note: that’s a specialized Windows API that combines FreeLibrary() and ExitThread() in such a way that races can be avoided—do not call FreeLibrary() and ExitThread() separately.)
+5. On another thread, the CLR continues its **periodic safety checks** from 3B above. Eventually the CLR determines that there are no more ICorProfilerCallback\* interface calls currently executing, and it is therefore safe to unload the profiler.
+6. The CLR calls ICorProfilerCallback3:: **ProfilerDetachSucceeded**. The profiler can use this signal to know that it’s about to be unloaded. It’s expected that the profiler will do very little in this callback—probably just notifying the user that the profiler is about to be unloaded. Any cleanup the profiler needs to do should already have been done during step 1.
+7. CLR makes the necessary number of **Release** () calls on ICorProfilerCallback3. The reference count should go down to 0 at this point, and the profiler may deallocate any memory it had previously allocated to support its callback implementation.
+8. CLR calls **FreeLibrary** () on the profiler DLL. This should be the last reference to the profiler’s DLL, and your DLL will now be unloaded.
+ - Note: in some cases, it’s theoretically possible that step 4 doesn’t happen until _after_ this step, in which case the last reference to the profiler’s DLL will actually be released by your profiler’s thread that called RequestProfilerDetach and then FreeLibraryAndExitThread. That’s because steps 1-4 happen on your profiler’s thread, and steps 5-8 happen on a dedicated CLR thread (for detaching profilers) sometime after step 3 is completed. So there’s a race between step 4 and all of steps 5-8. There’s no harm in this, so long as you’re playing nice by doing your own LoadLibrary and FreeLibraryAndExitThread as described above.
+9. The CLR adds an Informational entry to the Application Event Log noting that the profiler has been unloaded. The CLR is now ready to service any profiler attach requests.
+
+## RequestProfilerDetach
+
+Let’s dive a little deeper into the method you call to detach your profiler:
+
+`HRESULT RequestProfilerDetach([in] DWORD dwExpectedCompletionMilliseconds);`
+
+
+
+First off, you’ll notice this is on ICorProfilerInfo3, the interface your profiler DLL uses, in the same process as your profilee. Although the AttachProfiler API is called from outside the process, this detach method is called from in-process. Why? Well, the general rule with profilers is that _everything_ is done in-process. Attach is an exception because your profiler isn’t in the process yet. You need to somehow trigger your profiler to load, and you can’t do that from a process in which you have no code executing yet! So Attach is sort of a boot-strapping API that has to be called from a process of your own making.
+
+Once your profiler DLL is up and running, it is in charge of everything, from within the same process as the profilee. And detach is no exception. Now with that said, it’s probably typical that your profiler will detach in response to an end user action—probably via some GUI that you ship that runs in its own process. So a case could be made that the CLR team could have made your life easier by providing an out-of-process way to do a detach, so that your GUI could easily trigger a detach, just as it triggered the attach. However, you could make that same argument about all the ways you might want to control a profiler via a GUI, such as these commands:
+
+- Do a GC now and show me the heap
+- Dial up or down the sampling frequency
+- Change which instrumented methods should log their invocations
+- Start / stop monitoring exceptions
+- etc.
+
+The point is, if you have a GUI to control your profiler, then you probably already have an inter-process mechanism for the GUI to communicate with your profiler DLL. So think of “detach” as yet one more command your GUI will send to your profiler DLL.
+
+Ok, fine, so your profiler DLL is the one to call RequestProfilerDetach. What should it specify for “dwExpectedCompletionMilliseconds”? The purpose of this parameter is for the profiler to give a guess as to how long the CLR should expect to wait until all control has exited the profiler, thus ensuring success of the CLR’s periodic safety checks (step 5). So consider all of your callback implementations and what they do. Pick the “longest” one—the one that does the most processing or blocking or complex calls back into the CLR via ICorProfilerInfo or other interfaces. Roughly how long will that callback implementation take? That’s the value (in milliseconds) that you specify for this parameter.
+
+The CLR uses that value in its Sleep() statement that sits between each periodic safety check done as part of step 5. Although the CLR reserves the right to change the details of this algorithm, currently during step 5 the CLR sleeps dwExpectedCompletionMilliseconds before checking whether all callback methods have popped off all stacks. If they haven’t, the CLR will sleep an additional dwExpectedCompletionMilliseconds (for a total sleep time of 2\*dwExpectedCompletionMilliseconds) and try again. If callback methods are still on any stacks, then the CLR degrades to a steady-state of sleeping for 10 minutes and retrying, repeating until the profiler may be unloaded.
+
+Until the profiler can be unloaded, it will be considered “loaded” (though deactivated in the sense that no new callback methods will be called). This prevents any new profiler from attaching.
+
+
+
+Ok, that wraps up how detaching works. If you remember only one thing from this post, remember that it’s really easy to cause an application you profile to AV after your profiler unloads if you’re not careful. While the CLR tracks outgoing ICorProfilerCallback\* calls, it does not track any other way that control can enter your profiler DLL. _Before_ your profiler calls RequestProfilerDetach:
+
+- You must take care to deactivate all other ways control can enter your profiler DLL
+- Your profiler must block until all those other ways control can enter your profiler DLL have verifiably been deactivated
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md b/Documentation/Profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md
new file mode 100644
index 0000000000..682ad7bf58
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md
@@ -0,0 +1,341 @@
+*This blog post originally appeared on David Broman's blog on 10/6/2005*
+
+
+**Introduction**
+
+This article is targeted toward profiler authors, and anyone interested in building a profiler to examine managed applications. I will describe how you can program your profiler to walk managed stacks in version 2.0 of the Common Language Runtime. I’ll try to keep the mood light and zany, as one might expect given the overtly wacky subject matter.
+
+The profiling API in version 2.0 of the CLR has a new method that lets your profiler walk the call stack of the application you’re profiling: DoStackSnapshot. Version 1.x of the CLR exposed similar functionality via the in-process debugging interface. But it’s easier, more accurate, and more stable with DoStackSnapshot. DoStackSnapshot uses the same stack walker used by the garbage collector, security system, exception system, etc. So you _know_ it’s got to be right.
+
+Access to a full stack trace gives customers of your profiler the ability to get the big picture of what’s going on in an application when something “interesting” happens. Depending on the nature of the application and what a user wants to profile, you can imagine a user wanting a call stack when an object is allocated, a class is loaded, an exception is thrown, etc. Even getting a call stack not in response to an application event, but say a timer event, would be interesting for a sampling profiler. Looking at hot spots in code becomes more enlightening when you can see who called the function that called the function that called the function containing the hot spot.
+
+I’m going to focus on getting stack traces via the DoStackSnapshot API. But it is worth noting that another way to get stack traces is by building shadow stacks: you can hook FunctionEnter/Leave to keep your own copy of the current thread’s managed call stack. Shadow stack building is useful if you need stack information at all times during the app’s execution, and don’t mind paying the performance cost of having your profiler’s code run on every managed call and return. DoStackSnapshot is best if you need slightly sparser reporting of stacks, such as in response to interesting events. Even a sampling profiler taking stack snapshots every few milliseconds is much sparser than building shadow stacks. So DoStackSnapshot is well-suited for sampling profilers.
+
+**Stack walk on the wild side**
+
+It’s nice to be able to get call stacks whenever you want them. But with power comes responsibility. A user of a profiler will not want stack walking to be used for evil purposes like causing an AV or deadlock in the runtime. As a profiler writer, you will have to choose how to wield your power. If you choose the side of good, that will be but your first step. I will talk about how to use DoStackSnapshot, and how to do so carefully. It turns out that the more you want to do with this method, the harder it is to get it right. Unless you actually want to be evil. In which case I’m not talking to you.
+
+So let’s take a look at the beast. Here’s what your profiler calls (you can find this in ICorProfilerInfo2, in corprof.idl):
+```
+HRESULT DoStackSnapshot(
+ [in] ThreadID thread,
+ [in] StackSnapshotCallback *callback,
+ [in] ULONG32 infoFlags,
+ [in] void *clientData,
+ [in, size_is(contextSize), length_is(contextSize)] BYTE context[],
+ [in] ULONG32 contextSize);
+```
+And here’s what the CLR calls on your profiler (you can also find this in corprof.idl). You’ll pass a pointer to your implementation of this function in the callback parameter above.
+```
+typedef HRESULT __stdcall StackSnapshotCallback(
+ FunctionID funcId,
+ UINT_PTR ip,
+ COR_PRF_FRAME_INFO frameInfo,
+ ULONG32 contextSize,
+ BYTE context[],
+ void *clientData);
+```
+
+It’s like a sandwich. When your profiler wants to walk the stack, you call DoStackSnapshot. Before the CLR returns from that call, it calls your StackSnapshotCallback several times, once for each managed frame (or run of unmanaged frames) on the stack:
+```
+Profiler calls DoStackSnapshot. Whole wheat bread
+ CLR calls StackSnapshotCallback. Lettuce frame (“leaf”-most frame, ha)
+ CLR calls StackSnapshotCallback. Tomato frame
+ CLR calls StackSnapshotCallback. Bacon frame (root or “main” frame)
+CLR returns back to profiler from DoStackSnapshot Whole wheat bread
+```
+
+As you can see from my hilarious notations, we notify you of the frames in the reverse order from how they were pushed onto the stack—leaf (last-pushed) frame first, main (first-pushed) frame last. So what do all these parameters mean? I'm not ready to discuss them all yet, but I guess I’m in the mood to talk about a few of them. Let's start with DoStackSnapshot. infoFlags comes from the COR\_PRF\_SNAPSHOT\_INFO enum in corprof.idl, and it allows you to control whether we’ll give you register contexts for the frames we report. You can specify any value you like for clientData and we’ll faithfully give it back to you in your StackSnapshotCallback.
+
+In StackSnapshotCallback, we’ll give you the FunctionID of the currently “walked” frame in funcId. This value will be 0 if the current frame is a run of unmanaged frames, but I’m not ready to talk about that just yet. If it’s nonzero, feel free to pass funcId and / or frameInfo to other methods like GetFunctionInfo2 and GetCodeInfo2 to get more info about the function. You can get this function info now during your stack walk, or save the funcIds and get the function info later on to reduce your impact on the running app. If you do the latter, remember that a frameInfo is only valid inside the callback that gives it to you. While it’s ok to save the funcIds for later use, you may not save the frameInfo for later use. Or, if you like, just report the raw numbers to your users; users love seeing meaningless numbers.
+
+When you return from StackSnapshotCallback, you will typically return S\_OK and we will continue walking the stack. If you like, you can return S\_FALSE, and that will cause us to abort the stack walk. Your DoStackSnapshot call will then return with CORPROF\_E\_STACKSNAPSHOT\_ABORTED.
+
+**Synchronous / Asynchronous**
+
+There are two ways you can call DoStackSnapshot. A **synchronous** call is the easiest to get right. You make a synchronous call when, in response to the CLR calling one of your profiler’s ICorProfilerCallback(2) methods, you call DoStackSnapshot to walk the stack of the current thread. This is useful when you want to see what the stack looks like at an interesting notification like ObjectAllocated. So you just call DoStackSnapshot from within your ICorProfilerCallback(2) method, passing 0 / NULL for those parameters I haven’t told you about yet.
+
+When you want to get jiggy with it, you’re kicking it **asynchronous** style. An asynchronous stack walk occurs when you walk the stack of a different thread or, heaven forbid, forcefully interrupt a thread to perform a stack walk (on itself or another thread). The latter involves hijacking the instruction pointer of a thread to force it to execute your own code at arbitrary times. This is insanely dangerous for too many reasons to list here. Just, please, don’t do it. I’ll restrict my description of asynchronous stack walks to non-hijacking uses of DoStackSnapshot to walk a separate target thread. I call this “asynchronous” because the target thread was doing any old arbitrary thing at the time we chose to walk its stack. This technique is commonly used by sampling profilers.
+
+**_Walking all over someone else_**
+
+So let’s break down the cross-thread stack walk a little. You got two threads: the **current** thread and the **target** thread. The current thread is the thread executing DoStackSnapshot. The target thread is the thread whose stack is being walked by DoStackSnapshot. You specify the target thread by passing its thread ID in the thread parameter to DoStackSnapshot. What happens next is not for the faint of heart. Remember, the target thread was executing any old arbitrary code when you came along and asked to walk its stack. So what do we do? We suspend the target thread, and it stays suspended the whole time we walk it. Unbelievable!
+
+Have we crossed over to evil? Or can we do this safely?
+
+I’m pleased you asked. This is indeed dangerous, and I’ll talk some later about how to do this safely. But first, I'm going to get into “mixed-mode stacks”.
+
+**Multiple Personality Disorder**
+
+A managed application is likely not going to spend all of its time in managed code. PInvokes and COM interop allow managed code to call into unmanaged code, and sometimes back again via delegates. Also, if you blink, you might miss managed code calling directly into the unmanaged runtime (CLR) to do JIT compilation, deal with exceptions, do a garbage collection, etc. So when you do a stack walk you will probably encounter a mixed-mode stack: some frames are managed functions, and others are unmanaged functions. What is one to do?
+
+**_Grow up, already!_**
+
+Before I continue from this exciting cliffhanger, a brief interlude. Everyone knows that stacks on our faithful modern PCs grow (i.e., “push”) to smaller addresses. But when we visualize these addresses in our minds or on whiteboards, we disagree with how to sort them vertically. Some of us imagine the stack growing _up_ (little addresses on top); some see it growing _down_ (little addresses on the bottom). We’re divided on this issue in our team as well. I choose to side with any debugger I’ve ever used: call stack traces and memory dumps tell me the little addresses are “above” the big addresses. So stacks grow up. Main is at the bottom, the leaf callee is at the top. If you disagree, you’ll have to do some mental rearranging to get through this article.
+
+**_Waiter, there are holes in my stack_**
+
+Now that we’re speaking the same language, let’s look at a mixed-mode stack:
+
+| Stack (leaf frame on top) |
+| --- |
+| Unmanaged |
+| D (Managed) |
+| Unmanaged |
+| C (Managed) |
+| B (Managed) |
+| Unmanaged |
+| A (Managed) |
+| Main (Managed) |
+
+Stepping back a bit, it’s worthwhile to understand why DoStackSnapshot exists in the first place. It’s there to help you walk _managed_ frames on the stack. If you tried to walk managed frames yourself, you would get unreliable results, particularly on 32 bits, because of some wacky calling conventions used in managed code. The CLR understands these calling conventions, and DoStackSnapshot is therefore in a uniquely suitable position to help you decode them. However, DoStackSnapshot is not a complete solution if you want to be able to walk the entire stack, including unmanaged frames. Here’s where you have a choice:
+
+1. Do nothing and report stacks with “unmanaged holes” to your users, or
+2. Write your own unmanaged stack walker to fill in those holes.
+
+When DoStackSnapshot comes across a block of unmanaged frames, it calls your StackSnapshotCallback with funcId=0. (I think I mentioned this before, but I’m not sure you were listening.) If you’re going with option #1 above, simply do nothing in your callback when funcId=0. We’ll call you again for the next managed frame and you can wake up at that point.
+
+Note that if this unmanaged block actually consists of more than one unmanaged frame we still only call StackSnapshotCallback once. Remember, we’re making no effort to decode the unmanaged block—we have special cheat sheets that help us skip over the block to the next managed frame, and that’s how we progress. We don’t necessarily know what’s inside the unmanaged block. That’s for you to figure out.
+
+**_That first step’s a doozy_**
+
+Unfortunately, filling in the unmanaged holes isn’t the only hard part. Just beginning the walk is a challenge. Take a look at our stack above. No, really, don’t be lazy; just scroll up and take a look. There’s unmanaged gunk at the top. Sometimes you’ll be lucky, and that unmanaged gunk will be COM or PInvoke code. If so, the CLR is smart enough to know how to skip it and will happily begin your walk at the first managed frame (D). However, you might still want to walk the top-most unmanaged block to report as complete a stack as possible. And even if you don’t, you might be forced to anyway if you’re _not_ lucky, and that unmanaged gunk represents not COM or PInvoke code, but helper code in the CLR itself (e.g., to do jitting, a GC, etc.). If that’s the case we won’t be able to find the D frame without your help. So an unseeded call to DoStackSnapshot will result in an error (CORPROF\_E\_STACKSNAPSHOT\_UNMANAGED\_CTX or CORPROF\_E\_STACKSNAPSHOT\_UNSAFE). By the way, if you haven’t visited corerror.h, you really should. It’s beautiful this time of year.
+
+If you’re still alert, you might have noticed I used the word “unseeded” without defining it. Well, now’s the time. DoStackSnapshot takes a “seed context” via the context and contextSize parameters. Context is an overused term with many meanings. In this case, I'm talking about a register context. If you peruse the architecture-dependent windows headers (e.g., nti386.h) you’ll find a struct CONTEXT. It contains values for the CPU registers, and represents the CPU’s state at a particular moment in time. This is the type of context I'm talking about here.
+
+If you pass NULL for the context parameter, the stack walk we perform is “unseeded”, and we just start at the top. However, if you pass a non-NULL value for the context parameter, presumably representing the CPU-state at some spot lower down on the stack (preferably pointing to the D frame), then we perform a stack walk “seeded” with your context. We ignore the real top of the stack and just start wherever you point us.
+
+Ok, that was a lie. The context you pass us is more of a “hint” than an outright directive. If the CLR is certain it can find the first managed frame (because the top-most unmanaged block is PInvoke or COM code), it’ll just do that and ignore your seed. Don’t take it personally, though. The CLR is trying to help you by providing the most accurate stack walk it can. Your seed is only useful if the top-most unmanaged block is helper code in the CLR itself, for which we have no cheat sheet to help us skip it. Since that’s the only situation your seed is useful, that’s the only situation your seed is used.
+
+Now if you’re not only still alert but also astute, you will wonder how on earth you can even provide us the seed in the first place. If the target thread is still in motion, you can’t just go and walk this target thread’s stack to find the D frame (and thus calculate your seed context). And yet I’m sitting here telling you to calculate your seed context by doing your unmanaged walk _before_ calling DoStackSnapshot (and thus before DoStackSnapshot takes care of suspending the target thread for you). So what… does the target thread need to be suspended by you _and_ the CLR?! Well, yeah. I think it’s time to choreograph this ballet.
+
+But before you get too deep, note that the issue of whether and how to seed a stack walk applies only to _asynchronous_ walks. If you’re doing a synchronous walk, DoStackSnapshot will always be able to find its way to the top-most managed frame without your help. No seed necessary.
+
+**_All together now_**
+
+For the truly adventurous profiler that is doing an asynchronous, cross-thread, seeded stack walk while filling in the unmanaged holes, here’s what it would look like.
+
+|
+
+Block of
+Unmanaged
+Frames
+
+ |
+1. You suspend the target thread (target thread’s suspend count is now 1)
+2. You get the target thread’s current register context
+3. You determine if the register context points to unmanaged code (e.g., call ICorProfilerInfo2::GetFunctionFromIP(), and see if you get back a 0 FunctionID)
+4. In this case the register context does point to unmanaged code, so you perform an unmanaged stack walk until you find the top-most managed frame (D)
+ |
+|
+
+Function D
+(Managed)
+
+ |
+1. You call DoStackSnapshot with your seed context. CLR suspends target thread again: its suspend count is now 2. Our sandwich begins.
+
+1. CLR calls your StackSnapshotCallback with FunctionID for D.
+ |
+|
+
+Block of
+Unmanaged
+Frames
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID=0. You’ll need to walk this block yourself. You can stop when you hit the first managed frame, or you can cheat: delay your unmanaged walk until sometime after your next callback, as the next callback will tell you exactly where the next managed frame begins (and thus where your unmanaged walk should end).
+ |
+|
+
+Function C
+(Managed)
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID for C.
+ |
+|
+
+Function B
+(Managed)
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID for B.
+ |
+|
+
+Block of
+Unmanaged
+Frames
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID=0. Again, you’ll need to walk this block yourself.
+ |
+|
+
+Function A
+(Managed)
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID for A.
+ |
+|
+
+Main
+(Managed)
+
+ |
+1. CLR calls your StackSnapshotCallback with FunctionID for Main.
+2. DoStackSnapshot “resumes” target thread (its suspend count is now 1) and returns. Our sandwich is complete.
+
+1. You resume target thread (its suspend count is now 0, so it’s resumed for real).
+ |
+
+**Triumph over evil**
+
+Ok, this is way too much power without some serious caution. In the most advanced case, you’re responding to timer interrupts and suspending application threads arbitrarily to walk their stacks. Yikes!
+
+Being good is hard and involves rules that are not obvious at first. So let's dive in.
+
+**_The bad seed_**
+
+Let’s start with an easy one. If your profiler supplies a bogus (non-null) seed when you call DoStackSnapshot, we’ll give you bogus results. We’ll look at the stack where you point us, and make assumptions about what the values on the stack are supposed to represent. That will cause us to dereference what we expect are addresses on the stack. So if you screw up, we’ll dereference values off into la la land. We do everything we can to avoid an all-out 2nd-chance AV (which would tear down your customer’s process). But you really should make an effort to get your seed right and not take any chances.
+
+**_Woes of suspension_**
+
+The second you decide to do cross-thread walking, you’ve decided, at a minimum, to ask the CLR to start suspending threads on your behalf. And, even worse, if you want to walk the unmanaged block at the top of the stack, you’ve decided to start suspending threads all by yourself without invoking the CLR’s wisdom on whether this might be a good idea at the current time.
+
+We all remember the dining philosophers from CS training, right? Everyone picks up his right fork, and no one can grab his left fork because each dude is waiting on the dude to his left to put down the needed fork. And if they’re all seated in a circle, you’ve got a cycle of waiting and a lot of empty stomachs. The reason these idiots starve to death is, well, for one thing, they think they each need two forks to eat, which is pretty dumb. But that’s not the point. They break a simple rule of deadlock avoidance: if multiple locks are to be taken, always take them in the same order. Following this rule would avoid the cycle where A waits on B, B waits on C, C waits on A.
+
+So here’s where it gets interesting. Suppose an app follows the rule and always takes locks in the same order. But now imagine someone comes along and starts arbitrarily suspending threads (that would be your profiler doing the suspending, by the way). The complexity has leaped substantially. What if the suspender now needs to take a lock held by the suspendee? Or more insidious, maybe the suspender needs a lock held by a dude who’s waiting for a lock held by another dude who’s waiting for a lock held by the suspendee? Suspension adds a new edge to our thread-dependency graph, which can introduce cycles. Let’s take a look at some specific problems:
+
+Problem 1: _Suspendee owns locks needed by suspender, or needed by threads the suspender depends on._
+
+Problem 1a: _Those locks are CLR locks._
+
+As you might imagine, the CLR has a bit of thread synchronization to do here and there, and therefore has several locks that are used internally. When you call DoStackSnapshot, the CLR detects the condition that the target thread owns a CLR lock that will be needed by the current thread (in order to perform the stack walk). When the condition arises, the CLR refuses to do the suspension, and DoStackSnapshot immediately returns with an error (CORPROF\_E\_STACKSNAPSHOT\_UNSAFE). At this point, if you’ve suspended the thread yourself before your call to DoStackSnapshot, then you will resume the thread yourself, and you stay out of the pickle entirely.
+
+Problem 1b: _Those locks are your own profiler’s locks_
+
+This is more common-sense than anything, really. You may have your own thread synchronization to do here and there, so you can imagine an application thread (Thread A) hits a profiler callback, and runs some of your profiler code that involves taking one of your own locks. Then poof, another thread (Thread B) decides to walk A. This means B will suspend A. So you just need to remember that, while A is suspended, you really shouldn’t have B try to take any of your own locks that A might possibly own. For example, thread B will execute StackSnapshotCallback during the stack walk, so you shouldn’t be trying to take any locks during that callback that could be owned by the suspended target thread (A).
+
+Problem 2: _While you suspend the target thread, the target thread tries to suspend you_
+
+“Come on! Like that could really happen.” Believe it or not, if:
+
+- Your app runs on a multiproc box, and
+- Thread A runs on one proc and thread B runs on another, and
+- A tries to suspend B while B tries to suspend A
+
+then it’s possible that both suspensions win, and both threads end up suspended. It’s like the line from that movie: “Multiproc means never having to say, ‘I lose.’”. Since each thread is waiting for the other to wake it up, they stay suspended forever. It is the most romantic of all deadlocks.
+
+This really can happen, and it is more disconcerting than problem #1, because you can’t rely on the CLR to detect this for you when you do the suspension yourself before calling DoStackSnapshot. Once you’ve done the suspension, it’s too late!
+
+Ok, so, why is the target thread trying to suspend you anyway? Well, in a hypothetical, poorly-written profiler, you could imagine that the stack walking code (along with the suspension code) might be executed by any number of threads at arbitrary times. In other words, imagine A is trying to walk B at the same time B is trying to walk A. They both try to suspend each other simultaneously (because they’re both executing the SuspendThread() portion of the profiler’s stack walking routine), both win, and we deadlock. The rule here is obvious—don’t do that!
+
+A less obvious reason that the target thread might try to suspend your walking thread is due to the inner workings of the CLR. The CLR suspends application threads to help with things like garbage collection. So if your walker tries to walk (and thus suspend) the thread doing the GC at the same time the thread doing the GC tries to suspend your walker, you are hosed.
+
+The way out, fortunately, is quite simple. The CLR is only going to suspend threads it needs to suspend in order to do its work. Let’s label the two threads involved in your stack walk: Thread A = the current thread (the thread performing the walk), and Thread B = the target thread (the thread whose stack is walked). As long as Thread A has _never executed managed code_ (and is therefore of no use to the CLR during a garbage collection), then the CLR will never try to suspend Thread A. This means it’s safe for your profiler to have Thread A suspend Thread B, as the CLR will have no reason for B to suspend A.
+
+If you’re writing a sampling profiler, it’s quite natural to ensure all of this. You will typically have a separate thread of your own creation that responds to timer interrupts and walks the stacks of other threads. Call this your sampler thread. Since you create this sampler thread yourself and have control over what it executes, the CLR will have no reason to suspend it. And this also fixes the “poorly-written profiler” example above, since this sampler thread is the only thread of your profiler trying to walk or suspend other threads. So your profiler will never try to directly suspend the sampler thread.
+
+This is our first nontrivial rule so, for emphasis, let’s repeat with some neat formatting:
+
+Rule 1: Only a thread that has never run managed code can suspend another thread
+
+**_Nobody likes to walk a corpse_**
+
+If you are doing a cross-thread stack walk, you need to ensure your target thread remains alive for the duration of your walk. Just because you pass the target thread as a parameter to the DoStackSnapshot call doesn’t mean you’ve implicitly added some kind of lifetime reference to it. If the app wants the thread to go away it will. And if that happens while you’re trying to walk it, you could easily AV.
+
+Lucky for you, the CLR notifies profilers when a thread is about to be destroyed via the aptly-named ThreadDestroyed callback (ICorProfilerCallback(2)). So it’s your responsibility to implement ThreadDestroyed and have it wait until anyone walking that thread is finished. This is interesting enough to qualify as our next rule:
+
+Rule 2: Block in ThreadDestroyed callback until that thread’s stack walk is complete
+
+
+
+**_GC helps you make a cycle_**
+
+Ok, at this point you might want to take a bathroom break or get some caffeine or something. Things get a little hairy here. Let’s start with the text of the next rule, and decipher it from there:
+
+Rule 3: Do not hold a lock during a profiler call that can trigger a GC
+
+A while back I mentioned that it is clearly a bad idea for your profiler to hold one of its own locks if the owning thread might get suspended and then walked by another thread that will need the same lock. Rule 3 warns us against something more subtle. Here, I'm saying you shouldn’t hold _any_ of your own locks if the owning thread is about to call an ICorProfilerInfo(2) method that might trigger a garbage collection. A couple examples should help.
+
+Example #1:
+
+- Thread A successfully grabs and now owns one of your profiler locks
+- Thread B = thread doing the GC
+- Thread B calls profiler’s GarbageCollectionStarted callback
+- Thread B blocks on the same profiler lock
+- Thread A executes GetClassFromTokenAndTypeArgs()
+- GetClassFromTokenAndTypeArgs tries to trigger a GC, but notices a GC is already in progress.
+- Thread A blocks, waiting for GC currently in progress (Thread B) to complete
+- But B is waiting for A, because of your profiler lock.
+
+![](media/gccycle.jpg)
+
+Example #2:
+
+- Thread A successfully grabs and now owns one of your profiler locks
+- Thread B calls profiler’s ModuleLoadStarted callback
+- Thread B blocks on the same profiler lock
+- Thread A executes GetClassFromTokenAndTypeArgs()
+- GetClassFromTokenAndTypeArgs triggers a GC
+- Thread A (now doing the GC) waits for B to be ready to be collected
+- But B is waiting for A, because of your profiler lock.
+
+![](media/deadlock.jpg)
+
+Have you digested the madness? The crux of the problem is that garbage collection has its own synchronization mechanisms. Example 1 involved the fact that only one GC can occur at a time. This is admittedly a fringe case, as GCs don’t spontaneously occur quite so often that one has to wait for another, unless you’re operating under stressful conditions. Even so, if you profile long enough, this will happen, and you need to be prepared. Example 2 involved the fact that the thread doing the GC must wait for the other application threads to be ready to be collected. The problem arises when you introduce one of your own locks into the mix, thus forming a cycle. In both cases we broke the rule by allowing A to own one of your locks and then call GetClassFromTokenAndTypeArgs (though calling any method that might trigger a GC is sufficient to doom us).
+
+How’s that caffeine holding out? If it’s working, you probably have a couple questions.
+
+“How do I know which ICorProfilerInfo(2) methods might trigger a garbage collection?”
+
+We plan to document this on MSDN, or at the least, in my or [Jonathan Keljo’s blog](http://blogs.msdn.com/jkeljo/default.aspx).
+
+“What does this have to do with stack walking?”
+
+Yeah, if you read carefully, you’ll see that this rule never even mentions DoStackSnapshot. And no, DoStackSnapshot is not even one of those mysterious ICorProfilerInfo(2) methods that trigger a GC. The reason I'm discussing this rule here is that it’s precisely you daring cowboys—who asynchronously walk stacks at arbitrary samples—who will be most likely to implement your own profiler locks, and thus be prone to falling into this trap. Indeed, rule 2 above downright tells you to add some synchronization into your profiler. It is quite likely a sampling profiler will have other synchronization mechanisms as well, perhaps to coordinate reading / writing shared data structures at arbitrary times. Of course, it’s still quite possible for a profiler that never touches DoStackSnapshot to need to deal with this issue. So tell your friends.
+
+**Enough is enough**
+
+I’m just about tuckered out, so I’m gonna close this out with a quick summary of the highlights. Here's what's important to remember.
+
+1. Synchronous stack walks involve walking the current thread in response to a profiler callback. These don’t require seeding, suspending, or any special rules. Enjoy!
+2. Asynchronous walks require a seed if the top of the stack is unmanaged code not part of a PInvoke or COM call. You supply a seed by directly suspending the target thread and walking it yourself, until you find the top-most managed frame. If you don’t supply a seed in this case, DoStackSnapshot will just return a failure code to you.
+3. If you directly suspend threads, remember that only a thread that has never run managed code can suspend another thread
+4. When doing asynchronous walks, always block in your ThreadDestroyed callback until that thread’s stack walk is complete
+5. Do not hold a lock while your profiler calls into a CLR function that can trigger a GC
+
+Finally, a note of thanks to the rest of the CLR Profiling API team, as the writing of these rules is truly a team effort. And special thanks to Sean Selitrennikoff who provided an earlier incarnation of much of this content.
diff --git a/Documentation/Profiling/davbr-blog-archive/README.md b/Documentation/Profiling/davbr-blog-archive/README.md
new file mode 100644
index 0000000000..608494d1ef
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/README.md
@@ -0,0 +1,6 @@
+David Broman's Profiling Blog Archive
+===========================
+
+David Broman was the original author of many profiler features in desktop .NET. At the time he had many helpful blog posts, and even today they are some of the most thorough documentation we have available on how to use the profiling APIs. They are starting to show their age a bit, and only talk about desktop .NET since they were written before .NET Core existed. However, they are still largely accurate and a valuable source of documentation for the profiling APIs.
+
+His blog is being retired, so the relevant profiler articles have been ported here. Ideally we (the profiler team) will work over time to translate them from blog posts to official documentation, and update the crufty bits at the same time. Until that happens, they are here in an as-is state with the knowledge that some parts are out of date.
diff --git a/Documentation/Profiling/davbr-blog-archive/ReJIT - Limitations.md b/Documentation/Profiling/davbr-blog-archive/ReJIT - Limitations.md
new file mode 100644
index 0000000000..8ab4c19958
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/ReJIT - Limitations.md
@@ -0,0 +1,60 @@
+*This blog post originally appeared on David Broman's blog on 10/10/2011*
+
+## Is ReJIT For You?
+
+If you’re writing a monitoring tool, typically run in production, and…
+
+If your tool is always on, always monitoring, but needs a way to fine-tune the amount of instrumentation it does without forcing the monitored application to restart, and…
+
+If your tool instruments potentially everything, including framework assemblies like mscorlib, and you therefore disable the use of NGENd images and are willing to put up with longer startup times as a result, then…
+
+ReJIT may be for you.
+
+## List those Limitations!
+
+ReJIT + Attach? No!
+
+In order to enable ReJIT, your profiler must load at startup, and set an immutable flag in your Initialize method that enables the ReJIT functionality.
+
+ReJIT + NGEN? No!
+
+In order to set that ReJIT flag I just mentioned, you must also set a new flag which completely disables the use of NGENd images. Kind of similar to the existing COR\_PRF\_USE\_PROFILE\_IMAGES flag, except even NGEN /Profile images (should they exist) will be ignored, and everything will be JITted, when you set this new flag. This includes all framework assemblies like mscorlib.
+
+Metadata changes in ModuleLoadFinished only
+
+If you add any new methods, AssemblyRefs, MemberRefs, etc., your metadata changes must be done during the module’s ModuleLoadFinished callback. This is not a new limitation, but it could be a surprise to some that it’s still a limitation even with ReJIT. There is an exception to this. If you need to create a new LocalVarSigToken, you may do this “late”, at ReJIT time, rather than early at ModuleLoadFinished time.
+
+Memory reclaimed at AppDomain unload, _not_ revert
+
+ReJIT will include the ability to “revert” back to the original IL from the assembly. Doing so, however, will not reclaim any memory that was allocated to support the ReJIT (e.g., the instrumented IL, the JITted code, internal bookkeeping, etc.). This memory will be reclaimed when the containing AppDomain is unloaded. And if the code is owned by the shared domain, then, well, that memory is never reclaimed.
+
+ReJIT inlined functions? No!
+
+If function A inlines function B, then you cannot ReJIT function B. Well, technically you can, you just won’t see the effect of that anytime A (or another inlining caller of B) is called. The reason is that, even if you create your new, instrumented B’, A still inlined the original B. So every time A is called, the code from the original B will be executed, and your B’ will be ignored.
+
+Since your profiler must be loaded at startup, you can work around this by either turning off inlining altogether, or by monitoring it (via the JITInlining callback), so you know which callers to ReJIT. In the example above, you’d have to rejit A, and could then request that the rejitted A not inline anyone, so that your new B’ would get called. Note that you’d have to track the inliners recursively, as there can be arbitrarily many levels of inlining.
+
+ReJIT + managed debugging? No!
+
+While not technically disabled, it is not advised or supported to run a managed debugger against a process that also has a ReJITting profiler enabled. (Native-only debugging is just fine, though.)
+
+Whatever debugging support there is, is only there for you, the profiler writer, and _not_ for your profiler’s users. For example, there is no way for the ReJITting profiler to adjust the instrumented IL map for rejitted code (i.e., no equivalent of SetILInstrumentedCodeMap for ReJIT). And attempting to step into or set breakpoints in rejitted code will have unpredictable results.
+
+However, as a profiler writer attempting to debug your own profiler, you should have a good experience debugging other parts of the process. For example, if rejitted user code calls into an on-disk profiler IL assembly, you could set breakpoints in your profiler’s IL assembly, and step through that code.
+
+ReJIT dynamic code? No!
+
+Not a new limitation, but just to be explicit, profilers are not allowed to instrument dynamic code generated via the Reflection.Emit namespace, and that includes ReJIT.
+
+## Why so strict?
+
+ReJIT, as originally conceived by the CLR team, involved allowing profilers to attach to running processes and then instrument arbitrary code at any time. Just that one sentence would eliminate almost all the restrictions mentioned above. So what happened?
+
+Reality, that’s what.
+
+Stuff takes time. And in this case a _lot_ of time. Lifting just about any of the above restrictions may well have increased development or testing time, or general risk, to the point where the entire ReJIT feature might have been jeopardized. So although it was a painful process, we had to think hard about every sub-feature we wanted to support that had non-trivial cost to implement. And at the same time, we had to think about actual, real-world, end-to-end scenarios that would be using ReJIT to ensure that we ended up with something that would be useful, if not perfect.
+
+So we picked the real-world use-case of production monitoring tools that use instrumentation to gather data from various servers in a data center. “Attach” isn’t interesting to many of these tools (which run all the time), but they do want to dynamically change the level of instrumentation to help diagnose problems as they come up, without having to restart the process. This scenario fit very nicely with the time we had, and so that’s what we shot for.
+
+I can’t comment on what we will or will not do in any releases of the CLR after 4.5, but I like to think that ReJIT in .NET 4.5 might simply be a first step toward a richer instrumentation feature set, such that some of these limitations may eventually get lifted. We won’t know if that’s true until the time comes, though.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/ReJIT - The Basics.md b/Documentation/Profiling/davbr-blog-archive/ReJIT - The Basics.md
new file mode 100644
index 0000000000..746a45d38c
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/ReJIT - The Basics.md
@@ -0,0 +1,125 @@
+*This blog post originally appeared on David Broman's blog on 10/12/2011*
+
+This post is organized in chronological order, telling what your profiler should be doing at the following times in the process:
+
+- Startup Time
+- ModuleLoadFinished Time
+- RequestReJIT Time
+- Actual ReJIT Time
+- RequestRevert Time
+
+
+
+## Startup Time
+
+The first thing your profiler will do is get itself loaded on startup of a managed application—the old environment variable way, not the new attach way. I’m sure you’ve already read up on the [limitations](ReJIT - Limitations.md)!
+
+Inside your profiler’s Initialize() method, it will of course call SetEventMask(). In that call, your profiler must include ( **COR\_PRF\_ENABLE\_REJIT | COR\_PRF\_DISABLE\_ALL\_NGEN\_IMAGES** ) in the bitmask. COR\_PRF\_ENABLE\_REJIT is required to use any of the ReJIT APIs later on (they’ll fail immediately otherwise). COR\_PRF\_DISABLE\_ALL\_NGEN\_IMAGES causes the CLR’s assembly loader to ignore all NGENd images (even NGEN /Profile images), and thus all code will be JITted from scratch, and all classes loaded from scratch. If you try to be tricky and specify only COR\_PRF\_ENABLE\_REJIT (without COR\_PRF\_DISABLE\_ALL\_NGEN\_IMAGES), then SetEventMask will fail. Conversely, though, you’re perfectly welcome to specify COR\_PRF\_DISABLE\_ALL\_NGEN\_IMAGES without COR\_PRF\_ENABLE\_REJIT if you want.
+
+At this time you will likely want to set other flags that control optimizations, particularly **inlining** (COR\_PRF\_DISABLE\_OPTIMIZATIONS, COR\_PRF\_DISABLE\_INLINING), or at least subscribe to the inlining callbacks (COR\_PRF\_MONITOR\_JIT\_COMPILATION).
+
+Typically, your profiler will also create a new thread at this point, call it your “ **ReJIT Thread** ”. The expected use-case of ReJIT is to perform instrumentation “on demand”, triggered by some user action (like fiddling with dials in your profiler’s out-of-process GUI). As such, you’ll need an unmanaged thread of your own creation to receive and act on these requests from out-of-process. Perhaps you already have such a thread to service other kinds of requests. It’s perfectly acceptable for such a thread to now also act as your ReJIT Thread.
+
+## ModuleLoadFinished Time
+
+###
+
+###
+
+###
+
+### Metadata Changes
+
+As each module loads, you will likely need to add metadata so that your future ReJITs will have the tokens they need. What you do here heavily depends on the kind of instrumentation you want to do. I’m assuming you’re doing instrumentation that adds some calls from the user code into brand new profiler helper methods you will add somewhere. If you plan to instrument mscorlib, you will likely want to add those profiler helper methods into mscorlib (remember, mscorlib is not allowed to contain an AssemblyRef that points to any other assembly!). Otherwise, perhaps you plan to ship a managed helper assembly that will sit on your user’s disk, and all your profiler helper methods will reside in this on-disk managed helper assembly.
+
+So…
+
+IF the module loading is mscorlib AND you plan to **add your profiler helper methods** into mscorlib, THEN use the metadata APIs now to add those methods.
+
+IF the module loading contains methods that you might possibly ever want to instrument, THEN use the metadata APIs to **add any AssemblyRefs, TypeRefs, MemberRefs, etc.** , which point to your profiler helper methods, that you might possibly need later when you potentially instrument methods from this loading module. The guiding principle here is that metadata changes may be done at ModuleLoadFinished time, and not later. So you need to assume you might possibly want to ReJIT methods in the loading module _eventually_, and proactively add to the loading module whatever metadata you will eventually need (should you actually perform the ReJIT later), and add that metadata _now_, just in case.
+
+### Re-Request Prior ReJITs
+
+This won’t make much sense until you’ve read the next section, but I’m placing it here to keep it in chronological order. If you’ve made a prior call to RequestReJIT for an unshared (non-domain-neutral) ModuleID, AND if you want that request to apply to the mdMethodDef that appears in all other unshared copies of the module, AND if you’re inside ModuleLoadFinished for the load of a new ModuleID that is just such a new unshared copy of the module, THEN you’ll want to explicitly call RequestReJIT on this newly-loaded ModuleID with that mdMethodDef. Note that this is optional—if you want to treat AppDomains differently and want, say, only one unshared copy of the function to be ReJITted, then you’re perfectly welcome to cause that behavior and not to call RequestReJIT on any new ModuleIDs relating to the module. Come back and re-read those last two sentences after you’ve read the next section.
+
+## RequestReJIT Time
+
+Now imagine your user has turned some dial on your out-of-process GUI, to request that some functions get instrumented (or re-instrumented (or re-re-instrumented (or …))). This results in a signal sent to your in-process profiler component. Your ReJIT Thread now knows it must call **RequestReJIT**. You can call this API once in bulk for a list of functions to ReJIT. Note that functions are expressed in terms of ModuleID + mdMethodDef metadata tokens. A few things to note about this:
+
+- You request that all instantiations of a generic function (or function on a generic class) get ReJITted with a single ModuleID + mdMethodDef pair. You cannot request a specific instantiation be ReJITted, or provide instantiation-specific IL. This is nothing new, as classic first-JIT-instrumentation should never be customized per instantiation either. But the ReJIT API is designed with this restriction in mind, as you’ll see later on.
+- ModuleID is specific to one AppDomain for unshared modules, or the SharedDomain for shared modules. Thus:
+ - If ModuleID is shared, then your request will simultaneously apply to all domains using the shared copy of this module (and thus function)
+ - If ModuleID is unshared, then your request will apply only to the single AppDomain using this module (and function)
+ - Therefore, if you want this ReJIT request to apply to _all unshared copies_ of this function:
+ - You’ll need to include all such ModuleIDs in this request.
+ - And… any _future_ unshared loads of this module will result in new ModuleIDs. So as those loads happen, you’ll need to make further calls to RequestReJIT with the new ModuleIDs to ensure those copies get ReJITted as well.
+ - This is optional, and only need be done if you truly want this ReJIT request to apply to all unshared copies of the function. You’re perfectly welcome to ReJIT only those unshared copies you want (and / or the shared copy).
+ - Now you can re-read the “Re-Request Prior ReJITs” section above. :-)
+
+##
+
+###
+
+### More on AppDomains
+
+This whole shared / multiple unshared business can get confusing. So to bring it home, consider your user. If your user expresses instrumentation intent at the level of a class/method name, then you pretty much want to ReJIT every copy of that function (all unshared copies plus the shared copy). But if your user expresses instrumentation intent at the level of a class/method name _plus AppDomain_ (think one single AppPool inside ASP.NET), then you’d only want to ReJIT the copy of the function that resides in the single ModuleID associated with that AppDomain.
+
+The SharedDomain can make that last alternative tricky, though. Because if the ModuleID ends up belonging to the SharedDomain, and you ReJIT a method in that ModuleID, then all AppDomains that share that module will see your instrumentation (whether you want them to or not). This is due to the very nature of SharedDomain / domain-neutrality. There’s only one shared copy of this function to instrument, so if two domains share the function, they both see it, either with or without instrumentation. It doesn’t make sense to instrument the function from the point of view of only one of those two domains.
+
+### Pre-ReJIT
+
+Obviously, the main coolness of RequestReJIT is that you can call it with a function that has already been JITted. But one of the niceties of RequestReJIT is that you don’t actually have to wait until a function is first JITted to use it. You can request a ReJIT on a function that has never been JITted before (I call this “Pre-ReJIT”). Indeed, with generics, there’s no way to know if all the instantiations that will ever be used in an AppDomain have been JITted or not. There may always be some important instantiation that has not been JITted yet. RequestReJIT takes all this into account as follows:
+
+If a function (or generic instantiation) has already been JITted, it is marked for ReJIT next time it is called.
+
+If a function (or generic instantiation) has not yet been JITted, then it is marked internally for “Pre-ReJIT”. This means that once it is called, its original (non-instrumented) IL gets JIT-compiled as usual. Immediately after, it is then ReJITted. In this way, a Pre-ReJIT request works exactly like a ReJIT request. Original IL is compiled first, and then instrumented IL is compiled later. This ensures we can easily “revert” back to the original code at a later time using the same revert mechanism. (See below.)
+
+## Actual ReJIT Time
+
+You may have noticed that you have read a whole lot of words so far, but we haven’t yet provided the instrumented IL to the CLR. This is because the function hasn’t been ReJITted yet. You’ve only _requested_ that it be ReJITted. But the actual ReJITting happens the next time the function is called. Until then, any threads already executing inside functions you requested to be ReJITted _stay_ in those functions, and don’t see the instrumented code until they return and call the functions again. Once a function is finally called for the first time after its RequestReJIT, you get some callbacks.
+
+IF this is the first generic instantiation to ReJIT, for a given RequestReJIT call (or this is not a generic at all), THEN:
+
+- CLR calls **GetReJITParameters**
+ - This callback passes an ICorProfilerFunctionControl to your profiler. Inside your implementation of GetReJITParameters (and no later!) you may call into ICorProfilerFunctionControl to provide the instrumented IL and codegen flags that the CLR should use during the ReJIT
+ - Therefore it is here where you may:
+ - Call GetILFunctionBody
+ - Add any new LocalVarSigTokens to the function’s module’s metadata. (You may not do any other metadata modifications here, though!)
+ - Rewrite the IL to your specifications, passing it to ICorProfilerFunctionControl::SetILFunctionBody.
+ - You may NOT call ICorProfilerInfo::SetILFunctionBody for a ReJIT! This API still exists if you want to do classic first-JIT IL rewriting only.
+ - Note that GetReJITParameters expresses the function getting compiled in terms of the ModuleID + mdMethodDef pair you previously specified to RequestReJIT, and _not_ in terms of a FunctionID. As mentioned before, you may not provide instantiation-specific IL!
+
+And then, for all ReJITs (regardless of whether they are for the first generic instantiation or not):
+
+- CLR calls **ReJITCompilationStarted**
+- CLR calls **ReJITCompilationFinished**
+
+These callbacks express the function getting compiled in terms of FunctionID + ReJITID. (ReJITID is simply a disambiguating value so that each ReJITted version of a function instantiation can be uniquely identified via FunctionID + ReJITID.) Your profiler doesn’t need to do anything in the above callbacks if it doesn’t want to. They just notify you that the ReJIT is occurring, and get called for each generic instantiation (or non-generic) that gets ReJITted.
+
+And of course, for any calls to these functions after they have been ReJITted, there are no further ReJIT compilations or callbacks to your profiler. This ReJITted version is now the current and only version for all new calls to the function.
+
+### Versions
+
+Your profiler is welcome to call RequestReJIT again on these functions, and the cycle starts again. The next time a call comes in, they’ll get ReJITted again, and you’ll provide instrumented IL at that time, as usual. At any given time, only the most recently ReJITted version of a function is active and in use for new calls. But any prior calls still inside previously ReJITted (or original) versions of the function stay in that version until they return.
+
+## RequestRevert Time
+
+Eventually your user may turn the dial back down, and request that the original, un-instrumented, version of the function be reinstated. When this happens, your profiler receives this signal from out-of-proc using your nifty cross-proc communication channel, and your ReJIT Thread calls **RequestRevert**.
+
+At this time, the CLR sets the original version of the function that it JITted the first time as being the _current_ version for all future calls. Any prior calls still executing in various ReJITted versions of the function remain where they are until they return. All new calls go into the version originally JITted (from the original IL).
+
+Note that RequestRevert allows you to revert back to the original JITted IL, and not back to some previous ReJITted version of the IL. If you want to revert back to a previous ReJITted version of the IL, you’ll need to do so manually, by using RequestReJIT instead, and providing that IL explicitly to the CLR.
+
+## Errors
+
+If there are any errors with performing the ReJIT, you will be notified by the dedicated callback ICorProfilerCallback4::ReJITError(). Errors can happen at a couple times:
+
+- RequestReJIT Time: These are fundamental errors with the request itself. This can include bad parameter values, requesting to ReJIT dynamic (Ref.Emit) code, out of memory, etc. If errors occur here, you’ll get a callback to your implementation of ReJITError(), sandwiched inside your call to RequestReJIT on your ReJIT Thread.
+- Actual ReJIT Time: These are errors we don’t encounter until actually trying to ReJIT the function itself. When these later errors occur, your implementation of ReJITError() is called on whatever CLR thread encountered the error.
+
+You’ll note that ReJITError can provide you not only the ModuleID + mdMethodDef pair that caused the error, but optionally a FunctionID as well. Depending on the nature of the error that occurred, the FunctionID may be available, so that your profiler may know the exact generic instantiation involved with the error. If FunctionID is null, then the error was fundamental to the generic function itself (and thus occurred for all instantiations).
+
+
+
+Ok, that about covers it on how your profiler is expected to use ReJIT. As you can see, there are several different tasks your profiler needs to do at different times to get everything right. But I trust you, you’re smart.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md b/Documentation/Profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md
new file mode 100644
index 0000000000..928092fbe6
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md
@@ -0,0 +1,63 @@
+*This blog post originally appeared on David Broman's blog on 10/13/2005*
+
+
+If your profiler plays with metadata, you've undoubtedly come across signature blobs. They’re used to encode type information for method definitions & references, local variables, and a whole lot more. They’re wonderfully compact, recursively versatile, and sometimes, well, challenging to parse. Fortunately, [Rico Mariani](http://blogs.msdn.com/ricom/) was feeling generous one day, and churned out a simple parser that can read these types of signatures:
+
+MethodDefSig
+MethodRefSig
+StandAloneMethodSig
+FieldSig
+PropertySig
+LocalVarSig
+
+Here are the files:
+[sigparse.cpp](samples/sigparse.cpp) (Rico's signature parser)
+[sigformat.cpp](samples/sigformat.cpp) (An example extension to the parser)
+[PlugInToYourProfiler.cpp](samples/PlugInToYourProfiler.cpp) (Example code to plug the extension into your profiler)
+
+Open up **sigparse.cpp** in your favorite editor and take a look at the grammar at the top. The grammar comes from the ECMA CLI spec. Jonathan Keljo has a [link](http://blogs.msdn.com/jkeljo/archive/2005/08/04/447726.aspx) to it from his blog. This tells you the types of signature blobs the parser can handle.
+
+Sigparse.cpp is structured without any dependencies on any headers, so you can easily absorb it into your profiler project. There are two things you will need to do to make use of the code. I provided examples of each of these in the download above to help you out:
+
+1. You will **extend the code** to make use of the parsed components of the signature however you like. Perhaps you’ll build up your own internal structures based on what you find. Or maybe you’ll build a pretty-printer that displays method prototypes in the managed language of your choice.
+2. You will then **call the code** to perform the parse on signature blobs you encounter while profiling.
+
+## Extending the code
+
+Simply derive a new class from SigParser, and override the virtual functions. The functions you override are events to be handled as the parser traverses the signature in top-down fashion. For example, when the parser encounters a MethodDef, you might see calls to your overrides of:
+
+NotifyBeginMethod()
+ NotifyParamCount()
+ NotifyBeginRetType()
+ NotifyBeginType()
+ NotifyTypeSimple()
+ NotifyEndType()
+ NotifyEndRetType()
+ NotifyBeginParam()
+ NotifyBeginType()
+ NotifyTypeSimple()
+ NotifyEndType()
+ NotifyEndParam()
+ _… (more parameter notifications occur here if more parameters exist)_
+NotifyEndMethod()
+
+And yes, generics are handled as well.
+
+In your overrides, it’s up to you to do what you please. **SigFormat.cpp** provides an example of a very simple pretty-printer that just prints to stdout.
+
+You’ll notice that metadata tokens (TypeDefs, TypeRefs, etc.) are not resolved for you. This is because the parser has no knowledge of the assemblies in use—it only knows about the signature blob you give it. When the parser comes across a token it just reports it to you directly via the overrides (e.g., NotifyTypeDefOrRef()). It’s up to your profiler to figure out what to do with the tokens once it finds them.
+
+## Calling the code
+
+I saved the easy step for last. When your profiler encounters a signature blob to parse, just create an instance of your SigParser-derived class, and call Parse(). Could it be simpler? An example of this is in **PlugInToYourProfiler.cpp**. Here you’ll find example code that you’d add to a profiler to read metadata and feed the signature blobs to SigFormat to print all signatures found.
+
+Go ahead! Plug this all into your profiler and watch it tear open the signature blobs in mscorlib, and pretty-print the results. Dude, can this get any more exciting?!
+
+## Homework?!
+
+Don't worry, it's optional. I mentioned above that only signatures whose grammar appears in the comments in sigparse.cpp are parseable by this sample. For example, it can’t parse TypeSpecs and MethodSpecs. However, adding this capability is pretty straightforward given the existing code, and so this is left as an exercise to the reader. :-)
+
+The only gotcha is that TypeSpecs & MethodSpecs don’t have a unique byte that introduces them. For example, GENERICINST could indicate the beginning of a TypeSpec or a MethodSpec. You’ll see that SigParser::Parse() switches on the intro byte to determine what it’s looking at. So to keep things simple, you’ll want to add a couple more top-level functions to SigParser to parse TypeSpecs & MethodSpecs (say, ParseTypeSpec() & ParseMethodSpec()). You’d then call those functions instead of Parse() when you have a TypeSpec or MethodSpec on your hands. Of course, if you don’t care about TypeSpecs and MethodSpecs, you can use the code as is and not worry. But this stuff is so much fun, you’ll probably want to add the capability anyway.
+
+Hope you find this useful. And thanks again to Rico Mariani for sigparse.cpp!
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Tail call JIT conditions.md b/Documentation/Profiling/davbr-blog-archive/Tail call JIT conditions.md
new file mode 100644
index 0000000000..194c0ba517
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Tail call JIT conditions.md
@@ -0,0 +1,49 @@
+*This blog post originally appeared on David Broman's blog on 6/20/2007*
+
+
+_Here are the full details I received from Grant Richins and Fei Chen when I asked how the JIT decides whether to employ the tail call optimization. Note that these statements apply to the JITs as they were when Grant and Fei looked through the code base, and are prone to change at whim. **You must not take dependencies on this behavior**. Use this information for your own personal entertainment only._
+
+_First, Grant talked about the 64-bit JITs (one for x64, one for ia64):_
+
+For the 64-bit JIT, we tail call whenever we’re allowed to. Here’s what prevents us from tail calling (in no particular order):
+
+- We inline the call instead (we never inline recursive calls to the same method, but we will tail call them)
+- The call/callvirt/calli is followed by something other than nop or ret IL instructions.
+- The caller or callee return a value type.
+- The caller and callee return different types.
+- The caller is synchronized (MethodImplOptions.Synchronized).
+- The caller is a shared generic method.
+- The caller has imperative security (a call to Assert, Demand, Deny, etc.).
+- The caller has declarative security (custom attributes).
+- The caller is varargs
+- The callee is varargs.
+- The runtime forbids the JIT to tail call. (_There are various reasons the runtime may disallow tail calling, such as caller / callee being in different assemblies, the call going to the application's entrypoint, any conflicts with usage of security features, and other esoteric cases._)
+- The il did not have the tail. prefix and we are not optimizing (the profiler and debugger control this)
+- The il did not have the tail. prefix and the caller had a localloc instruction (think alloca or dynamic stack allocation)
+- The caller is getting some GS security cookie checks
+- The il did not have the tail. prefix and a local or parameter has had its address taken (ldarga, or ldloca)
+- The caller is the same as the callee and the runtime disallows inlining
+- The callee is invoked via stub dispatch (_i.e., via intermediate code that's generated at runtime to optimize certain types of calls_).
+- For x64 we have these additional restrictions:
+
+ - The callee has one or more parameters that are valuetypes of size 3, 5, 6, 7, or >8 bytes
+ - The callee has more than 4 arguments (don’t forget to count the this pointer, generics, etc.) and more than the caller
+ - For all of the parameters passed on the stack the GC-ness must match between the caller and callee. (_"GC-ness" means the state of being a pointer to the beginning of an object managed by the GC, or a pointer to the interior of an object managed by the GC (e.g., a byref field), or neither (e.g., an integer or struct)._)
+- For ia64 we have this additional restriction:
+
+ - Any of the callee arguments do not get passed in a register.
+
+If all of those conditions are satisfied, we will perform a tail call. Also note that for verifiability, if the code uses a “tail.” prefix, the subsequent call opcode must be immediately followed by a ret opcode (no intermediate nops or prefixes are allowed, although there might be additional prefixes between the “tail.” prefix and the actual call opcode).
+
+_Fei has this to add about the 32-bit JIT:_
+
+I looked at the code briefly and here are the cases I saw where tailcall is disallowed:
+
+- tail. prefix does not exist in the IL stream (note that tail. prefix is ignored in the inlinee).
+- Synchronized method or method with varargs.
+- P/Invoke to unmanaged method.
+- Return types don’t match between the current method and the method it attempts to tailcall into.
+- The runtime forbids the JIT to tail call.
+- Callee has valuetype return.
+- Many more restrictions that mirror those Grant mentioned above
+
diff --git a/Documentation/Profiling/davbr-blog-archive/Type Forwarding.md b/Documentation/Profiling/davbr-blog-archive/Type Forwarding.md
new file mode 100644
index 0000000000..f503b612cd
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/Type Forwarding.md
@@ -0,0 +1,203 @@
+*This blog post originally appeared on David Broman's blog on 9/30/2009*
+
+
+MSDN defines “type forwarding” as moving “a type to another assembly without having to recompile applications that use the original assembly”. In this post, I’ll talk about examining a particular type in Microsoft’s .NET Framework library that gets forwarded, how you can use type forwarding for your own types, and what type forwarding looks like to consumers of the profiling API. For some more official background on type forwarding, visit the MSDN [topic](http://msdn.microsoft.com/en-us/library/ms404275(VS.100).aspx). If you Bing type forwarding you’ll find many blogs that talk about it as well. Yes, that’s right. I used Bing as a verb. Get used to it; Bing is awesome.
+
+Type forwarding is nothing new. However, in CLR V4, we are enabling type forwarding to work with generic types. And there has been some new refactoring in System.Core. This means you should expect to see type forwarding used more often than it had been in the past. So if you code up a profiler, you should make sure you can deal with type forwarding appropriately. The good news is that profiler code that uses the profiling API to inspect types generally should not need to change. But if you do certain kinds of metadata lookups yourself, you may need to be aware of type forwarding. More on that later.
+
+## Example: TimeZoneInfo
+
+The example I’ll use where the .NET Framework uses type forwarding is the TimeZoneInfo class. In CLR V4, TimeZoneInfo is now forwarded from System.Core.dll to mscorlib.dll. If you open the CLR V4 copy of System.Core.dll in ildasm and choose Dump, you'll see the following:
+
+|
+```
+.class extern /*27000004*/ forwarder System.TimeZoneInfo
+ {
+ .assembly extern mscorlib /*23000001*/
+ }
+```
+ |
+
+In each assembly’s metadata is an exported types table. The above means that System.Core.dll's exported types table includes an entry for System.TimeZoneInfo (indexed by token 27000004). What's significant is that System.Core.dll no longer has a typeDef for System.TimeZoneInfo, only an exported type. The fact that the token begins at the left with 0x27 tells you that it's an mdtExportedType (not a mdtTypeDef, which begins at the left with 0x02).
+
+At run-time, if the CLR type loader encounters this exported type, it knows it must now look in mscorlib for System.TimeZoneInfo. And by the way, if someday mscorlib chooses to forward the type elsewhere, and thus the type loader found another exported type with name System.TimeZoneInfo in mscorlib, then the type loader would have to make yet another hop to wherever that exported type pointed.
+
+## Walkthrough 1: Observe the forwarding of System.TimeZoneInfo
+
+This walkthrough assumes you have .NET 4.0 or later installed **and** an older release of .NET, such as .NET 3.5, installed.
+
+Code up a simple C# app that uses System.TimeZoneInfo:
+```
+namespace test
+{
+ class Class1
+ {
+ static void Main(string[] args)
+ {
+ System.TimeZoneInfo ti = null;
+ }
+ }
+}
+```
+
+Next, compile this into an exe using a CLR V2-based toolset (e.g., .NET 3.5). You can use Visual Studio, or just run from the command-line (but be sure your path points to the pre-.NET 4.0 C# compiler!). Example:
+
+```
+csc /debug+ /o- /r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framework\v3.5\System.Core.dll" Class1.cs
+```
+
+Again, be sure you’re using an old csc.exe from, say, a .NET 3.5 installation. To verify, open up Class1.exe in ildasm, and take a look at Main(). It should look something like this:
+
+```
+.method /*06000001*/ private hidebysig static
+ void Main(string[] args) cil managed
+ {
+ .entrypoint
+ // Code size 4 (0x4)
+ .maxstack 1
+ .locals /*11000001*/ init ([0] class [System.Core/*23000002*/]System.TimeZoneInfo/*01000006*/ ti)
+ IL_0000: nop
+ IL_0001: ldnull
+ IL_0002: stloc.0
+ IL_0003: ret
+ } // end of method Class1::Main
+```
+
+The key here is to note that the IL uses a TypeRef for System.TimeZoneInfo (01000006) that points to **System.Core.dll**. When you run Class1.exe against a .NET 3.5 runtime, it will find System.TimeZoneInfo in System.Core.dll as usual, and just use that, since System.TimeZoneInfo actually is defined in System.Core.dll in pre-.NET 4.0 frameworks. However, what happens when you run Class1.exe against .NET 4.0 without recompiling? Type forwarding would get invoked!
+
+Note that, if you were to build the above C# code using the .NET 4.0 C# compiler, it would automatically have generated a TypeRef that points to mscorlib.dll instead, so you wouldn't be able to observe the type forwarding at run-time.
+
+Ok, so how do we run this pre-.NET 4.0 executable against .NET 4.0? A config file, of course. Paste the following into a file named Class1.exe.config that sits next to Class1.exe:
+
+```
+<configuration>
+  <startup>
+    <supportedRuntime version="v4.0.20506"/>
+  </startup>
+</configuration>
+```
+
+The above will force Class1.exe to bind against .NET 4.0 Beta 1. And when it comes time to look for TimeZoneInfo, the CLR will first look in System.Core.dll, find the exported types table entry, and then hop over to mscorlib.dll to load the type. What does that look like to your profiler? Make your guess and hold that thought. First, another walkthrough…
+
+## Walkthrough 2: Forwarding your own type
+
+To experiment with forwarding your own types, the process is:
+
+- Create Version 1 of your library
+
+ - Create version 1 of your library assembly that defines your type (MyLibAssemblyA.dll)
+ - Create an app that references your type in MyLibAssemblyA.dll (MyClient.exe)
+- Create version 2 of your library
+
+ - Recompile MyLibAssemblyA.dll to forward your type elsewhere (MyLibAssemblyB.dll)
+ - Don’t recompile MyClient.exe. Let it still think the type is defined in MyLibAssemblyA.dll.
+
+### Version 1
+
+Just make a simple C# DLL that includes your type Foo. Something like this (MyLibAssemblyA.cs):
+
+```
+using System;
+public class Foo
+{
+}
+```
+
+and compile it into MyLibAssemblyA.dll:
+
+```
+csc /target:library /debug+ /o- MyLibAssemblyA.cs
+```
+
+Then make yourself a client app that references Foo.
+
+```
+using System;
+public class Test
+{
+ public static void Main()
+ {
+ Foo foo = new Foo();
+ Console.WriteLine(typeof(Foo).AssemblyQualifiedName);
+ }
+}
+```
+
+and compile this into MyClient.exe:
+
+```
+csc /debug+ /o- /r:MyLibAssemblyA.dll MyClient.cs
+```
+
+When you run MyClient.exe, you get this boring output:
+
+```
+Foo, MyLibAssemblyA, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
+```
+
+Ok, time to upgrade!
+
+### Version 2
+Time goes by, your library is growing, and it's time to split it into two DLLs. Gotta move Foo into the new DLL. Save this into MyLibAssemblyB.cs
+```
+using System;
+public class Foo
+{
+}
+```
+
+compile that into your new DLL, MyLibAssemblyB.dll:
+```
+csc /target:library /debug+ /o- MyLibAssemblyB.cs
+```
+
+And for the type forward. MyLibAssemblyA.cs now becomes:
+```
+using System;
+using System.Runtime.CompilerServices;
+ [assembly: TypeForwardedTo(typeof(Foo))]
+```
+
+compile that into MyLibAssemblyA.dll (overwriting your Version 1 copy of that DLL):
+```
+csc /target:library /debug+ /o- /r:MyLibAssemblyB.dll MyLibAssemblyA.cs
+```
+
+Now, when you rerun MyClient.exe (without recompiling!), it will look for Foo first in MyLibAssemblyA.dll, and then hop over to MyLibAssemblyB.dll:
+```
+Foo, MyLibAssemblyB, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
+```
+
+And this all despite the fact that MyClient.exe still believes that Foo lives in MyLibAssemblyA:
+```
+.method /*06000001*/ public hidebysig static
+ void Main() cil managed
+ {
+ .entrypoint
+ // Code size 29 (0x1d)
+ .maxstack 1
+ .locals /*11000001*/ init ([0] class [MyLibAssemblyA/*23000002*/]Foo/*01000006*/ foo)
+ IL_0000: nop
+ IL_0001: newobj instance void [MyLibAssemblyA/*23000002*/]Foo/*01000006*/::.ctor() /* 0A000004 */
+ IL_0006: stloc.0
+ IL_0007: ldtoken [MyLibAssemblyA/*23000002*/]Foo/*01000006*/
+ IL_000c: call class [mscorlib/*23000001*/]System.Type/*01000007*/ [mscorlib/*23000001*/]System.Type/*01000007*/::GetTypeFromHandle(valuetype [mscorlib/*23000001*/]System.RuntimeTypeHandle/*01000008*/) /* 0A000005 */
+ IL_0011: callvirt instance string [mscorlib/*23000001*/]System.Type/*01000007*/::get_AssemblyQualifiedName() /* 0A000006 */
+ IL_0016: call void [mscorlib/*23000001*/]System.Console/*01000009*/::WriteLine(string) /* 0A000007 */
+ IL_001b: nop
+ IL_001c: ret
+ } // end of method Test::Main
+```
+ |
+
+## Profilers
+
+What does this look like to profilers? Types are represented as ClassIDs, and modules as ModuleIDs. When you query for info about a ClassID (via GetClassIDInfo2()), you get one and only one ModuleID to which it belongs. So when a ClassID gets forwarded from one ModuleID to another, which does the profiling API report as its real home? The answer: always the final module to which the type has been forwarded and therefore the module whose metadata contains the TypeDef (and not the exported type table entry).
+
+This should make life easy for profilers, since they generally expect to be able to find the metadata TypeDef for a type inside the ModuleID that the profiling API claims is the type’s home. So much of type forwarding will be transparent to your profiler.
+
+However, type forwarding is important to understand if your profiler needs to follow metadata references directly. More generally, if your profiler is reading through metadata and expects to come across a typeDef (e.g., perhaps a metadata reference points to a type in that module, or perhaps your profiler expects certain known types to be in certain modules), then your profiler should be prepared to find an mdtExportedType instead, and to deal gracefully with it rather than doing something silly like crashing.
+
+In any case, whether you think your profiler will be affected by type forwarding, be sure to test, test, test!
+
+ \ No newline at end of file
diff --git a/Documentation/Profiling/davbr-blog-archive/When is it safe to use ObjectIDs.md b/Documentation/Profiling/davbr-blog-archive/When is it safe to use ObjectIDs.md
new file mode 100644
index 0000000000..ea287ae75c
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/When is it safe to use ObjectIDs.md
@@ -0,0 +1,14 @@
+*This blog post originally appeared on David Broman's blog on 12/29/2011*
+
+
+As mentioned in [this post](Debugging - SOS and IDs.md), ObjectIDs are really pointers to managed objects on the GC heap. And as you know, objects get collected or move around on the heap during GCs. So how do you safely work with ObjectIDs?
+
+The overall guidance is that if you plan to dereference an ObjectID or pass it to an ICorProfilerInfo(2,3,4) method, then you must do so either:
+
+1. From inside a GC, from a thread doing the GC (e.g., in response to one of the GC callbacks, in which case you're guaranteed that the GC is blocked by this call), OR
+2. From a callback that gave you the ObjectID (in which case you're guaranteed that the GC is blocked by the callback that gave you the ObjectID)
+
+Of course, taking an ObjectID that you were given and caching it away somewhere is a big no-no, unless you take pains to update all the ObjectIDs in your cache on every GC, by using the SurvivingReferences/MovedReferences callbacks. And even with a well-updated cache such as this, the CLR will still require that you pass ObjectIDs to Info methods only in the above two circumstances, or else you will receive an error HRESULT. This extra checking was added in .NET 4.0.
+
+The reason for this is that some code paths in the CLR assume that their thread is already blocking the GC (to ensure referenced objects stay put), but there was no enforcement in place to ensure this. So we added this enforcement in .NET 4.0. Without checks like this, it could be possible to cause nondeterministic GC heap corruptions or to reference the wrong memory. For example, calling GetObjectSize on a thread that you create (i.e., not a managed thread) does not intrinsically block the GC, and thus is considered unsafe.
+
diff --git a/Documentation/Profiling/davbr-blog-archive/media/2110.image_051F632D.png b/Documentation/Profiling/davbr-blog-archive/media/2110.image_051F632D.png
new file mode 100644
index 0000000000..a08923836e
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/2110.image_051F632D.png
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/4276.image1_31CAADB7.png b/Documentation/Profiling/davbr-blog-archive/media/4276.image1_31CAADB7.png
new file mode 100644
index 0000000000..2cebaec8cc
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/4276.image1_31CAADB7.png
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/8715.image1_thumb_38118445.png b/Documentation/Profiling/davbr-blog-archive/media/8715.image1_thumb_38118445.png
new file mode 100644
index 0000000000..bf16d28acf
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/8715.image1_thumb_38118445.png
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/8715.image_thumb_01A0D243.png b/Documentation/Profiling/davbr-blog-archive/media/8715.image_thumb_01A0D243.png
new file mode 100644
index 0000000000..986ed7dbc7
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/8715.image_thumb_01A0D243.png
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/Attach.jpg b/Documentation/Profiling/davbr-blog-archive/media/Attach.jpg
new file mode 100644
index 0000000000..4f5620e4a9
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/Attach.jpg
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/NoBirthAnnouncement.JPG b/Documentation/Profiling/davbr-blog-archive/media/NoBirthAnnouncement.JPG
new file mode 100644
index 0000000000..defc4414b7
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/NoBirthAnnouncement.JPG
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/deadlock.jpg b/Documentation/Profiling/davbr-blog-archive/media/deadlock.jpg
new file mode 100644
index 0000000000..b0729f3ca2
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/deadlock.jpg
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/media/gccycle.jpg b/Documentation/Profiling/davbr-blog-archive/media/gccycle.jpg
new file mode 100644
index 0000000000..d3c09e3a5c
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/media/gccycle.jpg
Binary files differ
diff --git a/Documentation/Profiling/davbr-blog-archive/samples/Add local to LocalVarSig.cpp b/Documentation/Profiling/davbr-blog-archive/samples/Add local to LocalVarSig.cpp
new file mode 100644
index 0000000000..ae9e5f64bd
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/samples/Add local to LocalVarSig.cpp
@@ -0,0 +1,137 @@
+// This blog post originally appeared on David Broman's blog on 5/18/2006
+//
+// Parse the local variables signature for the method we're rewriting, create a
+// new localvar signature containing one new local, and return the 0-based
+// ordinal for that new local.
+
+UINT AddNewLocal()
+{
+ // Get the metadata interfaces on the module containing the method being
+ // rewritten.
+ HRESULT hr =
+ m_pICorProfilerInfo->GetModuleMetaData(m_moduleId,
+ ofRead | ofWrite,
+ IID_IMetaDataImport,
+ (IUnknown**)&m_pMetaDataImport);
+ if (FAILED(hr))
+ {
+ return 0;
+ }
+
+ hr = m_pMetaDataImport->QueryInterface(IID_IMetaDataEmit,
+ (void**)&m_pMetaDataEmit);
+ if (FAILED(hr))
+ {
+ return 0;
+ }
+
+ // Here's a buffer into which we will write out the modified signature. This
+ // sample code just bails out if it hits signatures that are too big. Just
+ // one of many reasons why you use this code AT YOUR OWN RISK!
+ COR_SIGNATURE
+ rgbNewSig[4096];
+
+ // Use the signature token to look up the actual signature
+ PCCOR_SIGNATURE rgbOrigSig = NULL;
+ ULONG cbOrigSig;
+
+ hr = m_pMetaDataImport->GetSigFromToken(m_tkLocalVarSig, &rgbOrigSig, &cbOrigSig);
+ if (FAILED(hr))
+ {
+ return 0;
+ }
+
+ // These are our running indices in the original and new signature,
+ // respectively
+ UINT iOrigSig = 0;
+ UINT iNewSig = 0;
+
+ // First byte of signature must identify that it's a locals signature!
+ assert(rgbOrigSig[iOrigSig] == SIG_LOCAL_SIG);
+
+ // Copy SIG_LOCAL_SIG
+ if (iNewSig + 1 > sizeof(rgbNewSig))
+ {
+ // We'll write one byte below but no room!
+ return 0;
+ }
+
+ rgbNewSig[iNewSig++] = rgbOrigSig[iOrigSig++];
+
+ // Get original count of locals...
+ ULONG cOrigLocals;
+ ULONG cbOrigLocals;
+ ULONG cbNewLocals;
+ hr = CorSigUncompressData(&rgbOrigSig[iOrigSig],
+ 4, // [IN] length of the signature
+ &cOrigLocals, // [OUT] the expanded data
+ &cbOrigLocals); // [OUT] length of the expanded data
+
+ if (FAILED(hr))
+ {
+ return 0;
+ }
+
+ // ...and write new count of locals (cOrigLocals+1)
+ if (iNewSig + 4 > sizeof(rgbNewSig))
+ {
+ // CorSigCompressData will write up to 4 bytes but no room !
+ return 0;
+ }
+
+ cbNewLocals = CorSigCompressData(cOrigLocals + 1, // [IN]given uncompressed data
+ &rgbNewSig[iNewSig]); // [OUT] buffer where data will be compressed and stored.
+
+ iOrigSig += cbOrigLocals;
+ iNewSig += cbNewLocals;
+
+ // Copy the rest
+ if (iNewSig + cbOrigSig - iOrigSig > sizeof(rgbNewSig))
+ {
+ // We'll copy cbOrigSig - iOrigSig bytes, but no room!
+ return 0;
+ }
+
+ memcpy(&rgbNewSig[iNewSig], &rgbOrigSig[iOrigSig], cbOrigSig - iOrigSig);
+ iNewSig += cbOrigSig - iOrigSig;
+
+ // Manually append final local
+ ULONG cbLocalType;
+ if (iNewSig + 1 > sizeof(rgbNewSig))
+ {
+ // We'll write one byte below but no room!
+ return 0;
+ }
+
+ rgbNewSig[iNewSig++] = ELEMENT_TYPE_VALUETYPE;
+
+ // You'll need to replace 0x01000002 with the appropriate token that describes
+ // the type of this local (which, in turn, is the type of the return value
+ // you're copying into that local). This can be either a TypeDef or TypeRef,
+ // and it must be encoded (compressed).
+ if (iNewSig + 4 > sizeof(rgbNewSig))
+ {
+ // CorSigCompressToken will write up to 4 bytes but no room!
+ return 0;
+ }
+
+ cbLocalType = CorSigCompressToken(0x01000002, &rgbNewSig[iNewSig]);
+
+ iNewSig += cbLocalType;
+
+ // We're done building up the new signature blob. We now need to add it to
+ // the metadata for this module, so we can get a token back for it.
+ assert(iNewSig <= sizeof(rgbNewSig));
+ hr = m_pMetaDataEmit->GetTokenFromSig(&rgbNewSig[0], // [IN] Signature to define.
+ iNewSig, // [IN] Size of signature data.
+ &m_tkLocalVarSig); // [OUT] returned signature token.
+
+ if (FAILED(hr))
+ {
+ return 0;
+ }
+
+ // 0-based index of new local = 0-based index of original last local + 1
+ // = count of original locals
+ return cOrigLocals;
+}
diff --git a/Documentation/Profiling/davbr-blog-archive/samples/PlugInToYourProfiler.cpp b/Documentation/Profiling/davbr-blog-archive/samples/PlugInToYourProfiler.cpp
new file mode 100644
index 0000000000..81471bb1b3
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/samples/PlugInToYourProfiler.cpp
@@ -0,0 +1 @@
+// This blog post originally appeared on David Broman's blog on 10/13/2005 #include "SigFormat.cpp" // --------------------------------------------------------------------- // --------------------------------------------------------------------- // This file does not compile on its own. It contains snippets of code you can add // to a working profiler, so that your profiler will invoke instances of the SigFormat // object to parse and pretty-print all the types in all modules as they're loaded. // // The functions are ordered from callees to callers (so no forward declarations are // necessary). If you prefer a top-down approach to learning code, then start // at the bottom of the file. // --------------------------------------------------------------------- // --------------------------------------------------------------------- // **************************************************************** // HELPERS TO READ THROUGH METADATA, FIND SIGNATURES, AND INVOKE THE PARSER // **************************************************************** // Simple wrapper to create an instance of SigFormat and invoke it HRESULT DoParse(sig_byte * sig, ULONG cbSig) { SigFormat sf; HRESULT hr; bool fRet = sf.Parse(sig, cbSig); if (!fRet) { hr = E_FAIL; goto Error; } hr = S_OK; Cleanup: return hr; Error: goto Cleanup; } // Takes an mdProperty, prints an intro line, then invokes the parser / printer HRESULT PrintProperty(ModuleID moduleID, IMetaDataImport* pMDImport, LPCWSTR wszClassName, mdProperty md) { HRESULT hr; mdTypeDef td; WCHAR wszName[500]; ULONG cchName; PCCOR_SIGNATURE sigMember; ULONG cbSigMember; DWORD dwAttr; DWORD dwCPlusTypeFlag; UVCP_CONSTANT pValue; ULONG cchValue; mdMethodDef mdSetter; mdMethodDef mdGetter; mdMethodDef aOtherMethods[100]; ULONG cOtherMethods; hr = pMDImport->GetPropertyProps(md, // The member for which to get props. &td, // Put member's class here. wszName, // Put member's name here. dimensionof(wszName), // Size of szMember buffer in wide chars. 
&cchName, // Put actual size here &dwAttr, // Put flags here. &sigMember, // [OUT] point to the blob value of meta data &cbSigMember, // [OUT] actual size of signature blob &dwCPlusTypeFlag, // [OUT] flag for value type. selected ELEMENT_TYPE_* &pValue, // [OUT] constant value &cchValue, &mdSetter, // [OUT] setter method of the property &mdGetter, // [OUT] getter method of the property aOtherMethods, // [OUT] other method of the property dimensionof(aOtherMethods), // [IN] size of rmdOtherMethod &cOtherMethods); // [OUT] total number of other method of this property if (FAILED(hr)) { goto Error; } printf("\n%S.%S (0x%x)\n", wszClassName, wszName, md); DoParse((sig_byte *) sigMember, cbSigMember); hr = S_OK; Cleanup: return hr; Error: goto Cleanup; } // Takes a field token, prints an intro line, then invokes the parser / printer HRESULT PrintField(ModuleID moduleID, IMetaDataImport* pMDImport, LPCWSTR wszClassName, mdToken md) { HRESULT hr; mdTypeDef td; WCHAR wszName[500]; ULONG cchName; PCCOR_SIGNATURE sigMember; ULONG cbSigMember; DWORD dwAttr; DWORD dwCPlusTypeFlag; UVCP_CONSTANT pValue; ULONG cchValue; hr = pMDImport->GetFieldProps(md, // The member for which to get props. &td, // Put member's class here. wszName, // Put member's name here. dimensionof(wszName), // Size of szMember buffer in wide chars. &cchName, // Put actual size here &dwAttr, // Put flags here. &sigMember, // [OUT] point to the blob value of meta data &cbSigMember, // [OUT] actual size of signature blob &dwCPlusTypeFlag, // [OUT] flag for value type. selected ELEMENT_TYPE_* &pValue, // [OUT] constant value &cchValue); // [OUT] size of constant string in chars, 0 for non-strings. 
if (FAILED(hr)) { goto Error; } printf("\n%S.%S (0x%x)\n", wszClassName, wszName, md); DoParse((sig_byte *) sigMember, cbSigMember); hr = S_OK; Cleanup: return hr; Error: goto Cleanup; } // Takes an mdMethodDef, prints an intro line, then invokes the parser / printer on its signature and its locals HRESULT PrintMethodDef(ModuleID moduleID, IMetaDataImport* pMDImport, LPCWSTR wszClassName, mdMethodDef md) { HRESULT hr; mdTypeDef td; WCHAR wszMethod[500]; ULONG cchMethod; DWORD dwAttr; PCCOR_SIGNATURE sigParam; PCCOR_SIGNATURE sigLocal; ULONG cbSigParam; ULONG cbSigLocal; ULONG ulCodeRVA; DWORD dwImplFlags; BOOL fMore; LPCBYTE pMethodHeader = NULL; ULONG cbMethodSize; IMAGE_COR_ILMETHOD_TINY* pimt = NULL; IMAGE_COR_ILMETHOD_FAT* pimf = NULL; hr = pMDImport->GetMethodProps(md, // The method for which to get props. &td, // Put method's class here. wszMethod, // Put method's name here. dimensionof(wszMethod), // Size of szMethod buffer in wide chars. &cchMethod, // Put actual size here &dwAttr, // Put flags here. &sigParam, // [OUT] point to the blob value of meta data &cbSigParam, // [OUT] actual size of signature blob &ulCodeRVA, // [OUT] codeRVA &dwImplFlags); // [OUT] Impl. Flags if (FAILED(hr)) { goto Error; } printf("\n%S.%S (0x%x)\n", wszClassName, wszMethod, md); // Method prototype signature parse DoParse((sig_byte *) sigParam, cbSigParam); // Method locals signature parse hr = g_pProfilerInfo->GetILFunctionBody(moduleID, md, &pMethodHeader, &cbMethodSize); if (FAILED(hr)) { goto EndLocal; } // The following odd-looking lines of code decode the method header, ensure // it is in a format that contains local variables, and then grabs the local // variable signature out of the header. 
pimt = (IMAGE_COR_ILMETHOD_TINY*) pMethodHeader; if ((pimt->Flags_CodeSize & (CorILMethod_FormatMask >> 1)) != CorILMethod_FatFormat) { goto EndLocal; } pimf = (IMAGE_COR_ILMETHOD_FAT*) pMethodHeader; if (pimf->LocalVarSigTok == 0) { goto EndLocal; } hr = pMDImport->GetSigFromToken(pimf->LocalVarSigTok, &sigLocal, &cbSigLocal); DoParse((sig_byte *) sigLocal, cbSigLocal); EndLocal: hr = S_OK; Cleanup: return hr; Error: goto Cleanup; } // Simple helper to print an intro line for a class void PrintHeader(LPCWSTR wszClassName, mdTypeDef td, LPCSTR szCategory) { printf("\n--------------------------------------------\n"); printf("%S (0x%x):\t%s\n", wszClassName, td, szCategory); printf("--------------------------------------------\n\n"); } // Combines above functions to print the methods, properties, and fields of a class HRESULT PrintTypedef(ModuleID moduleID, IMetaDataImport* pMDImport, mdTypeDef td) { HRESULT hr; HCORENUM hEnum = NULL; mdMethodDef aMethods[100]; mdFieldDef aFields[100]; mdFieldDef aProperties[100]; ULONG cMethodDefs; ULONG cFields; ULONG cProperties; ULONG i; WCHAR wszTdName[200]; ULONG cchTdName; DWORD dwTypeDefFlags; mdToken tkExtends; BOOL fMore; hr = pMDImport->GetTypeDefProps(td, // [IN] TypeDef token for inquiry. wszTdName, // [OUT] Put name here. dimensionof(wszTdName), // [IN] size of name buffer in wide chars. &cchTdName, // [OUT] put size of name (wide chars) here. &dwTypeDefFlags, // [OUT] Put flags here. &tkExtends); // [OUT] Put base class TypeDef/TypeRef here. if (FAILED(hr)) { goto Error; } PrintHeader(wszTdName, td, "METHODDEFS"); fMore = TRUE; while (fMore) { hr = pMDImport->EnumMethods(&hEnum, td, // [IN] TypeDef to scope the enumeration. aMethods, // [OUT] Put MethodDefs here. dimensionof(aMethods), // [IN] Max MethodDefs to put. &cMethodDefs); // [OUT] Put # put here. 
if (FAILED(hr)) { goto Error; } if (hr == S_FALSE) { fMore = FALSE; } for (i=0; i < cMethodDefs; i++) { hr = PrintMethodDef(moduleID, pMDImport, wszTdName, aMethods[i]); if (FAILED(hr)) { // do you care? If so, do something about this. } } } pMDImport->CloseEnum(hEnum); hEnum = NULL; PrintHeader(wszTdName, td, "FIELDS"); fMore = TRUE; while (fMore) { hr = pMDImport->EnumFields(&hEnum, td, aFields, dimensionof(aFields), &cFields); if (FAILED(hr)) { goto Error; } if (hr == S_FALSE) { fMore = FALSE; } for (i=0; i < cFields; i++) { hr = PrintField(moduleID, pMDImport, wszTdName, aFields[i]); if (FAILED(hr)) { // do you care? If so, do something about this. } } } pMDImport->CloseEnum(hEnum); hEnum = NULL; PrintHeader(wszTdName, td, "PROPERTIES"); fMore = TRUE; while (fMore) { hr = pMDImport->EnumProperties(&hEnum, td, aProperties, dimensionof(aProperties), &cProperties); if (FAILED(hr)) { goto Error; } if (hr == S_FALSE) { fMore = FALSE; } for (i=0; i < cProperties; i++) { hr = PrintProperty(moduleID, pMDImport, wszTdName, aProperties[i]); if (FAILED(hr)) { // do you care? If so, do something about this. } } } pMDImport->CloseEnum(hEnum); hEnum = NULL; hr = S_OK; Cleanup: if (hEnum != NULL) { pMDImport->CloseEnum(hEnum); } return hr; Error: goto Cleanup; } // Enumerates the typedefs in a module via the metadata interface, and calls PrintTypedef // on each one HRESULT PrintMetadata(ModuleID moduleID, IMetaDataImport* pMDImport) { HRESULT hr; HCORENUM hEnum = NULL; mdTypeDef aTypeDefs[100]; ULONG cTypeDefs; ULONG i; BOOL fMoreTypeDefs = TRUE; while (fMoreTypeDefs) { hr = pMDImport->EnumTypeDefs(&hEnum, aTypeDefs, dimensionof(aTypeDefs), &cTypeDefs); if (FAILED(hr)) { goto Error; } if (hr == S_FALSE) { fMoreTypeDefs = FALSE; } for (i=0; i < cTypeDefs; i++) { hr = PrintTypedef(moduleID, pMDImport, aTypeDefs[i]); if (FAILED(hr)) { // do you care? If so, do something about this. 
} } } hr = S_OK; Cleanup: if (hEnum != NULL) { pMDImport->CloseEnum(hEnum); } return hr; Error: goto Cleanup; } // **************************************************************** // Add this to your profiler's ICorProfilerCallback2::ModuleLoadFinished implementation. // It is assumed your copy of the ICorProfilerInfo2 interface may be accessed via // g_pProfilerInfo. Change the code to fit your profiler as appropriate. // **************************************************************** // // As a module gets loaded, this callback implementation initiates the pretty-printer to // log all the types to stdout. HRESULT CYourProfImpl::ModuleLoadFinished( ModuleID moduleID, HRESULT hrStatus ) { HRESULT hr; LPCBYTE pbBaseLoadAddr; WCHAR wszName[300]; ULONG cchNameIn = dimensionof(wszName); ULONG cchNameOut; AssemblyID assemblyID; hr = g_pProfilerInfo->GetModuleInfo(moduleID, &pbBaseLoadAddr, cchNameIn, &cchNameOut, wszName, &assemblyID); if (FAILED(hr)) { return hr; } printf("MODULE LOAD FINISHED: %S\n", wszName); IMetaDataImport *pMDImport = NULL; hr = g_pProfilerInfo->GetModuleMetaData(moduleID, ofRead, IID_IMetaDataImport, (IUnknown **)&pMDImport ); if (FAILED(hr)) { return hr; } hr = PrintMetadata(moduleID, pMDImport); if (FAILED(hr)) { // Do any error handling as appropriate } hr = S_OK; Cleanup: return hr; Error: goto Cleanup; } \ No newline at end of file
diff --git a/Documentation/Profiling/davbr-blog-archive/samples/sigformat.cpp b/Documentation/Profiling/davbr-blog-archive/samples/sigformat.cpp
new file mode 100644
index 0000000000..c5b2eb3976
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/samples/sigformat.cpp
@@ -0,0 +1,449 @@
+// This blog post originally appeared on David Broman's blog on 10/13/2005
+
+#include "SigParse.cpp"
+
+ // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
+ // This file demonstrates how to use the general-purpose parser (SigParser) by
+ // deriving a new class from it and overriding the virtuals.
+ //
+ // In this case we're simply printing the notifications to stdout as we receive
+ // them, using pretty indenting.
+ //
+ // Look at PlugInToYourProfiler.cpp to see how to drive this.
+ // ---------------------------------------------------------------------
+ // ---------------------------------------------------------------------
+
+
+ #define dimensionof(a) (sizeof(a)/sizeof(*(a)))
+ #define MAKE_CASE(__elt) case __elt: return #__elt;
+ #define MAKE_CASE_OR(__elt) case __elt: return #__elt "|";
+
+class SigFormat : public SigParser
+{
+private:
+ UINT nIndentLevel;
+
+public:
+ SigFormat() {nIndentLevel = 0; }
+ UINT GetIndentLevel() { return nIndentLevel;}
+
+protected:
+ LPCSTR SigIndexTypeToString(sig_index_type sit)
+ {
+ switch(sit)
+ {
+ default:
+ DebugBreak();
+ return "unknown index type";
+ MAKE_CASE(SIG_INDEX_TYPE_TYPEDEF)
+ MAKE_CASE(SIG_INDEX_TYPE_TYPEREF)
+ MAKE_CASE(SIG_INDEX_TYPE_TYPESPEC)
+ }
+ }
+
+ LPCSTR SigMemberTypeOptionToString(sig_elem_type set)
+ {
+ switch(set & 0xf0)
+ {
+ default:
+ DebugBreak();
+ return "unknown element type";
+ case 0:
+ return "";
+
+ MAKE_CASE_OR(SIG_GENERIC)
+ MAKE_CASE_OR(SIG_HASTHIS)
+ MAKE_CASE_OR(SIG_EXPLICITTHIS)
+ }
+ }
+
+ LPCSTR SigMemberTypeToString(sig_elem_type set)
+ {
+ switch(set & 0xf)
+ {
+ default:
+ DebugBreak();
+ return "unknown element type";
+ MAKE_CASE(SIG_METHOD_DEFAULT)
+ MAKE_CASE(SIG_METHOD_C)
+ MAKE_CASE(SIG_METHOD_STDCALL)
+ MAKE_CASE(SIG_METHOD_THISCALL)
+ MAKE_CASE(SIG_METHOD_FASTCALL)
+ MAKE_CASE(SIG_METHOD_VARARG)
+ MAKE_CASE(SIG_FIELD)
+ MAKE_CASE(SIG_LOCAL_SIG)
+ MAKE_CASE(SIG_PROPERTY)
+ }
+ }
+
+ LPCSTR SigElementTypeToString(sig_elem_type set)
+ {
+ switch(set)
+ {
+ default:
+ DebugBreak();
+ return "unknown element type";
+ MAKE_CASE(ELEMENT_TYPE_END)
+ MAKE_CASE(ELEMENT_TYPE_VOID)
+ MAKE_CASE(ELEMENT_TYPE_BOOLEAN)
+ MAKE_CASE(ELEMENT_TYPE_CHAR)
+ MAKE_CASE(ELEMENT_TYPE_I1)
+ MAKE_CASE(ELEMENT_TYPE_U1)
+ MAKE_CASE(ELEMENT_TYPE_I2)
+ MAKE_CASE(ELEMENT_TYPE_U2)
+ MAKE_CASE(ELEMENT_TYPE_I4)
+ MAKE_CASE(ELEMENT_TYPE_U4)
+ MAKE_CASE(ELEMENT_TYPE_I8)
+ MAKE_CASE(ELEMENT_TYPE_U8)
+ MAKE_CASE(ELEMENT_TYPE_R4)
+ MAKE_CASE(ELEMENT_TYPE_R8)
+ MAKE_CASE(ELEMENT_TYPE_STRING)
+ MAKE_CASE(ELEMENT_TYPE_PTR)
+ MAKE_CASE(ELEMENT_TYPE_BYREF)
+ MAKE_CASE(ELEMENT_TYPE_VALUETYPE)
+ MAKE_CASE(ELEMENT_TYPE_CLASS)
+ MAKE_CASE(ELEMENT_TYPE_VAR)
+ MAKE_CASE(ELEMENT_TYPE_ARRAY)
+ MAKE_CASE(ELEMENT_TYPE_GENERICINST)
+ MAKE_CASE(ELEMENT_TYPE_TYPEDBYREF)
+ MAKE_CASE(ELEMENT_TYPE_I)
+ MAKE_CASE(ELEMENT_TYPE_U)
+ MAKE_CASE(ELEMENT_TYPE_FNPTR)
+ MAKE_CASE(ELEMENT_TYPE_OBJECT)
+ MAKE_CASE(ELEMENT_TYPE_SZARRAY)
+ MAKE_CASE(ELEMENT_TYPE_MVAR)
+ MAKE_CASE(ELEMENT_TYPE_CMOD_REQD)
+ MAKE_CASE(ELEMENT_TYPE_CMOD_OPT)
+ MAKE_CASE(ELEMENT_TYPE_INTERNAL)
+ MAKE_CASE(ELEMENT_TYPE_MODIFIER)
+ MAKE_CASE(ELEMENT_TYPE_SENTINEL)
+ MAKE_CASE(ELEMENT_TYPE_PINNED)
+ }
+ }
+
+ void PrintIndent()
+ {
+ const char k_szSpaces[] = " ";
+
+ // You should probably assert or throw an exception if nIndentLevel
+ // is bigger than dimensionof(k_szSpaces)-1. Error handling is minimized
+ // in this sample for better readability.
+
+ printf(k_szSpaces + ((dimensionof(k_szSpaces)-1) - nIndentLevel));
+ }
+
+ void IncIndent()
+ {
+ nIndentLevel += 2;
+ }
+
+ void DecIndent()
+ {
+ nIndentLevel -= 2;
+ }
+
+ // Simple wrapper around printf that prints the indenting spaces for you
+ void Print(const char* format, ...)
+ {
+ va_list argList;
+ va_start(argList, format);
+ PrintIndent();
+ vprintf(format, argList);
+ }
+
+ // a method with given elem_type
+ virtual void NotifyBeginMethod(sig_elem_type elem_type)
+ {
+ Print("BEGIN METHOD\n");
+ IncIndent();
+ }
+
+ virtual void NotifyEndMethod()
+ {
+ DecIndent();
+ Print("END METHOD\n");
+ }
+
+ // total parameters for the method
+ virtual void NotifyParamCount(sig_count count)
+ {
+ Print("Param count = '%d'\n", count);
+ }
+
+ // starting a return type
+ virtual void NotifyBeginRetType()
+ {
+ Print("BEGIN RET TYPE\n");
+ IncIndent();
+ }
+ virtual void NotifyEndRetType()
+ {
+ DecIndent();
+ Print("END RET TYPE\n");
+ }
+
+ // starting a parameter
+ virtual void NotifyBeginParam()
+ {
+ Print("BEGIN PARAM\n");
+ IncIndent();
+ }
+
+ virtual void NotifyEndParam()
+ {
+ DecIndent();
+ Print("END PARAM\n");
+ }
+
+ // sentinel indicating the location of the "..." in the method signature
+ virtual void NotifySentinal()
+ {
+ Print("...\n");
+ }
+
+ // number of generic parameters in this method signature (if any)
+ virtual void NotifyGenericParamCount(sig_count count)
+ {
+ Print("Generic param count = '%d'\n", count);
+ }
+
+ //----------------------------------------------------
+
+ // a field with given elem_type
+ virtual void NotifyBeginField(sig_elem_type elem_type)
+ {
+ Print("BEGIN FIELD: '%s%s'\n", SigMemberTypeOptionToString(elem_type), SigMemberTypeToString(elem_type));
+ IncIndent();
+ }
+
+ virtual void NotifyEndField()
+ {
+ DecIndent();
+ Print("END FIELD\n");
+ }
+
+ //----------------------------------------------------
+
+ // a block of locals with given elem_type (always just LOCAL_SIG for now)
+ virtual void NotifyBeginLocals(sig_elem_type elem_type)
+ {
+ Print("BEGIN LOCALS: '%s%s'\n", SigMemberTypeOptionToString(elem_type), SigMemberTypeToString(elem_type));
+ IncIndent();
+ }
+
+ virtual void NotifyEndLocals()
+ {
+ DecIndent();
+ Print("END LOCALS\n");
+ }
+
+
+ // count of locals with a block
+ virtual void NotifyLocalsCount(sig_count count)
+ {
+ Print("Locals count: '%d'\n", count);
+ }
+
+ // starting a new local within a local block
+ virtual void NotifyBeginLocal()
+ {
+ Print("BEGIN LOCAL\n");
+ IncIndent();
+ }
+
+ virtual void NotifyEndLocal()
+ {
+ DecIndent();
+ Print("END LOCAL\n");
+ }
+
+
+ // the only constraint available to locals at the moment is ELEMENT_TYPE_PINNED
+ virtual void NotifyConstraint(sig_elem_type elem_type)
+ {
+ Print("Constraint: '%s%s'\n", SigMemberTypeOptionToString(elem_type), SigMemberTypeToString(elem_type));
+ }
+
+
+ //----------------------------------------------------
+
+ // a property with given element type
+ virtual void NotifyBeginProperty(sig_elem_type elem_type)
+ {
+ Print("BEGIN PROPERTY: '%s%s'\n", SigMemberTypeOptionToString(elem_type), SigMemberTypeToString(elem_type));
+ IncIndent();
+ }
+
+ virtual void NotifyEndProperty()
+ {
+ DecIndent();
+ Print("END PROPERTY\n");
+ }
+
+
+ //----------------------------------------------------
+
+ // starting array shape information for array types
+ virtual void NotifyBeginArrayShape()
+ {
+ Print("BEGIN ARRAY SHAPE\n");
+ IncIndent();
+ }
+
+ virtual void NotifyEndArrayShape()
+ {
+ DecIndent();
+ Print("END ARRAY SHAPE\n");
+ }
+
+
+ // array rank (total number of dimensions)
+ virtual void NotifyRank(sig_count count)
+ {
+ Print("Rank: '%d'\n", count);
+ }
+
+ // number of dimensions with specified sizes followed by the size of each
+ virtual void NotifyNumSizes(sig_count count)
+ {
+ Print("Num Sizes: '%d'\n", count);
+ }
+
+ virtual void NotifySize(sig_count count)
+ {
+ Print("Size: '%d'\n", count);
+ }
+
+ // BUG BUG lower bounds can be negative, how can this be encoded?
+ // number of dimensions with specified lower bounds followed by lower bound of each
+ virtual void NotifyNumLoBounds(sig_count count)
+ {
+ Print("Num Low Bounds: '%d'\n", count);
+ }
+
+ virtual void NotifyLoBound(sig_count count)
+ {
+ Print("Low Bound: '%d'\n", count);
+ }
+
+ //----------------------------------------------------
+
+
+ // starting a normal type (occurs in many contexts such as param, field, local, etc)
+ virtual void NotifyBeginType()
+ {
+ Print("BEGIN TYPE\n");
+ IncIndent();
+ }
+
+ virtual void NotifyEndType()
+ {
+ DecIndent();
+ Print("END TYPE\n");
+ }
+
+ virtual void NotifyTypedByref()
+ {
+ Print("Typed byref\n");
+ }
+
+ // the type has the 'byref' modifier on it -- this normally precedes the type definition in the context
+ // the type is used, so for instance a parameter might have the byref modifier on it
+ // so this happens before the BeginType in that context
+ virtual void NotifyByref()
+ {
+ Print("Byref\n");
+ }
+
+ // the type is "VOID" (this has limited uses, function returns and void pointer)
+ virtual void NotifyVoid()
+ {
+ Print("Void\n");
+ }
+
+ // the type has the indicated custom modifiers (which can be optional or required)
+ virtual void NotifyCustomMod(sig_elem_type cmod, sig_index_type indexType, sig_index index)
+ {
+ Print(
+ "Custom modifers: '%s', index type: '%s', index: '0x%x'\n",
+ SigElementTypeToString(cmod),
+ SigIndexTypeToString(indexType),
+ index);
+ }
+
+ // the type is a simple type, the elem_type defines it fully
+ virtual void NotifyTypeSimple(sig_elem_type elem_type)
+ {
+ Print("Type simple: '%s'\n", SigElementTypeToString(elem_type));
+ }
+
+ // the type is specified by the given index of the given index type (normally a type index in the type metadata)
+ // this callback is normally qualified by other ones such as NotifyTypeClass or NotifyTypeValueType
+ virtual void NotifyTypeDefOrRef(sig_index_type indexType, int index)
+ {
+ Print("Type def or ref: '%s', index: '0x%x'\n", SigIndexTypeToString(indexType), index);
+ }
+
+ // the type is an instance of a generic
+ // elem_type indicates value_type or class
+ // indexType and index indicate the metadata for the type in question
+ // number indicates the number of type specifications for the generic types that will follow
+ virtual void NotifyTypeGenericInst(sig_elem_type elem_type, sig_index_type indexType, sig_index index, sig_mem_number number)
+ {
+ Print(
+ "Type generic instance: '%s', index type: '%s', index: '0x%x', member number: '%d'\n",
+ SigElementTypeToString(elem_type),
+ SigIndexTypeToString(indexType),
+ index,
+ number);
+ }
+
+ // the type is the type of the nth generic type parameter for the class
+ virtual void NotifyTypeGenericTypeVariable(sig_mem_number number)
+ {
+ Print("Type generic type variable: number: '%d'\n", number);
+ }
+
+ // the type is the type of the nth generic type parameter for the member
+ virtual void NotifyTypeGenericMemberVariable(sig_mem_number number)
+ {
+ Print("Type generic member variable: number: '%d'\n", number);
+ }
+
+ // the type will be a value type
+ virtual void NotifyTypeValueType()
+ {
+ Print("Type value type\n");
+ }
+
+ // the type will be a class
+ virtual void NotifyTypeClass()
+ {
+ Print("Type class\n");
+ }
+
+ // the type is a pointer to a type (nested type notifications follow)
+ virtual void NotifyTypePointer()
+ {
+ Print("Type pointer\n");
+ }
+
+ // the type is a function pointer, followed by the type of the function
+ virtual void NotifyTypeFunctionPointer()
+ {
+ Print("Type function pointer\n");
+ }
+
+ // the type is an array, this is followed by the array shape, see above, as well as modifiers and element type
+ virtual void NotifyTypeArray()
+ {
+ Print("Type array\n");
+ }
+
+ // the type is a simple zero-based array, this has no shape but does have custom modifiers and element type
+ virtual void NotifyTypeSzArray()
+ {
+ Print("Type sz array\n");
+ }
+};
diff --git a/Documentation/Profiling/davbr-blog-archive/samples/sigparse.cpp b/Documentation/Profiling/davbr-blog-archive/samples/sigparse.cpp
new file mode 100644
index 0000000000..0a273a044c
--- /dev/null
+++ b/Documentation/Profiling/davbr-blog-archive/samples/sigparse.cpp
@@ -0,0 +1 @@
// This blog post originally appeared on David Broman's blog on 10/13/2005
//
// Stand-alone parser for .NET metadata signature blobs (ECMA-335 Partition II).
// Subclass SigParser and override the Notify* virtuals to receive parse events;
// Parse() returns false on any malformed or truncated signature.
//
// Sig ::= MethodDefSig | MethodRefSig | StandAloneMethodSig | FieldSig | PropertySig | LocalVarSig
// MethodDefSig ::= [[HASTHIS] [EXPLICITTHIS]] (DEFAULT|VARARG|GENERIC GenParamCount) ParamCount RetType Param*
// MethodRefSig ::= [[HASTHIS] [EXPLICITTHIS]] VARARG ParamCount RetType Param* [SENTINEL Param+]
// StandAloneMethodSig ::= [[HASTHIS] [EXPLICITTHIS]] (DEFAULT|VARARG|C|STDCALL|THISCALL|FASTCALL)
//                         ParamCount RetType Param* [SENTINEL Param+]
// FieldSig ::= FIELD CustomMod* Type
// PropertySig ::= PROPERTY [HASTHIS] ParamCount CustomMod* Type Param*
// LocalVarSig ::= LOCAL_SIG Count (TYPEDBYREF | ([CustomMod] [Constraint])* [BYREF] Type)+
// -------------
// CustomMod ::= ( CMOD_OPT | CMOD_REQD ) ( TypeDefEncoded | TypeRefEncoded )
// Constraint ::= #define ELEMENT_TYPE_PINNED
// Param ::= CustomMod* ( TYPEDBYREF | [BYREF] Type )
// RetType ::= CustomMod* ( VOID | TYPEDBYREF | [BYREF] Type )
// Type ::= ( BOOLEAN | CHAR | I1 | U1 | I2 | U2 | I4 | U4 | I8 | U8 | R4 | R8 | I | U
//        | VALUETYPE TypeDefOrRefEncoded
//        | CLASS TypeDefOrRefEncoded
//        | STRING
//        | OBJECT
//        | PTR CustomMod* VOID
//        | PTR CustomMod* Type
//        | FNPTR MethodDefSig
//        | FNPTR MethodRefSig
//        | ARRAY Type ArrayShape
//        | SZARRAY CustomMod* Type
//        | GENERICINST (CLASS | VALUETYPE) TypeDefOrRefEncoded GenArgCount Type*
//        | VAR Number
//        | MVAR Number
// ArrayShape ::= Rank NumSizes Size* NumLoBounds LoBound*
// TypeDefOrRefEncoded ::= TypeDefEncoded | TypeRefEncoded
// TypeDefEncoded ::= 32-bit-3-part-encoding-for-typedefs-and-typerefs
// TypeRefEncoded ::= 32-bit-3-part-encoding-for-typedefs-and-typerefs
// ParamCount ::= 29-bit-encoded-integer
// GenArgCount ::= 29-bit-encoded-integer
// Count ::= 29-bit-encoded-integer
// Rank ::= 29-bit-encoded-integer
// NumSizes ::= 29-bit-encoded-integer
// Size ::= 29-bit-encoded-integer
// NumLoBounds ::= 29-bit-encoded-integer
// LoBounds ::= 29-bit-encoded-integer
// Number ::= 29-bit-encoded-integer

#define ELEMENT_TYPE_END 0x00         // Marks end of a list
#define ELEMENT_TYPE_VOID 0x01
#define ELEMENT_TYPE_BOOLEAN 0x02
#define ELEMENT_TYPE_CHAR 0x03
#define ELEMENT_TYPE_I1 0x04
#define ELEMENT_TYPE_U1 0x05
#define ELEMENT_TYPE_I2 0x06
#define ELEMENT_TYPE_U2 0x07
#define ELEMENT_TYPE_I4 0x08
#define ELEMENT_TYPE_U4 0x09
#define ELEMENT_TYPE_I8 0x0a
#define ELEMENT_TYPE_U8 0x0b
#define ELEMENT_TYPE_R4 0x0c
#define ELEMENT_TYPE_R8 0x0d
#define ELEMENT_TYPE_STRING 0x0e
#define ELEMENT_TYPE_PTR 0x0f         // Followed by type
#define ELEMENT_TYPE_BYREF 0x10       // Followed by type
#define ELEMENT_TYPE_VALUETYPE 0x11   // Followed by TypeDef or TypeRef token
#define ELEMENT_TYPE_CLASS 0x12       // Followed by TypeDef or TypeRef token
#define ELEMENT_TYPE_VAR 0x13         // Generic parameter in a generic type definition, represented as number
#define ELEMENT_TYPE_ARRAY 0x14       // type rank boundsCount bound1 ... loCount lo1 ...
#define ELEMENT_TYPE_GENERICINST 0x15 // Generic type instantiation. Followed by type type-arg-count type-1 ... type-n
#define ELEMENT_TYPE_TYPEDBYREF 0x16
#define ELEMENT_TYPE_I 0x18           // System.IntPtr
#define ELEMENT_TYPE_U 0x19           // System.UIntPtr
#define ELEMENT_TYPE_FNPTR 0x1b       // Followed by full method signature
#define ELEMENT_TYPE_OBJECT 0x1c      // System.Object
#define ELEMENT_TYPE_SZARRAY 0x1d     // Single-dim array with 0 lower bound
#define ELEMENT_TYPE_MVAR 0x1e        // Generic parameter in a generic method definition, represented as number
#define ELEMENT_TYPE_CMOD_REQD 0x1f   // Required modifier : followed by a TypeDef or TypeRef token
#define ELEMENT_TYPE_CMOD_OPT 0x20    // Optional modifier : followed by a TypeDef or TypeRef token
#define ELEMENT_TYPE_INTERNAL 0x21    // Implemented within the CLI
#define ELEMENT_TYPE_MODIFIER 0x40    // Or'd with following element types
#define ELEMENT_TYPE_SENTINEL 0x41    // Sentinel for vararg method signature
#define ELEMENT_TYPE_PINNED 0x45      // Denotes a local variable that points at a pinned object

#define SIG_METHOD_DEFAULT 0x0  // default calling convention
#define SIG_METHOD_C 0x1        // C calling convention
#define SIG_METHOD_STDCALL 0x2  // Stdcall calling convention
#define SIG_METHOD_THISCALL 0x3 // thiscall calling convention
#define SIG_METHOD_FASTCALL 0x4 // fastcall calling convention
#define SIG_METHOD_VARARG 0x5   // vararg calling convention
#define SIG_FIELD 0x6           // encodes a field
#define SIG_LOCAL_SIG 0x7       // used for the .locals directive
#define SIG_PROPERTY 0x8        // used to encode a property
#define SIG_GENERIC 0x10        // used to indicate that the method has one or more generic parameters.
#define SIG_HASTHIS 0x20        // used to encode the keyword instance in the calling convention
#define SIG_EXPLICITTHIS 0x40   // used to encode the keyword explicit in the calling convention

#define SIG_INDEX_TYPE_TYPEDEF 0  // ParseTypeDefOrRefEncoded returns this as the out index type for typedefs
#define SIG_INDEX_TYPE_TYPEREF 1  // ParseTypeDefOrRefEncoded returns this as the out index type for typerefs
#define SIG_INDEX_TYPE_TYPESPEC 2 // ParseTypeDefOrRefEncoded returns this as the out index type for typespecs

typedef unsigned char sig_byte;
typedef unsigned char sig_elem_type;
typedef unsigned char sig_index_type;
typedef unsigned int sig_index;
typedef unsigned int sig_count;
typedef unsigned int sig_mem_number;

class SigParser
{
private:
    sig_byte *pbBase; // start of the signature blob
    sig_byte *pbCur;  // current read position
    sig_byte *pbEnd;  // one past the last byte of the blob

public:
    // Parse the signature blob 'blob' of 'len' bytes, firing Notify* callbacks
    // as elements are recognized. Returns false on malformed/truncated input.
    bool Parse(sig_byte *blob, sig_count len);

private:
    bool ParseByte(sig_byte *pbOut);
    bool ParseNumber(sig_count *pOut);
    bool ParseTypeDefOrRefEncoded(sig_index_type *pOutIndexType, sig_index *pOutIndex);

    bool ParseMethod(sig_elem_type);
    bool ParseField(sig_elem_type);
    bool ParseProperty(sig_elem_type);
    bool ParseLocals(sig_elem_type);
    bool ParseLocal();
    bool ParseOptionalCustomMods();
    bool ParseOptionalCustomModsOrConstraint();
    bool ParseCustomMod();
    bool ParseRetType();
    bool ParseType();
    bool ParseParam();
    bool ParseArrayShape();

protected:
    // subtype these methods to create your parser side-effects
    //----------------------------------------------------
    // a method with given elem_type
    virtual void NotifyBeginMethod(sig_elem_type elem_type) {}
    virtual void NotifyEndMethod() {}

    // total parameters for the method
    virtual void NotifyParamCount(sig_count) {}

    // starting a return type
    virtual void NotifyBeginRetType() {}
    virtual void NotifyEndRetType() {}

    // starting a parameter
    virtual void NotifyBeginParam() {}
    virtual void NotifyEndParam() {}

    // sentinel indication the location of the "..." in the method signature
    virtual void NotifySentinal() {}

    // number of generic parameters in this method signature (if any)
    virtual void NotifyGenericParamCount(sig_count) {}

    //----------------------------------------------------
    // a field with given elem_type
    virtual void NotifyBeginField(sig_elem_type elem_type) {}
    virtual void NotifyEndField() {}

    //----------------------------------------------------
    // a block of locals with given elem_type (always just LOCAL_SIG for now)
    virtual void NotifyBeginLocals(sig_elem_type elem_type) {}
    virtual void NotifyEndLocals() {}

    // count of locals with a block
    virtual void NotifyLocalsCount(sig_count) {}

    // starting a new local within a local block
    virtual void NotifyBeginLocal() {}
    virtual void NotifyEndLocal() {}

    // the only constraint available to locals at the moment is ELEMENT_TYPE_PINNED
    virtual void NotifyConstraint(sig_elem_type elem_type) {}

    //----------------------------------------------------
    // a property with given element type
    virtual void NotifyBeginProperty(sig_elem_type elem_type) {}
    virtual void NotifyEndProperty() {}

    //----------------------------------------------------
    // starting array shape information for array types
    virtual void NotifyBeginArrayShape() {}
    virtual void NotifyEndArrayShape() {}

    // array rank (total number of dimensions)
    virtual void NotifyRank(sig_count) {}

    // number of dimensions with specified sizes followed by the size of each
    virtual void NotifyNumSizes(sig_count) {}
    virtual void NotifySize(sig_count) {}

    // BUG BUG lower bounds can be negative, how can this be encoded?
    // number of dimensions with specified lower bounds followed by lower bound of each
    virtual void NotifyNumLoBounds(sig_count) {}
    virtual void NotifyLoBound(sig_count) {}

    //----------------------------------------------------
    // starting a normal type (occurs in many contexts such as param, field, local, etc)
    virtual void NotifyBeginType() {}
    virtual void NotifyEndType() {}

    virtual void NotifyTypedByref() {}

    // the type has the 'byref' modifier on it -- this normally proceeds the type definition in the context
    // the type is used, so for instance a parameter might have the byref modifier on it
    // so this happens before the BeginType in that context
    virtual void NotifyByref() {}

    // the type is "VOID" (this has limited uses, function returns and void pointer)
    virtual void NotifyVoid() {}

    // the type has the indicated custom modifiers (which can be optional or required)
    virtual void NotifyCustomMod(sig_elem_type cmod, sig_index_type indexType, sig_index index) {}

    // the type is a simple type, the elem_type defines it fully
    virtual void NotifyTypeSimple(sig_elem_type elem_type) {}

    // the type is specified by the given index of the given index type (normally a type index in the type metadata)
    // this callback is normally qualified by other ones such as NotifyTypeClass or NotifyTypeValueType
    virtual void NotifyTypeDefOrRef(sig_index_type indexType, int index) {}

    // the type is an instance of a generic
    // elem_type indicates value_type or class
    // indexType and index indicate the metadata for the type in question
    // number indicates the number of type specifications for the generic types that will follow
    virtual void NotifyTypeGenericInst(sig_elem_type elem_type, sig_index_type indexType, sig_index index, sig_mem_number number) {}

    // the type is the type of the nth generic type parameter for the class
    virtual void NotifyTypeGenericTypeVariable(sig_mem_number number) {}

    // the type is the type of the nth generic type parameter for the member
    virtual void NotifyTypeGenericMemberVariable(sig_mem_number number) {}

    // the type will be a value type
    virtual void NotifyTypeValueType() {}

    // the type will be a class
    virtual void NotifyTypeClass() {}

    // the type is a pointer to a type (nested type notifications follow)
    virtual void NotifyTypePointer() {}

    // the type is a function pointer, followed by the type of the function
    virtual void NotifyTypeFunctionPointer() {}

    // the type is an array, this is followed by the array shape, see above, as well as modifiers and element type
    virtual void NotifyTypeArray() {}

    // the type is a simple zero-based array, this has no shape but does have custom modifiers and element type
    virtual void NotifyTypeSzArray() {}
};

//----------------------------------------------------

bool SigParser::Parse(sig_byte *pb, sig_count cbBuffer)
{
    pbBase = pb;
    pbCur = pb;
    pbEnd = pbBase + cbBuffer;

    sig_elem_type elem_type;

    if (!ParseByte(&elem_type))
        return false;

    // the low nibble selects the signature kind; the high bits carry
    // HASTHIS / EXPLICITTHIS / GENERIC flags which the sub-parsers inspect
    switch (elem_type & 0xf)
    {
    case SIG_METHOD_DEFAULT:  // default calling convention
    case SIG_METHOD_C:        // C calling convention
    case SIG_METHOD_STDCALL:  // Stdcall calling convention
    case SIG_METHOD_THISCALL: // thiscall calling convention
    case SIG_METHOD_FASTCALL: // fastcall calling convention
    case SIG_METHOD_VARARG:   // vararg calling convention
        return ParseMethod(elem_type);

    case SIG_FIELD:           // encodes a field
        return ParseField(elem_type);

    case SIG_LOCAL_SIG:       // used for the .locals directive
        return ParseLocals(elem_type);

    case SIG_PROPERTY:        // used to encode a property
        return ParseProperty(elem_type);

    default:
        // unknown signature
        break;
    }

    return false;
}

// Read one byte, advancing the cursor; fails at end of buffer.
bool SigParser::ParseByte(sig_byte *pbOut)
{
    if (pbCur < pbEnd)
    {
        *pbOut = *pbCur;
        pbCur++;
        return true;
    }

    return false;
}

bool SigParser::ParseMethod(sig_elem_type elem_type)
{
    // MethodDefSig ::= [[HASTHIS] [EXPLICITTHIS]] (DEFAULT|VARARG|GENERIC GenParamCount)
    //                  ParamCount RetType Param* [SENTINEL Param+]

    NotifyBeginMethod(elem_type);

    sig_count gen_param_count;
    sig_count param_count;

    if (elem_type & SIG_GENERIC)
    {
        if (!ParseNumber(&gen_param_count))
        {
            return false;
        }
        NotifyGenericParamCount(gen_param_count);
    }

    if (!ParseNumber(&param_count))
    {
        return false;
    }
    NotifyParamCount(param_count);

    if (!ParseRetType())
    {
        return false;
    }

    bool fEncounteredSentinal = false;

    for (sig_count i = 0; i < param_count; i++)
    {
        if (pbCur >= pbEnd)
        {
            return false;
        }

        if (*pbCur == ELEMENT_TYPE_SENTINEL)
        {
            // at most one sentinel ("...") is allowed per signature
            if (fEncounteredSentinal)
            {
                return false;
            }

            fEncounteredSentinal = true;
            NotifySentinal();
            pbCur++;
        }

        if (!ParseParam())
        {
            return false;
        }
    }

    NotifyEndMethod();
    return true;
}

bool SigParser::ParseField(sig_elem_type elem_type)
{
    // FieldSig ::= FIELD CustomMod* Type

    NotifyBeginField(elem_type);

    if (!ParseOptionalCustomMods())
    {
        return false;
    }

    if (!ParseType())
    {
        return false;
    }

    NotifyEndField();
    return true;
}

bool SigParser::ParseProperty(sig_elem_type elem_type)
{
    // PropertySig ::= PROPERTY [HASTHIS] ParamCount CustomMod* Type Param*

    NotifyBeginProperty(elem_type);

    sig_count param_count;

    if (!ParseNumber(&param_count))
    {
        return false;
    }
    NotifyParamCount(param_count);

    if (!ParseOptionalCustomMods())
    {
        return false;
    }

    if (!ParseType())
    {
        return false;
    }

    for (sig_count i = 0; i < param_count; i++)
    {
        if (!ParseParam())
        {
            return false;
        }
    }

    NotifyEndProperty();
    return true;
}

bool SigParser::ParseLocals(sig_elem_type elem_type)
{
    // LocalVarSig ::= LOCAL_SIG Count (TYPEDBYREF | ([CustomMod] [Constraint])* [BYREF] Type)+

    NotifyBeginLocals(elem_type);

    sig_count local_count;

    if (!ParseNumber(&local_count))
    {
        return false;
    }
    NotifyLocalsCount(local_count);

    for (sig_count i = 0; i < local_count; i++)
    {
        if (!ParseLocal())
        {
            return false;
        }
    }

    NotifyEndLocals();
    return true;
}

bool SigParser::ParseLocal()
{
    // TYPEDBYREF | ([CustomMod] [Constraint])* [BYREF] Type

    NotifyBeginLocal();

    if (pbCur >= pbEnd)
    {
        return false;
    }

    if (*pbCur == ELEMENT_TYPE_TYPEDBYREF)
    {
        NotifyTypedByref();
        pbCur++;
        goto Success;
    }

    if (!ParseOptionalCustomModsOrConstraint())
    {
        return false;
    }

    if (pbCur >= pbEnd)
    {
        return false;
    }

    if (*pbCur == ELEMENT_TYPE_BYREF)
    {
        NotifyByref();
        pbCur++;
    }

    if (!ParseType())
    {
        return false;
    }

Success:
    NotifyEndLocal();
    return true;
}

bool SigParser::ParseOptionalCustomModsOrConstraint()
{
    // consume any run of CMOD_OPT/CMOD_REQD modifiers and PINNED constraints
    for (;;)
    {
        if (pbCur >= pbEnd)
        {
            return true;
        }

        switch (*pbCur)
        {
        case ELEMENT_TYPE_CMOD_OPT:
        case ELEMENT_TYPE_CMOD_REQD:
            if (!ParseCustomMod())
            {
                return false;
            }
            break;

        case ELEMENT_TYPE_PINNED:
            NotifyConstraint(*pbCur);
            pbCur++;
            break;

        default:
            return true;
        }
    }

    return false;
}

bool SigParser::ParseOptionalCustomMods()
{
    // consume any run of CMOD_OPT/CMOD_REQD modifiers
    for (;;)
    {
        if (pbCur >= pbEnd)
        {
            return true;
        }

        switch (*pbCur)
        {
        case ELEMENT_TYPE_CMOD_OPT:
        case ELEMENT_TYPE_CMOD_REQD:
            if (!ParseCustomMod())
            {
                return false;
            }
            break;

        default:
            return true;
        }
    }

    return false;
}

bool SigParser::ParseCustomMod()
{
    sig_elem_type cmod = 0;
    sig_index index;
    sig_index_type indexType;

    if (!ParseByte(&cmod))
    {
        return false;
    }

    if (cmod == ELEMENT_TYPE_CMOD_OPT || cmod == ELEMENT_TYPE_CMOD_REQD)
    {
        if (!ParseTypeDefOrRefEncoded(&indexType, &index))
        {
            return false;
        }

        NotifyCustomMod(cmod, indexType, index);
        return true;
    }

    return false;
}

bool SigParser::ParseParam()
{
    // Param ::= CustomMod* ( TYPEDBYREF | [BYREF] Type )

    NotifyBeginParam();

    if (!ParseOptionalCustomMods())
    {
        return false;
    }

    if (pbCur >= pbEnd)
    {
        return false;
    }

    if (*pbCur == ELEMENT_TYPE_TYPEDBYREF)
    {
        NotifyTypedByref();
        pbCur++;
        goto Success;
    }

    if (*pbCur == ELEMENT_TYPE_BYREF)
    {
        NotifyByref();
        pbCur++;
    }

    if (!ParseType())
    {
        return false;
    }

Success:
    NotifyEndParam();
    return true;
}

bool SigParser::ParseRetType()
{
    // RetType ::= CustomMod* ( VOID | TYPEDBYREF | [BYREF] Type )

    NotifyBeginRetType();

    if (!ParseOptionalCustomMods())
    {
        return false;
    }

    if (pbCur >= pbEnd)
    {
        return false;
    }

    if (*pbCur == ELEMENT_TYPE_TYPEDBYREF)
    {
        NotifyTypedByref();
        pbCur++;
        goto Success;
    }

    if (*pbCur == ELEMENT_TYPE_VOID)
    {
        NotifyVoid();
        pbCur++;
        goto Success;
    }

    if (*pbCur == ELEMENT_TYPE_BYREF)
    {
        NotifyByref();
        pbCur++;
    }

    if (!ParseType())
    {
        return false;
    }

Success:
    NotifyEndRetType();
    return true;
}

bool SigParser::ParseArrayShape()
{
    sig_count rank;
    sig_count numsizes;
    sig_count size;

    // ArrayShape ::= Rank NumSizes Size* NumLoBounds LoBound*

    NotifyBeginArrayShape();

    if (!ParseNumber(&rank))
    {
        return false;
    }
    NotifyRank(rank);

    if (!ParseNumber(&numsizes))
    {
        return false;
    }
    NotifyNumSizes(numsizes);

    for (sig_count i = 0; i < numsizes; i++)
    {
        if (!ParseNumber(&size))
        {
            return false;
        }
        NotifySize(size);
    }

    if (!ParseNumber(&numsizes))
    {
        return false;
    }
    NotifyNumLoBounds(numsizes);

    for (sig_count i = 0; i < numsizes; i++)
    {
        if (!ParseNumber(&size))
        {
            return false;
        }
        NotifyLoBound(size);
    }

    NotifyEndArrayShape();
    return true;
}

bool SigParser::ParseType()
{
    // Type ::= ( BOOLEAN | CHAR | I1 | U1 | I2 | U2 | I4 | U4 | I8 | U8 | R4 | R8 | I | U
    //        | VALUETYPE TypeDefOrRefEncoded
    //        | CLASS TypeDefOrRefEncoded
    //        | STRING
    //        | OBJECT
    //        | PTR CustomMod* VOID
    //        | PTR CustomMod* Type
    //        | FNPTR MethodDefSig
    //        | FNPTR MethodRefSig
    //        | ARRAY Type ArrayShape
    //        | SZARRAY CustomMod* Type
    //        | GENERICINST (CLASS | VALUETYPE) TypeDefOrRefEncoded GenArgCount Type*
    //        | VAR Number
    //        | MVAR Number

    NotifyBeginType();

    sig_elem_type elem_type;
    sig_index index;
    sig_mem_number number;
    sig_index_type indexType;

    if (!ParseByte(&elem_type))
        return false;

    switch (elem_type)
    {
    case ELEMENT_TYPE_BOOLEAN:
    case ELEMENT_TYPE_CHAR:
    case ELEMENT_TYPE_I1:
    case ELEMENT_TYPE_U1:
    case ELEMENT_TYPE_I2:
    case ELEMENT_TYPE_U2:
    case ELEMENT_TYPE_I4:
    case ELEMENT_TYPE_U4:
    case ELEMENT_TYPE_I8:
    case ELEMENT_TYPE_U8:
    case ELEMENT_TYPE_R4:
    case ELEMENT_TYPE_R8:
    case ELEMENT_TYPE_I:
    case ELEMENT_TYPE_U:
    case ELEMENT_TYPE_STRING:
    case ELEMENT_TYPE_OBJECT:
        // simple types
        NotifyTypeSimple(elem_type);
        break;

    case ELEMENT_TYPE_PTR:
        // PTR CustomMod* VOID
        // PTR CustomMod* Type
        NotifyTypePointer();

        if (!ParseOptionalCustomMods())
        {
            return false;
        }

        if (pbCur >= pbEnd)
        {
            return false;
        }

        if (*pbCur == ELEMENT_TYPE_VOID)
        {
            pbCur++;
            NotifyVoid();
            break;
        }

        if (!ParseType())
        {
            return false;
        }
        break;

    case ELEMENT_TYPE_CLASS:
        // CLASS TypeDefOrRefEncoded
        NotifyTypeClass();

        if (!ParseTypeDefOrRefEncoded(&indexType, &index))
        {
            return false;
        }
        NotifyTypeDefOrRef(indexType, index);
        break;

    case ELEMENT_TYPE_VALUETYPE:
        // VALUETYPE TypeDefOrRefEncoded
        NotifyTypeValueType();

        if (!ParseTypeDefOrRefEncoded(&indexType, &index))
        {
            return false;
        }
        NotifyTypeDefOrRef(indexType, index);
        break;

    case ELEMENT_TYPE_FNPTR:
        // FNPTR MethodDefSig
        // FNPTR MethodRefSig
        NotifyTypeFunctionPointer();

        if (!ParseByte(&elem_type))
        {
            return false;
        }

        if (!ParseMethod(elem_type))
        {
            return false;
        }
        break;

    case ELEMENT_TYPE_ARRAY:
        // ARRAY Type ArrayShape
        NotifyTypeArray();

        if (!ParseType())
        {
            return false;
        }

        if (!ParseArrayShape())
        {
            return false;
        }
        break;

    case ELEMENT_TYPE_SZARRAY:
        // SZARRAY CustomMod* Type
        NotifyTypeSzArray();

        if (!ParseOptionalCustomMods())
        {
            return false;
        }

        if (!ParseType())
        {
            return false;
        }
        break;

    case ELEMENT_TYPE_GENERICINST:
        // GENERICINST (CLASS | VALUETYPE) TypeDefOrRefEncoded GenArgCount Type*
        if (!ParseByte(&elem_type))
        {
            return false;
        }

        if (elem_type != ELEMENT_TYPE_CLASS && elem_type != ELEMENT_TYPE_VALUETYPE)
        {
            return false;
        }

        if (!ParseTypeDefOrRefEncoded(&indexType, &index))
        {
            return false;
        }

        if (!ParseNumber(&number))
        {
            return false;
        }

        NotifyTypeGenericInst(elem_type, indexType, index, number);

        for (sig_mem_number i = 0; i < number; i++)
        {
            if (!ParseType())
            {
                return false;
            }
        }
        break;

    case ELEMENT_TYPE_VAR:
        // VAR Number
        if (!ParseNumber(&number))
        {
            return false;
        }
        NotifyTypeGenericTypeVariable(number);
        break;

    case ELEMENT_TYPE_MVAR:
        // MVAR Number
        if (!ParseNumber(&number))
        {
            return false;
        }
        NotifyTypeGenericMemberVariable(number);
        break;

    default:
        // FIX: previously an unknown/reserved element type byte fell through
        // and the signature was reported as successfully parsed. Reject it.
        return false;
    }

    NotifyEndType();
    return true;
}

bool SigParser::ParseTypeDefOrRefEncoded(sig_index_type *pIndexTypeOut, sig_index *pIndexOut)
{
    // parse an encoded typedef or typeref: low 2 bits select the table
    // (typedef/typeref/typespec), remaining bits are the row index
    sig_count encoded = 0;

    if (!ParseNumber(&encoded))
    {
        return false;
    }

    *pIndexTypeOut = (sig_index_type)(encoded & 0x3);
    *pIndexOut = (encoded >> 2);
    return true;
}

bool SigParser::ParseNumber(sig_count *pOut)
{
    // parse the variable length number format (0-4 bytes)

    sig_byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;

    // at least one byte in the encoding, read that
    if (!ParseByte(&b1))
    {
        return false;
    }

    if (b1 == 0xff)
    {
        // special encoding of 'NULL'
        // not sure what this means as a number, don't expect to see it except for string lengths
        // which we don't encounter anyway so calling it an error
        return false;
    }

    // early out on 1 byte encoding
    if ((b1 & 0x80) == 0)
    {
        *pOut = b1;
        return true;
    }

    // now at least 2 bytes in the encoding, read 2nd byte
    if (!ParseByte(&b2))
    {
        return false;
    }

    // early out on 2 byte encoding
    if ((b1 & 0x40) == 0)
    {
        *pOut = (((b1 & 0x3f) << 8) | b2);
        return true;
    }

    // must be a 4 byte encoding
    if ((b1 & 0x20) != 0)
    {
        // 4 byte encoding has this bit clear -- error if not
        return false;
    }

    if (!ParseByte(&b3))
    {
        return false;
    }

    if (!ParseByte(&b4))
    {
        return false;
    }

    *pOut = ((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4;
    return true;
}