summaryrefslogtreecommitdiff
path: root/src/vm/packedfields.inl
blob: 1c1a043bae8372d0f1092a1b46834a37e1168968 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//

//

//
// Provides a mechanism to store an array of DWORD-typed fields in a space-efficient manner. There are some
// caveats:
//  1) Fields can be written and read in an uncompressed form (a simple array of DWORD values) until the
//     PackFields() method is invoked. Once this method has been invoked (and returned true) fields have been
//     compacted and must not be modified again. That is, the primary usage of this class is to store a set of
//     initialized-once fields.
//  2) The compaction algorithm relies on the fields containing small values (such as counts). Avoid storing
//     fields that have special sentinel values (such as all bits set) which will frequently set high order
//     bits.
//  3) An instance of PackedDWORDFields will take up a fixed quantity of memory equivalent to an array of
//     DWORD fields. If PackFields() returns true then the fields values frozen at the time of the call have
//     been compressed into a fewer number of bytes in-place. This smaller size will always be a multiple of
//     sizeof(DWORD) in length and is reported by GetPackedSize(). If a PackedDWORDFields structure is being
//     declared as a field inside another structure it is typically wise to place the field last to take
//     advantage of this size reduction (e.g. when saving the outer structure into an ngen image). If
//     PackFields() returns false then the fields remain unpacked and unchanged.
//  4) The caller retains the responsibility of recording whether an instance of PackedDWORDFields is in the
//     packed or unpacked state. This is important since incorrect behavior will result if the wrong methods
//     are used for the current state (e.g. calling GetUnpackedField() on a packed instance). This is not done
//     automatically since there are no bits free to store the state. However, under a debug build correct
//     usage will be checked (at the expensive of extra storage space).
//  5) The space saving made come at a runtime CPU cost to access the fields. Do not use this mechanism to
//     compact fields that must be read on a perfomance critical path. If unsure, measure the performance of
//     this solution before committing to it.
//
// ============================================================================

// Describe an array of FIELD_COUNT DWORDs. Each entry is addressed via a zero-based index and is expected to
// frequently contain a small integer and remain frozen after initialization.
template <DWORD FIELD_COUNT>
class PackedDWORDFields
{
    // Some constants to make the code a little more readable.
    enum Constants
    {
        kMaxLengthBits  = 5,    // Number of bits needed to express the maximum length of a field (32-bits)
        kBitsPerDWORD   = 32,   // Number of bits in a DWORD
    };

public:
    // Fields are all initialized to zero.
    PackedDWORDFields()
    {
        LIMITED_METHOD_DAC_CONTRACT;

        memset(m_rgUnpackedFields, 0, sizeof(m_rgUnpackedFields));
#ifdef _DEBUG
        memset(m_rgDebugUnpackedFields, 0, sizeof(m_rgDebugUnpackedFields));
        m_fFieldsPacked = false;
#endif // _DEBUG
    }

    // Get the value of the given field when the structure is in its unpacked state.
    DWORD GetUnpackedField(DWORD dwFieldIndex)
    {
        LIMITED_METHOD_DAC_CONTRACT;

        _ASSERTE(dwFieldIndex < FIELD_COUNT);
        _ASSERTE(!m_fFieldsPacked);

        return m_rgUnpackedFields[dwFieldIndex];
    }

    // Set the value of the given field when the structure is in its unpacked state. Setting field values
    // multiple times is allowed but only until a successful call to PackFields is made.
    void SetUnpackedField(DWORD dwFieldIndex, DWORD dwValue)
    {
        LIMITED_METHOD_CONTRACT;

        _ASSERTE(dwFieldIndex < FIELD_COUNT);
        _ASSERTE(!m_fFieldsPacked);

        m_rgUnpackedFields[dwFieldIndex] = dwValue;

#ifdef _DEBUG
        m_rgDebugUnpackedFields[dwFieldIndex] = dwValue;
#endif // _DEBUG
    }

    // Attempt to optimize the set of fields given their current values. Returns false if compaction wouldn't
    // achieve any space savings (in this case the structure remains in the unpacked state and the caller can
    // continue to use the *UnpackedField methods above or even re-attempt PackFields() with different field
    // values). If true is returned the data has been compacted into a smaller amount of space (this will
    // always be a multiple of sizeof(DWORD) in size). This size can be queried using GetPackedSize() below.
    // Once PackFields() has returned true fields can no longer be modified and field values must be retrieved
    // via GetPackedField() rather than GetUnpackedField().
    bool PackFields()
    {
        CONTRACTL
        {
            NOTHROW;
            GC_NOTRIGGER;
            MODE_ANY;
            SO_INTOLERANT;
        }
        CONTRACTL_END;

        // Can't re-pack a packed structure.
        _ASSERTE(!m_fFieldsPacked);

        // First compute the number of bits of space we'd need for a packed representation. Do this before
        // making any changes since sometimes we'd end up expanding the data instead and in this case we wish
        // to return false and make no updates to the structure.

        // There's a fixed overhead of kMaxLengthBits for each field (we store the packed fields as a
        // bit-stream that alternates between a field length (of kMaxLengthBits) followed by a variable length
        // bitfield containing the field value.
        DWORD dwTotalPackedBits = FIELD_COUNT * kMaxLengthBits;

        // For each field calculate excatly how many bits we'd need to store the field value and add this to
        // the total.
        for (DWORD i = 0; i < FIELD_COUNT; i++)
            dwTotalPackedBits += BitsRequired(m_rgUnpackedFields[i]);

        // Now we have the total is it smaller than a simple array of DWORDs?
        if (dwTotalPackedBits >= (FIELD_COUNT * kBitsPerDWORD))
            return false;

        // Compaction will save us space. We're committed to implementing that compaction now.

        // Work from a copy of the unpacked fields since we're about to start modifying the space in which
        // they're currently stored.
        DWORD rgUnpackedFields[FIELD_COUNT];
        memcpy(rgUnpackedFields, m_rgUnpackedFields, sizeof(rgUnpackedFields));

        // Start writing a stream of bits. For each field write a fixed sized header describing the number of
        // bits required to express the field followed by the field value itself.
        DWORD dwOffset = 0;
        for (DWORD i = 0; i < FIELD_COUNT; i++)
        {
            // Find the minimal number of bits required to encode the current field's value.
            DWORD dwFieldLength = BitsRequired(rgUnpackedFields[i]);
            _ASSERTE(dwFieldLength > 0 && dwFieldLength <= kBitsPerDWORD);

            // Write the size field. Note that we store the size biased by one. That is, a field length of one
            // is encoded as zero. We do this so we can express a range of field sizes from 1 through 32,
            // emcompassing the worst case scenario (a full 32 bits). It comes at the cost of not being able
            // to encode zero-valued fields with zero bits. Is this is deemed an important optimization in the
            // future we could always given up on a simple linear mapping of the size field and use a lookup
            // table to map values encoded into the real sizes. Experiments with EEClass packed fields over
            // mscorlib show that this currently doesn't yield us much benefit, primarily due to the DWORD
            // round-up size semantic, which implies we'd need a lot more optimization than this to reduce the
            // average structure size below the next DWORD threshhold.
            BitVectorSet(dwOffset, kMaxLengthBits, dwFieldLength - 1);
            dwOffset += kMaxLengthBits;

            // Write the field value itself.
            BitVectorSet(dwOffset, dwFieldLength, rgUnpackedFields[i]);
            dwOffset += dwFieldLength;
        }

#ifdef _DEBUG
        m_fFieldsPacked = true;
#endif // _DEBUG

        // Compaction was successful.
        return true;
    }

    // Return the size in bytes of a compacted structure (it is illegal to call this on an uncompacted
    // structure).
    DWORD GetPackedSize()
    {
        LIMITED_METHOD_DAC_CONTRACT;

        _ASSERTE(m_fFieldsPacked);

        // Walk the field stream reading header (which are fixed size) and then using the value of the headers
        // to skip the field value.
        DWORD cBits = 0;
        for (DWORD i = 0; i < FIELD_COUNT; i++)
            cBits += kMaxLengthBits + BitVectorGet(cBits, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31]

        // Compute the number of DWORDs needed to store the bits of the encoding.
        // static_cast would not be necessary if ALIGN_UP were templated like FitsIn.
        DWORD cDWORDs = static_cast<DWORD>(ALIGN_UP(cBits, kBitsPerDWORD)) / kBitsPerDWORD;

        // Return the total structure size.
        return offsetof(PackedDWORDFields<FIELD_COUNT>, m_rgPackedFields) + (cDWORDs * sizeof(DWORD));
    }

    // Get the value of a packed field. Illegal to call this on an uncompacted structure.
    DWORD GetPackedField(DWORD dwFieldIndex)
    {
        CONTRACTL
        {
            NOTHROW;
            GC_NOTRIGGER;
            MODE_ANY;
            SUPPORTS_DAC;
            SO_TOLERANT;
        }
        CONTRACTL_END;

        _ASSERTE(dwFieldIndex < FIELD_COUNT);
        _ASSERTE(m_fFieldsPacked);

        // Walk past all the predecessor fields.
        DWORD dwOffset = 0;
        for (DWORD i = 0; i < dwFieldIndex; i++)
            dwOffset += kMaxLengthBits + BitVectorGet(dwOffset, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31]

        // The next kMaxLengthBits bits contain the length in bits of the field we want (-1 due to the way we
        // encode the length).
        DWORD dwFieldLength = BitVectorGet(dwOffset, kMaxLengthBits) + 1;
        dwOffset += kMaxLengthBits;

        // Grab the field value.
        DWORD dwReturn = BitVectorGet(dwOffset, dwFieldLength);

        // On debug builds ensure the encoded field value is the same as the original unpacked version.
        _ASSERTE(dwReturn == m_rgDebugUnpackedFields[dwFieldIndex]);
        return dwReturn;
    }

private:
    // Return the minimum number of bits required to encode a DWORD value by stripping out the
    // most-significant leading zero bits). Returns a value between 1 and 32 inclusive (we never encode
    // anything with zero bits).
    DWORD BitsRequired(DWORD dwValue)
    {
        LIMITED_METHOD_CONTRACT;

        // Starting with a bit-mask of the most significant bit and iterating over masks for successively less
        // significant bits, stop as soon as the mask co-incides with a set bit in the value. Simultaneously
        // we're counting down the bits required to express the range of values implied by seeing the
        // corresponding bit set in the value (e.g. when we're testing the high bit we know we'd need 32-bits
        // to encode the range of values that have this bit set). Stop when we get to one bit (we never return
        // 0 bits required, even for an input value of 0).
        DWORD dwMask = 0x80000000;
        DWORD cBits = 32;
        while (cBits > 1)
        {
            if (dwValue & dwMask)
                return cBits;

            dwMask >>= 1;
            cBits--;
        }

        return 1;
    }

    // Set the dwLength bits at m_rgPackedFields + dwOffset bits to the value dwValue.
    void BitVectorSet(DWORD dwOffset, DWORD dwLength, DWORD dwValue)
    {
        LIMITED_METHOD_CONTRACT;

        _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD); // Can set at most one DWORD at a time
        _ASSERTE((dwLength == kBitsPerDWORD) || (dwValue < (1U << dwLength)));  // Value had better fit in the given length

        // Calculate the start and end naturally aligned DWORDs into which the value will go.
        DWORD dwStartBlock = dwOffset / kBitsPerDWORD;
        DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD;
        if (dwStartBlock == dwEndBlock)
        {
            // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits
            // we'll need to shift the input value (to the left) and a mask of the bits that will be set in
            // the destination DWORD.
            DWORD dwValueShift = dwOffset % kBitsPerDWORD;
            DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift;

            m_rgPackedFields[dwStartBlock] &= ~dwValueMask;             // Zero the target bits
            m_rgPackedFields[dwStartBlock] |= dwValue << dwValueShift;  // Or in the new value (suitably shifted)
        }
        else
        {
            // Hard case: the new value is split across two DWORDs (two DWORDs is the max as the new value can
            // be at most DWORD-sized itself). For simplicity we'll simply break this into two separate
            // non-spanning sets. We can revisit this in the future if the perf is a problem.
            DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD);   // Number of bits to set in the first DWORD
            DWORD dwInitialMask = (1U << dwInitialBits) - 1;                    // Mask covering those value bits

            // Set the portion of the value residing in the first DWORD.
            BitVectorSet(dwOffset, dwInitialBits, dwValue & dwInitialMask);

            // And then the remainder in the second DWORD.
            BitVectorSet(dwOffset + dwInitialBits, dwLength - dwInitialBits, dwValue >> dwInitialBits);
        }

        _ASSERTE(BitVectorGet(dwOffset, dwLength) == dwValue);
    }

    // Get the dwLength bits at m_rgPackedFields + dwOffset bits. Value is zero-extended to DWORD size.
    DWORD BitVectorGet(DWORD dwOffset, DWORD dwLength)
    {
        LIMITED_METHOD_DAC_CONTRACT;

        _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD);    // Can get at most one DWORD at a time

        // Calculate the start and end naturally aligned DWORDs from which the value will come.
        DWORD dwStartBlock = dwOffset / kBitsPerDWORD;
        DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD;
        if (dwStartBlock == dwEndBlock)
        {
            // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits
            // we'll need to shift the extracted value (to the right) and a mask of the bits that will be
            // extracted in the destination DWORD.
            DWORD dwValueShift = dwOffset % kBitsPerDWORD;
            DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift;

            // Mask out the bits we want and shift them down into the bottom of the result DWORD.
            return (m_rgPackedFields[dwStartBlock] & dwValueMask) >> dwValueShift;
        }
        else
        {
            // Hard case: the return value is split across two DWORDs (two DWORDs is the max as the new value
            // can be at most DWORD-sized itself). For simplicity we'll simply break this into two separate
            // non-spanning gets and stitch the result together from that. We can revisit this in the future
            // if the perf is a problem.
            DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD);   // Number of bits to get in the first DWORD
            DWORD dwReturn;

            // Get the initial (low-order) bits from the first DWORD.
            dwReturn = BitVectorGet(dwOffset, dwInitialBits);

            // Get the remaining bits from the second DWORD. These bits will need to be shifted to the left
            // (past the bits we've already read) before being OR'd into the result.
            dwReturn |= BitVectorGet(dwOffset + dwInitialBits, dwLength - dwInitialBits) << dwInitialBits;

            return dwReturn;
        }
    }

#ifdef _DEBUG
    DWORD       m_rgDebugUnpackedFields[FIELD_COUNT];   // A copy of the unpacked fields so we can validate
                                                        // packed reads
    bool        m_fFieldsPacked;                        // The current packed/unpacked state so we can check
                                                        // the right methods are being called
#endif // _DEBUG

    union
    {
        DWORD   m_rgUnpackedFields[FIELD_COUNT];        // The fields in their unpacked state
        DWORD   m_rgPackedFields[1];                    // The first DWORD block of fields in the packed state
    };
};