// Source: src/mscorlib/shared/System/Text/Decoder.cs
// Blob: b2a003037b8dc74d3fa1b6b890d3b3a61393462e
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.Serialization;
using System.Text;
using System;
using System.Diagnostics;
using System.Diagnostics.Contracts;

namespace System.Text
{
    // A Decoder is used to decode a sequence of blocks of bytes into a
    // sequence of blocks of characters. Following instantiation of a decoder,
    // sequential blocks of bytes are converted into blocks of characters through
    // calls to the GetChars method. The decoder maintains state between the
    // conversions, allowing it to correctly decode byte sequences that span
    // adjacent blocks.
    //
    // Instances of specific implementations of the Decoder abstract base
    // class are typically obtained through calls to the GetDecoder method
    // of Encoding objects.
    //
    [Serializable]
    public abstract class Decoder
    {
        // The fallback assigned through the Fallback property (null until one is assigned).
        internal DecoderFallback m_fallback = null;

        // Buffer created on demand by the FallbackBuffer property. It is not serialized and is
        // discarded whenever a new Fallback is assigned.
        [NonSerialized]
        internal DecoderFallbackBuffer m_fallbackBuffer = null;

        // Writes the serializable state of this base class (only m_fallback) into info
        // under the name "m_fallback".
        internal void SerializeDecoder(SerializationInfo info)
        {
            info.AddValue("m_fallback", this.m_fallback);
        }

        protected Decoder()
        {
            // We don't call default reset because default reset probably isn't good if we aren't initialized.
        }

        // Gets or sets the fallback object used by this decoder.
        //
        // The setter throws ArgumentNullException when value is null, and ArgumentException when
        // the current fallback buffer still holds data (Remaining > 0). On success the existing
        // fallback buffer is dropped so that the next FallbackBuffer access creates a new one
        // from the new fallback.
        public DecoderFallback Fallback
        {
            get
            {
                return m_fallback;
            }

            set
            {
                if (value == null)
                    throw new ArgumentNullException(nameof(value));
                Contract.EndContractBlock();

                // Can't swap the fallback while its buffer still has pending data
                if (m_fallbackBuffer != null && m_fallbackBuffer.Remaining > 0)
                    throw new ArgumentException(
                      SR.Argument_FallbackBufferNotEmpty, nameof(value));

                m_fallback = value;
                m_fallbackBuffer = null;
            }
        }

        // Gets the fallback buffer associated with this decoder, creating it on first use.
        // The buffer comes from m_fallback when one has been assigned, otherwise from
        // DecoderFallback.ReplacementFallback.
        //
        // Note: we don't test for threading here because async access to Encoders and Decoders
        // doesn't work anyway.
        public DecoderFallbackBuffer FallbackBuffer
        {
            get
            {
                if (m_fallbackBuffer == null)
                {
                    if (m_fallback != null)
                        m_fallbackBuffer = m_fallback.CreateFallbackBuffer();
                    else
                        m_fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
                }

                return m_fallbackBuffer;
            }
        }

        // True when a fallback buffer has already been created. Unlike FallbackBuffer, reading
        // this property never allocates one.
        internal bool InternalHasFallbackBuffer
        {
            get
            {
                return m_fallbackBuffer != null;
            }
        }

        // Reset the Decoder
        //
        // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder.  This
        // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
        //
        // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
        //
        // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
        // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
        public virtual void Reset()
        {
            byte[] byteTemp = Array.Empty<byte>();

            // Size the scratch buffer for whatever a flushing decode of zero bytes would emit,
            // then perform that decode and throw the output away.
            char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
            GetChars(byteTemp, 0, 0, charTemp, 0, true);

            if (m_fallbackBuffer != null)
                m_fallbackBuffer.Reset();
        }

        // Returns the number of characters the next call to GetChars will
        // produce if presented with the given range of bytes. The returned value
        // takes into account the state in which the decoder was left following the
        // last call to GetChars. The state of the decoder is not affected
        // by a call to this method.
        //
        public abstract int GetCharCount(byte[] bytes, int index, int count);

        // Default implementation ignores flush and defers to the three-argument overload.
        // Decoders whose count depends on flushing override this method.
        public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
        {
            return GetCharCount(bytes, index, count);
        }

        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation)
        //
        // Copies count bytes from the pointer into a temporary array and asks the array-based
        // overload for the count. Throws ArgumentNullException when bytes is null and
        // ArgumentOutOfRangeException when count is negative.
        [CLSCompliant(false)]
        public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
        {
            // Validate input parameters
            if (bytes == null)
                throw new ArgumentNullException(nameof(bytes),
                      SR.ArgumentNull_Array);

            if (count < 0)
                throw new ArgumentOutOfRangeException(nameof(count),
                      SR.ArgumentOutOfRange_NeedNonNegNum);
            Contract.EndContractBlock();

            byte[] arrbyte = new byte[count];

            for (int index = 0; index < count; index++)
                arrbyte[index] = bytes[index];

            // Forward flush so that this count agrees with what GetChars(byte*, ..., flush)
            // produces; the pointer-based GetChars forwards flush in the same way.
            return GetCharCount(arrbyte, 0, count, flush);
        }

        // Decodes a range of bytes in a byte array into a range of characters
        // in a character array. The method decodes byteCount bytes from
        // bytes starting at index byteIndex, storing the resulting
        // characters in chars starting at index charIndex. The
        // decoding takes into account the state in which the decoder was left
        // following the last call to this method.
        //
        // An exception occurs if the character array is not large enough to
        // hold the complete decoding of the bytes. The GetCharCount method
        // can be used to determine the exact number of characters that will be
        // produced for a given range of bytes. Alternatively, the
        // GetMaxCharCount method of the Encoding that produced this
        // decoder can be used to determine the maximum number of characters that
        // will be produced for a given number of bytes, regardless of the actual
        // byte values.
        //
        public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                        char[] chars, int charIndex);

        // Default implementation ignores flush and defers to the five-argument overload.
        public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                       char[] chars, int charIndex, bool flush)
        {
            return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
        }

        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation)
        //
        // WARNING WARNING WARNING
        //
        // WARNING: If this breaks it could be a security threat.  Obviously we
        // call this internally, so you need to make sure that your pointers, counts
        // and indexes are correct when you call this method.
        //
        // In addition, we have internal code, which will be marked as "safe" calling
        // this code.  However this code is dependent upon the implementation of an
        // external GetChars() method, which could be overridden by a third party and
        // the results of which cannot be guaranteed.  We use that result to copy
        // the char[] to our char* output buffer.  If the result count was wrong, we
        // could easily overflow our output buffer.  Therefore we do an extra test
        // when we copy the buffer so that we don't overflow charCount either.
        //
        // Returns the number of characters written to chars, which is never more than charCount.
        [CLSCompliant(false)]
        public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                              char* chars, int charCount, bool flush)
        {
            // Validate input parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
                    SR.ArgumentNull_Array);

            if (byteCount < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            Contract.EndContractBlock();

            // Get the byte array to convert
            byte[] arrByte = new byte[byteCount];

            int index;
            for (index = 0; index < byteCount; index++)
                arrByte[index] = bytes[index];

            // Get the char array to fill
            char[] arrChar = new char[charCount];

            // Do the work
            int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);

            Debug.Assert(result <= charCount, "Returned more chars than we have space for");

            // Copy the char array
            // WARNING: We MUST make sure that we don't copy too many chars.  We can't
            // rely on result because it could be a 3rd party implementation.  We need
            // to make sure we never copy more than charCount chars no matter the value
            // of result
            if (result < charCount)
                charCount = result;

            // charCount is now min(result, original charCount), so this loop cannot overrun
            // the caller's buffer just because of an issue in GetChars
            for (index = 0; index < charCount; index++)
                chars[index] = arrChar[index];

            return charCount;
        }

        // This method is used when the output buffer might not be large enough.
        // It will decode until it runs out of bytes, and then it will return
        // true if the entire input was converted.  In either case it
        // will also return the number of converted bytes and output characters used.
        // It will only throw a buffer overflow exception if the entire length of chars[] is
        // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
        // We're done processing this buffer only if completed returns true.
        //
        // An empty input (byteCount == 0) is still given one attempt, so a caller can pass
        // zero bytes with flush set to drain the decoder instead of getting an overflow error.
        //
        // Might consider checking Max...Count to avoid the extra counting step.
        //
        // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
        // that it's likely that we didn't consume as many bytes as we could have.  For some
        // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
        public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex, int charCount, bool flush,
                                      out int bytesUsed, out int charsUsed, out bool completed)
        {
            // Validate parameters
            if (bytes == null || chars == null)
                throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
                      SR.ArgumentNull_Array);

            if (byteIndex < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (charIndex < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (bytes.Length - byteIndex < byteCount)
                throw new ArgumentOutOfRangeException(nameof(bytes),
                      SR.ArgumentOutOfRange_IndexCountBuffer);

            if (chars.Length - charIndex < charCount)
                throw new ArgumentOutOfRangeException(nameof(chars),
                      SR.ArgumentOutOfRange_IndexCountBuffer);
            Contract.EndContractBlock();

            bytesUsed = byteCount;

            // It's easy to do if it won't overrun our buffer.
            // do/while rather than while: the first attempt must happen even when byteCount is 0.
            do
            {
                if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
                {
                    charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
                    completed = (bytesUsed == byteCount &&
                        (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
                    return;
                }

                // Try again with 1/2 the count, won't flush then 'cause won't read it all
                flush = false;
                bytesUsed /= 2;
            }
            while (bytesUsed > 0);

            // Oops, we didn't have anything, we'll have to throw an overflow
            throw new ArgumentException(SR.Argument_ConversionOverflow);
        }

        // This is the version that uses *.
        // We're done processing this buffer only if completed returns true.
        //
        // An empty input (byteCount == 0) is still given one attempt, so a caller can pass
        // zero bytes with flush set to drain the decoder instead of getting an overflow error.
        //
        // Might consider checking Max...Count to avoid the extra counting step.
        //
        // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
        // that it's likely that we didn't consume as many bytes as we could have.  For some
        // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
        [CLSCompliant(false)]
        public virtual unsafe void Convert(byte* bytes, int byteCount,
                                             char* chars, int charCount, bool flush,
                                             out int bytesUsed, out int charsUsed, out bool completed)
        {
            // Validate input parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
                    SR.ArgumentNull_Array);

            if (byteCount < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            Contract.EndContractBlock();

            // Get ready to do it
            bytesUsed = byteCount;

            // It's easy to do if it won't overrun our buffer.
            // do/while rather than while: the first attempt must happen even when byteCount is 0.
            do
            {
                if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
                {
                    charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
                    completed = (bytesUsed == byteCount &&
                        (m_fallbackBuffer == null || m_fallbackBuffer.Remaining == 0));
                    return;
                }

                // Try again with 1/2 the count, won't flush then 'cause won't read it all
                flush = false;
                bytesUsed /= 2;
            }
            while (bytesUsed > 0);

            // Oops, we didn't have anything, we'll have to throw an overflow
            throw new ArgumentException(SR.Argument_ConversionOverflow);
        }
    }
}