summaryrefslogtreecommitdiff
path: root/src/data-identify.c
blob: 615a4f3a6f84c41ef1352baf17a901e73fb7742d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
/* data-identify.c - Try to identify the data
   Copyright (C) 2013, 2016 g10 Code GmbH

   This file is part of GPGME.

   GPGME is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   GPGME is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdlib.h>
#include <string.h>

#include "gpgme.h"
#include "data.h"
#include "util.h"
#include "parsetlv.h"


/* The size of the buffer allocated for the sample data we take for
   detection.  gpgme_data_identify reads at most SAMPLE_SIZE-1 bytes
   into it so that a terminating nul can always be appended.  */
#define SAMPLE_SIZE 2048


/* OpenPGP packet types.  The values are the packet tags as they
   appear in the CTB of a packet header; 15 is not assigned here.  */
enum
  {
    PKT_NONE	      = 0,
    PKT_PUBKEY_ENC    = 1,  /* Public key encrypted packet. */
    PKT_SIGNATURE     = 2,  /* Signature packet. */
    PKT_SYMKEY_ENC    = 3,  /* Symmetric-key encrypted session key packet. */
    PKT_ONEPASS_SIG   = 4,  /* One pass sig packet. */
    PKT_SECRET_KEY    = 5,  /* Secret key. */
    PKT_PUBLIC_KEY    = 6,  /* Public key. */
    PKT_SECRET_SUBKEY = 7,  /* Secret subkey. */
    PKT_COMPRESSED    = 8,  /* Compressed data packet. */
    PKT_ENCRYPTED     = 9,  /* Conventional encrypted data. */
    PKT_MARKER	      = 10, /* Marker packet. */
    PKT_PLAINTEXT     = 11, /* Literal data packet. */
    PKT_RING_TRUST    = 12, /* Keyring trust packet. */
    PKT_USER_ID	      = 13, /* User id packet. */
    PKT_PUBLIC_SUBKEY = 14, /* Public subkey. */
    PKT_OLD_COMMENT   = 16, /* Comment packet from an OpenPGP draft. */
    PKT_ATTRIBUTE     = 17, /* PGP's attribute packet. */
    PKT_ENCRYPTED_MDC = 18, /* Integrity protected encrypted data. */
    PKT_MDC 	      = 19, /* Manipulation detection code packet. */
  };


/* Assemble an unsigned long from the four bytes found at BUFFER.  The
 * bytes are taken in big-endian order, that is the first byte ends up
 * in bits 24..31 of the result.  */
static inline unsigned long
buf32_to_ulong (const void *buffer)
{
  const unsigned char *octets = buffer;
  unsigned long value;

  value  = (unsigned long)octets[0] << 24;
  value |= (octets[1] << 16);
  value |= (octets[2] << 8);
  value |= octets[3];

  return value;
}


/* Parse the header of the OpenPGP packet found at the address stored
 * at BUFPTR; at most the number of bytes stored at BUFLEN are
 * available.  On success the pointer at BUFPTR is advanced to the
 * next packet and the length at BUFLEN is reduced to the remaining
 * number of bytes; if no bytes remain, NULL is stored at BUFPTR.
 *
 * Returns 0 on success and then stores:
 *
 *  R_PKTTYPE = The packet type.
 *  R_NTOTAL  = The total number of bytes of this packet (header
 *              plus body).
 *
 * Error codes:
 *
 *  GPG_ERR_NO_DATA    - The length at BUFLEN is zero.
 *  GPG_ERR_INV_PACKET - The data starts with the PNG magic, the CTB
 *                       has bit 7 cleared, or the length bytes are
 *                       missing.
 *  GPG_ERR_UNEXPECTED - The packet type is not one of the PKT_xxx
 *                       values (PKT_NONE is not accepted either).
 *  GPG_ERR_TRUNCATED  - The length header announces more bytes than
 *                       are available.  In this case the packet type
 *                       is anyway stored at R_PKTTYPE, 0 is stored at
 *                       R_NTOTAL, NULL is stored at BUFPTR and BUFLEN
 *                       is left untouched.
 *
 * For all other errors none of the output arguments are changed.
 */
static gpg_error_t
next_openpgp_packet (unsigned char const **bufptr, size_t *buflen,
                     int *r_pkttype, size_t *r_ntotal)
{
  const unsigned char *start = *bufptr;
  const unsigned char *pos = start;
  size_t left = *buflen;
  unsigned long bodylen = 0;
  int ctb, type;

  if (!left)
    return gpg_error (GPG_ERR_NO_DATA);

  /* Blacklisting: The first byte of a PNG file has bit 7 set and
   * would thus pass the CTB check below.  */
  if (left >= 4 && !memcmp (pos, "\x89PNG", 4))
    return gpg_error (GPG_ERR_INV_PACKET);

  ctb = *pos++;
  left--;
  if (!(ctb & 0x80))
    return gpg_error (GPG_ERR_INV_PACKET); /* Invalid CTB. */

  if (!(ctb & 0x40))
    {
      /* Old style CTB: Bits 2..5 give the type and the two low bits
       * select 1, 2 or 4 length bytes; the value 3 means that there
       * are no length bytes at all (the length stays 0).  */
      size_t nlenbytes = ((ctb & 3) == 3)? 0 : ((size_t)1 << (ctb & 3));
      size_t i;

      type = (ctb >> 2) & 0xf;
      if (left < nlenbytes)
        return gpg_error (GPG_ERR_INV_PACKET); /* Not enough length bytes. */
      for (i = 0; i < nlenbytes; i++)
        {
          bodylen = (bodylen << 8) | *pos++;
          left--;
        }
    }
  else
    {
      /* New style (OpenPGP) CTB: The low 6 bits give the type and
       * the first length byte selects the length encoding.  */
      int first;

      type = (ctb & 0x3f);
      if (!left)
        return gpg_error (GPG_ERR_INV_PACKET); /* No 1st length byte. */
      first = *pos++;
      left--;

      if (first < 192)
        {
          /* One byte length.  */
          bodylen = first;
        }
      else if (first < 224)
        {
          /* Two byte length.  */
          if (!left)
            return gpg_error (GPG_ERR_INV_PACKET); /* No 2nd length byte. */
          bodylen = (first - 192) * 256 + *pos++ + 192;
          left--;
        }
      else if (first == 255)
        {
          /* Four byte length follows.  */
          if (left < 4)
            return gpg_error (GPG_ERR_INV_PACKET); /* No length bytes. */
          bodylen = buf32_to_ulong (pos);
          pos += 4;
          left -= 4;
        }
      /* Else partial length encoding: BODYLEN stays at 0.  */
    }

  /* Basic sanity check: Only the packet types listed in the PKT_xxx
   * enum are allowed.  This rules out 0, 15 and everything beyond
   * PKT_MDC.  */
  if (type < PKT_PUBKEY_ENC || type > PKT_MDC
      || (type > PKT_PUBLIC_SUBKEY && type < PKT_OLD_COMMENT))
    return gpg_error (GPG_ERR_UNEXPECTED);

  *r_pkttype = type;

  if (bodylen > left)
    {
      /* The length header claims more than we have.  This is
       * possible because we may have only a truncated image.  */
      *r_ntotal = 0;
      *bufptr = NULL;
      return gpg_error (GPG_ERR_TRUNCATED);
    }

  *r_ntotal = (pos - start) + bodylen;

  left -= bodylen;
  *buflen = left;
  *bufptr = left? (pos + bodylen) : NULL;

  return 0;
}


/* Detection of PGP binary data.  This function parses the OpenPGP
 * packets found in the IMAGELEN bytes at IMAGE_ARG and derives the
 * kind of OpenPGP object from the sequence of packet types.  The
 * parser is robust enough to work on a truncated version: a packet
 * whose body extends beyond the image is still classified by its
 * type.
 *
 * Returns a GPGME_DATA_TYPE_ value:
 *
 *  GPGME_DATA_TYPE_PGP_SIGNED    - A one-pass signature packet, one
 *                                  or more signature packets followed
 *                                  by a plaintext packet, or a leading
 *                                  compressed packet.
 *  GPGME_DATA_TYPE_PGP_KEY       - A public or secret key packet.
 *  GPGME_DATA_TYPE_PGP_ENCRYPTED - A public key or symmetric key
 *                                  encrypted session key packet.
 *  GPGME_DATA_TYPE_PGP_SIGNATURE - Only signature packets were seen.
 *  GPGME_DATA_TYPE_PGP_OTHER     - Any other valid packet.
 *  GPGME_DATA_TYPE_UNKNOWN       - Nothing could be concluded.
 */
static gpgme_data_type_t
pgp_binary_detection (const void *image_arg, size_t imagelen)
{
  gpg_error_t err = 0;
  const unsigned char *image = image_arg;
  size_t n;
  int pkttype;
  int anypacket = 0;
  int allsignatures = 0;
  int onlysigs_so_far;

  while (!err && image)
    {
      err = next_openpgp_packet (&image, &imagelen, &pkttype, &n);
      if (gpg_err_code (err) == GPG_ERR_TRUNCATED)
        ; /* PKTTYPE is valid; the loop condition stops us afterwards.  */
      else if (err)
        break;

      /* Skip all leading marker packets.  */
      if (!anypacket && pkttype == PKT_MARKER)
        continue;

      /* Remember whether all packets before the current one were
       * signature packets.  This needs to be done before the flag is
       * updated for the current packet because the update clears it
       * for each non-signature packet and the plaintext case below
       * could then never detect the old style signature format.  */
      onlysigs_so_far = allsignatures;

      if (pkttype == PKT_SIGNATURE)
        {
          if (!anypacket)
            allsignatures = 1;
        }
      else
        allsignatures = 0;

      switch (pkttype)
        {
        case PKT_SIGNATURE:
          break;  /* We decide later.  */

        case PKT_PLAINTEXT:
          /* Old style signature format: {sig}+,plaintext */
          if (onlysigs_so_far)
            return GPGME_DATA_TYPE_PGP_SIGNED;
          break;

        case PKT_ONEPASS_SIG:
          return GPGME_DATA_TYPE_PGP_SIGNED;

        case PKT_SECRET_KEY:
        case PKT_PUBLIC_KEY:
          return GPGME_DATA_TYPE_PGP_KEY;

        case PKT_SECRET_SUBKEY:
        case PKT_PUBLIC_SUBKEY:
          return GPGME_DATA_TYPE_PGP_OTHER;

        case PKT_PUBKEY_ENC:
        case PKT_SYMKEY_ENC:
          return GPGME_DATA_TYPE_PGP_ENCRYPTED;

        case PKT_COMPRESSED:
          /* If this is the first packet we assume that a signed
           * packet follows.  We do not want to uncompress it here due
           * to the need of a lot of code and the potential DoS. */
          if (!anypacket)
            return GPGME_DATA_TYPE_PGP_SIGNED;
          return GPGME_DATA_TYPE_PGP_OTHER;

        default:
          return GPGME_DATA_TYPE_PGP_OTHER;
        }
      anypacket = 1;
    }

  /* Nothing but signature packets: This is a detached signature.  */
  if (allsignatures)
    return  GPGME_DATA_TYPE_PGP_SIGNATURE;

  return GPGME_DATA_TYPE_UNKNOWN;
}


/* This is probably an armored "PGP MESSAGE" which can encode
 * different PGP data types.  STRING is modified after a call to this
 * function. */
static gpgme_data_type_t
inspect_pgp_message (char *string)
{
  struct b64state state;
  size_t nbytes;

  if (_gpgme_b64dec_start (&state, ""))
    return GPGME_DATA_TYPE_INVALID; /* oops */

  if (_gpgme_b64dec_proc (&state, string, strlen (string), &nbytes))
    {
      _gpgme_b64dec_finish (&state);
      return GPGME_DATA_TYPE_UNKNOWN; /* bad encoding etc. */
    }
  _gpgme_b64dec_finish (&state);
  string[nbytes] = 0; /* Better append a Nul. */

  return pgp_binary_detection (string, nbytes);
}


/* Guess the type of the sample DATA of length DATALEN.  The checks
   are done in this order: BER encoded objects (X.509 certificate,
   PKCS#12, CMS), binary OpenPGP data, and finally armor header lines.

   Note that DATA may be binary but a final nul is required so that
   string operations will find a terminator.  DATA may get modified
   because an armored PGP object is passed to inspect_pgp_message.

   Returns: GPGME_DATA_TYPE_xxxx; GPGME_DATA_TYPE_UNKNOWN is returned
   if nothing matched or if the sample is shorter than 24 bytes.  */
static gpgme_data_type_t
basic_detection (char *data, size_t datalen)
{
  tlvinfo_t ti;
  const char *s;
  size_t n;
  int maybe_p12 = 0;  /* Set after SEQUENCE, INTEGER(3), SEQUENCE was seen.  */

  if (datalen < 24) /* Object is probably too short for detection.  */
    return GPGME_DATA_TYPE_UNKNOWN;

  /* This is a common example of a CMS object - it is obvious that we
     only need to read a few bytes to get to the OID:
  30 82 0B 59 06 09 2A 86 48 86 F7 0D 01 07 02 A0 82 0B 4A 30 82 0B 46 02
  ----------- ++++++++++++++++++++++++++++++++
  SEQUENCE    OID (signedData)
  (2 byte len)

    A PKCS#12 message is:

  30 82 08 59 02 01 03 30 82 08 1F 06 09 2A 86 48 86 F7 0D 01 07 01 A0 82
  ----------- ++++++++ ----------- ++++++++++++++++++++++++++++++++
  SEQUENCE    INTEGER  SEQUENCE    OID (data)

    An X.509 certificate is:

  30 82 05 B8 30 82 04 A0 A0 03 02 01 02 02 07 15 46 A0 BF 30 07 39 30 0D
  ----------- +++++++++++ ----- ++++++++ --------------------------
  SEQUENCE    SEQUENCE    [0]   INTEGER  INTEGER                    SEQU
              (tbs)            (version) (s/n)                      (Algo)

    Thus we need to read at least 22 bytes, we add 2 bytes to cope with
    length headers stored with 4 bytes.
  */


  /* S and N track the parse position and the remaining length; they
     are handed to parse_tlv which presumably advances them past each
     parsed header - confirm against parsetlv.c.  */
  s = data;
  n = datalen;

  if (parse_tlv (&s, &n, &ti))
    goto try_pgp; /* Not properly BER encoded.  */
  if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
        && ti.is_cons))
    goto try_pgp; /* A CMS object always starts with a sequence.  */

  if (parse_tlv (&s, &n, &ti))
    goto try_pgp; /* Not properly BER encoded.  */
  /* Case 1: A second SEQUENCE - check for an X.509 certificate.  */
  if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
      && ti.is_cons && n >= ti.length)
    {
      /* Expect the context tag [0] of length 3 wrapping the version.  */
      if (parse_tlv (&s, &n, &ti))
        goto try_pgp;
      if (!(ti.cls == ASN1_CLASS_CONTEXT && ti.tag == 0
            && ti.is_cons && ti.length == 3 && n >= ti.length))
        goto try_pgp;

      /* Expect the version: a one byte INTEGER with the value 1 or 2.  */
      if (parse_tlv (&s, &n, &ti))
        goto try_pgp;
      if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
            && !ti.is_cons && ti.length == 1 && n && (*s == 1 || *s == 2)))
        goto try_pgp;
      s++;
      n--;
      /* NOTE(review): TI has not been parsed again since the check
         above, thus this condition can't fail here.  Presumably a
         parse_tlv call for the serial number was intended before
         this check - confirm.  */
      if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
            && !ti.is_cons))
        goto try_pgp;
      /* Because the now following S/N may be larger than the sample
         data we have, we stop parsing here and don't check for the
         algorithm ID.  */
      return GPGME_DATA_TYPE_X509_CERT;
    }
  /* Case 2: A one byte INTEGER with the value 3 - this may be the
     version of a PKCS#12 object.  If so, skip the following SEQUENCE
     header and parse the next object so that the OID check below is
     done on it.  */
  if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
      && !ti.is_cons && ti.length == 1 && n && *s == 3)
    {
      maybe_p12 = 1;
      s++;
      n--;
      if (parse_tlv (&s, &n, &ti))
        goto try_pgp;
      if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
            && ti.is_cons))
        goto try_pgp;
      if (parse_tlv (&s, &n, &ti))
        goto try_pgp;
    }
  /* Case 3 (also reached from case 2): An OID which needs to be
     fully contained in the sample.  It is compared against the
     encoded forms of the known content type OIDs.  If none of them
     matches we fall through to the PGP detection.  */
  if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_OBJECT_ID
      && !ti.is_cons && ti.length && n >= ti.length)
    {
      if (ti.length == 9)
        {
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x01", 9))
            {
              /* Data.  */
              return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
                      /*     */ : GPGME_DATA_TYPE_CMS_OTHER);
            }
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x02", 9))
            {
              /* Signed Data.  */
              return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
                      /*     */ : GPGME_DATA_TYPE_CMS_SIGNED);
            }
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x03", 9))
            return GPGME_DATA_TYPE_CMS_ENCRYPTED; /* Enveloped Data.  */
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x05", 9))
            return GPGME_DATA_TYPE_CMS_OTHER; /* Digested Data.  */
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x06", 9))
            return GPGME_DATA_TYPE_CMS_OTHER; /* Encrypted Data.  */
        }
      else if (ti.length == 11)
        {
          if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x09\x10\x01\x02", 11))
            return GPGME_DATA_TYPE_CMS_OTHER; /* Auth Data.  */
        }
    }


 try_pgp:
  /* Check whether this might be a non-armored PGP message.  We need
     to do this before checking for armor lines, so that we don't get
     fooled by armored messages inside a signed binary PGP message.  */
  if ((data[0] & 0x80))
    {
      /* That might be a binary PGP message.  At least it is not plain
         ASCII.  Of course this might be certain lead-in text of
         armored CMS messages.  However, I am not sure whether this is
         at all defined and in any case it is uncommon.  Thus we don't
         do any further plausibility checks but stupidly assume no CMS
         armored data will follow.  */
      return pgp_binary_detection (data, datalen);
    }

  /* Now check whether there are armor lines.  S is set to the start
     of each line in turn: the step expression moves S behind the
     next newline and sets S to NULL if there is no further newline,
     which terminates the loop.  The first line starting with
     "-----BEGIN " decides the result.  */
  for (s = data; s && *s; s = (*s=='\n')?(s+1):((s=strchr (s,'\n'))?(s+1):s))
    {
      if (!strncmp (s, "-----BEGIN ", 11))
        {
          if (!strncmp (s+11, "SIGNED ", 7))
            return GPGME_DATA_TYPE_CMS_SIGNED;
          if (!strncmp (s+11, "ENCRYPTED ", 10))
            return GPGME_DATA_TYPE_CMS_ENCRYPTED;
          if (!strncmp (s+11, "PGP ", 4))
            {
              if (!strncmp (s+15, "SIGNATURE", 9))
                return GPGME_DATA_TYPE_PGP_SIGNATURE;
              if (!strncmp (s+15, "SIGNED MESSAGE", 14))
                return GPGME_DATA_TYPE_PGP_SIGNED;
              if (!strncmp (s+15, "PUBLIC KEY BLOCK", 16))
                return GPGME_DATA_TYPE_PGP_KEY;
              if (!strncmp (s+15, "PRIVATE KEY BLOCK", 17))
                return GPGME_DATA_TYPE_PGP_KEY;
              if (!strncmp (s+15, "SECRET KEY BLOCK", 16))
                return GPGME_DATA_TYPE_PGP_KEY;
              if (!strncmp (s+15, "ARMORED FILE", 12))
                return GPGME_DATA_TYPE_UNKNOWN;

              /* Any other PGP armor type: decode the armor and look
                 at the packets.  Note that the entire sample DATA is
                 passed and not only the part starting at S.  */
              return inspect_pgp_message (data);
            }
          if (!strncmp (s+11, "CERTIFICATE", 11))
            return GPGME_DATA_TYPE_X509_CERT;
          if (!strncmp (s+11, "PKCS12", 6))
            return GPGME_DATA_TYPE_PKCS12;
          return GPGME_DATA_TYPE_CMS_OTHER; /* Not PGP, thus we assume CMS.  */
        }
    }

  return GPGME_DATA_TYPE_UNKNOWN;
}


/* Try to detect the type of the data held by the data object DH.
   RESERVED is not used.  A sample of at most SAMPLE_SIZE-1 bytes is
   read at the current position and then classified.  Note that this
   function works only on seekable data objects.  The function tries
   to reset the file pointer but there is no guarantee that it will
   work; the result of that final seek is not checked.

   Returns GPGME_DATA_TYPE_INVALID if the current position can't be
   determined, if the sample buffer can't be allocated, or if reading
   failed; otherwise the detected GPGME_DATA_TYPE_xxxx value.

   FIXME: We may want to add internal buffering so that this function
   can be implemented for almost all kinds of data objects.
 */
gpgme_data_type_t
gpgme_data_identify (gpgme_data_t dh, int reserved)
{
  gpgme_data_type_t type = GPGME_DATA_TYPE_INVALID;
  gpgme_off_t startpos;
  char *buffer;
  int nread;

  (void)reserved;

  /* Asking for the current position also tells us whether the data
     object is seekable at all.  */
  startpos = gpgme_data_seek (dh, 0, SEEK_CUR);
  if (startpos == (gpgme_off_t)(-1))
    return type;

  buffer = malloc (SAMPLE_SIZE);
  if (!buffer)
    return type; /* Ooops.  */

  /* One byte of the buffer is kept for the terminating nul.  */
  nread = gpgme_data_read (dh, buffer, SAMPLE_SIZE - 1);
  if (nread >= 0)
    {
      buffer[nread] = 0;  /* (Required for our string functions.)  */
      type = basic_detection (buffer, nread);
      free (buffer);
      gpgme_data_seek (dh, startpos, SEEK_SET);
    }
  else
    free (buffer); /* Ooops - read error; TYPE is still INVALID.  */

  return type;
}