diff options
author | Werner Koch <[email protected]> | 2013-08-09 17:19:26 +0000 |
---|---|---|
committer | Werner Koch <[email protected]> | 2013-08-09 17:19:26 +0000 |
commit | 8579091c4f6a36e6bb06fcfbd147a776166fd3ea (patch) | |
tree | bf1e746416e7c1378832a65329cdcbffd6070df2 /src/data-identify.c | |
parent | Prefer GnuPG-2 engines over GnuPG-1. (diff) | |
download | gpgme-8579091c4f6a36e6bb06fcfbd147a776166fd3ea.tar.gz gpgme-8579091c4f6a36e6bb06fcfbd147a776166fd3ea.zip |
Add function gpgme_data_identify.
* src/gpgme.h.in (gpgme_data_type_t): New.
(gpgme_data_identify): New prototype.
* src/data-identify.c: New.
* src/parsetlv.c, src/parsetlv.h: New. Taken from gpa.
* src/libgpgme.vers, src/gpgme.def: Add gpgme_data_identify.
* src/gpgme-tool.c (status): Add STATUS_IDENTIFY_RESULT.
(gt_identify): New.
(cmd_identify): New.
(hlp_passwd): Move close to cmd_passwd.
--
It is often useful to have a way to identify the data which needs
processing. This is such a common task that it makes sense to
implement this in gpgme to avoid diverging implementations.
Diffstat (limited to 'src/data-identify.c')
-rw-r--r-- | src/data-identify.c | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/src/data-identify.c b/src/data-identify.c new file mode 100644 index 00000000..96006335 --- /dev/null +++ b/src/data-identify.c @@ -0,0 +1,247 @@ +/* data-identify.c - Try to identify the data + Copyright (C) 2013 g10 Code GmbH + + This file is part of GPGME. + + GPGME is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + GPGME is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> + +#include "gpgme.h" +#include "data.h" +#include "util.h" +#include "parsetlv.h" + +/* The size of the sample data we take for detection. */ +#define SAMPLE_SIZE 2048 + + + +/* Note that DATA may be binary but a final nul is required so that + string operations will find a terminator. + + Returns: GPGME_DATA_TYPE_xxxx */ +static gpgme_data_type_t +basic_detection (const char *data, size_t datalen) +{ + tlvinfo_t ti; + const char *s; + size_t n; + int maybe_p12 = 0; + + if (datalen < 24) /* Object is probably too short for detection. 
*/ + return GPGME_DATA_TYPE_UNKNOWN; + + /* This is a common example of a CMS object - it is obvious that we + only need to read a few bytes to get to the OID: + 30 82 0B 59 06 09 2A 86 48 86 F7 0D 01 07 02 A0 82 0B 4A 30 82 0B 46 02 + ----------- ++++++++++++++++++++++++++++++++ + SEQUENCE OID (signedData) + (2 byte len) + + A PKCS#12 message is: + + 30 82 08 59 02 01 03 30 82 08 1F 06 09 2A 86 48 86 F7 0D 01 07 01 A0 82 + ----------- ++++++++ ----------- ++++++++++++++++++++++++++++++++ + SEQUENCE INTEGER SEQUENCE OID (data) + + A X.509 certificate is: + + 30 82 05 B8 30 82 04 A0 A0 03 02 01 02 02 07 15 46 A0 BF 30 07 39 30 0D + ----------- +++++++++++ ----- ++++++++ -------------------------- + SEQUENCE SEQUENCE [0] INTEGER INTEGER SEQU + (tbs) (version) (s/n) (Algo) + + Thus we need to read at least 22 bytes, we add 2 bytes to cope with + length headers stored with 4 bytes. + */ + + + s = data; + n = datalen; + + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; /* Not properly BER encoded. */ + if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE + && ti.is_cons)) + goto try_pgp; /* A CMS object always starts with a sequence. */ + + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; /* Not properly BER encoded. */ + if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE + && ti.is_cons && n >= ti.length) + { + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; + if (!(ti.cls == ASN1_CLASS_CONTEXT && ti.tag == 0 + && ti.is_cons && ti.length == 3 && n >= ti.length)) + goto try_pgp; + + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; + if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER + && !ti.is_cons && ti.length == 1 && n && (*s == 1 || *s == 2))) + goto try_pgp; + s++; + n--; + if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER + && !ti.is_cons)) + goto try_pgp; + /* Because the now following S/N may be larger than the sample + data we have, we stop parsing here and don't check for the + algorithm ID. 
*/ + return GPGME_DATA_TYPE_X509_CERT; + } + if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER + && !ti.is_cons && ti.length == 1 && n && *s == 3) + { + maybe_p12 = 1; + s++; + n--; + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; + if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE + && ti.is_cons)) + goto try_pgp; + if (parse_tlv (&s, &n, &ti)) + goto try_pgp; + } + if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_OBJECT_ID + && !ti.is_cons && ti.length && n >= ti.length) + { + if (ti.length == 9) + { + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x01", 9)) + { + /* Data. */ + return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12 + /* */ : GPGME_DATA_TYPE_CMS_OTHER); + } + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x02", 9)) + { + /* Signed Data. */ + return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12 + /* */ : GPGME_DATA_TYPE_CMS_SIGNED); + } + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x03", 9)) + return GPGME_DATA_TYPE_CMS_ENCRYPTED; /* Enveloped Data. */ + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x05", 9)) + return GPGME_DATA_TYPE_CMS_OTHER; /* Digested Data. */ + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x06", 9)) + return GPGME_DATA_TYPE_CMS_OTHER; /* Encrypted Data. */ + } + else if (ti.length == 11) + { + if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x09\x10\x01\x02", 11)) + return GPGME_DATA_TYPE_CMS_OTHER; /* Auth Data. */ + } + } + + + try_pgp: + /* Check whether this might be a non-armored PGP message. We need + to do this before checking for armor lines, so that we don't get + fooled by armored messages inside a signed binary PGP message. */ + if ((data[0] & 0x80)) + { + /* That might be a binary PGP message. At least it is not plain + ASCII. Of course this might be certain lead-in text of + armored CMS messages. However, I am not sure whether this is + at all defined and in any case it is uncommon. 
Thus we don't + do any further plausibility checks but stupidly assume no CMS + armored data will follow. */ + return GPGME_DATA_TYPE_UNKNOWN; + } + + /* Now check whether there are armor lines. */ + for (s = data; s && *s; s = (*s=='\n')?(s+1):((s=strchr (s,'\n'))?(s+1):s)) + { + if (!strncmp (s, "-----BEGIN ", 11)) + { + if (!strncmp (s+11, "SIGNED ", 7)) + return GPGME_DATA_TYPE_CMS_SIGNED; + if (!strncmp (s+11, "ENCRYPTED ", 10)) + return GPGME_DATA_TYPE_CMS_ENCRYPTED; + if (!strncmp (s+11, "PGP ", 4)) + { + if (!strncmp (s+15, "SIGNATURE", 9)) + return GPGME_DATA_TYPE_PGP_SIGNED; + if (!strncmp (s+15, "SIGNED MESSAGE", 14)) + return GPGME_DATA_TYPE_PGP_SIGNED; + if (!strncmp (s+15, "PUBLIC KEY BLOCK", 16)) + return GPGME_DATA_TYPE_PGP_KEY; + if (!strncmp (s+15, "PRIVATE KEY BLOCK", 17)) + return GPGME_DATA_TYPE_PGP_KEY; + if (!strncmp (s+15, "SECRET KEY BLOCK", 16)) + return GPGME_DATA_TYPE_PGP_KEY; + if (!strncmp (s+15, "ARMORED FILE", 12)) + return GPGME_DATA_TYPE_UNKNOWN; + return GPGME_DATA_TYPE_PGP_OTHER; /* PGP MESSAGE */ + } + if (!strncmp (s+11, "CERTIFICATE", 11)) + return GPGME_DATA_TYPE_X509_CERT; + if (!strncmp (s+11, "PKCS12", 6)) + return GPGME_DATA_TYPE_PKCS12; + return GPGME_DATA_TYPE_CMS_OTHER; /* Not PGP, thus we assume CMS. */ + } + } + + return GPGME_DATA_TYPE_UNKNOWN; +} + + +/* Try to detect the type of the data. Note that this function works + only on seekable data objects. The function tries to reset the + file pointer but there is no guarantee that it will work. + + FIXME: We may want to add internal buffering so that this function + can be implemented for allmost all kind of data objects. + */ +gpgme_data_type_t +gpgme_data_identify (gpgme_data_t dh, int reserved) +{ + gpgme_data_type_t result; + char *sample; + int n; + gpgme_off_t off; + + /* Check whether we can seek the data object. 
*/ + off = gpgme_data_seek (dh, 0, SEEK_CUR); + if (off == (gpgme_off_t)(-1)) + return GPGME_DATA_TYPE_INVALID; + + /* Allocate a buffer and read the data. */ + sample = malloc (SAMPLE_SIZE); + if (!sample) + return GPGME_DATA_TYPE_INVALID; /* Ooops. */ + n = gpgme_data_read (dh, sample, SAMPLE_SIZE - 1); + if (n < 0) + { + free (sample); + return GPGME_DATA_TYPE_INVALID; /* Ooops. */ + } + sample[n] = 0; /* (Required for our string functions.) */ + + result = basic_detection (sample, n); + free (sample); + gpgme_data_seek (dh, off, SEEK_SET); + + return result; +} |