diff options
Diffstat (limited to 'ext/solv_jsonparser.c')
-rw-r--r-- | ext/solv_jsonparser.c | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/ext/solv_jsonparser.c b/ext/solv_jsonparser.c new file mode 100644 index 0000000..053ee6f --- /dev/null +++ b/ext/solv_jsonparser.c @@ -0,0 +1,294 @@ +/* + * solv_jsonparser.c + * + * Simple JSON stream parser + * + * Copyright (c) 2018, SUSE LLC + * + * This program is licensed under the BSD license, read LICENSE.BSD + * for further information + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "util.h" +#include "solv_jsonparser.h" + +struct solv_jsonparser * +jsonparser_init(struct solv_jsonparser *jp, FILE *fp) +{ + memset(jp, 0, sizeof(*jp)); + jp->fp = fp; + jp->state = JP_START; + jp->line = jp->nextline = 1; + jp->nextc = ' '; + queue_init(&jp->stateq); + return jp; +} + +struct solv_jsonparser * +jsonparser_free(struct solv_jsonparser *jp) +{ + queue_free(&jp->stateq); + return 0; +} + +static void +savec(struct solv_jsonparser *jp, char c) +{ + if (jp->nspace == jp->aspace) + { + jp->aspace += 256; + jp->space = solv_realloc(jp->space, jp->aspace); + } + jp->space[jp->nspace++] = c; +} + +static void +saveutf8(struct solv_jsonparser *jp, int c) +{ + int i; + if (c < 0x80) + { + savec(jp, c); + return; + } + i = c < 0x800 ? 1 : c < 0x10000 ? 2 : 3; + savec(jp, (0x1f80 >> i) | (c >> (6 * i))); + while (--i >= 0) + savec(jp, 0x80 | ((c >> (6 * i)) & 0x3f)); +} + +static inline int +nextc(struct solv_jsonparser *jp) +{ + int c = getc(jp->fp); + if (c == '\n') + jp->nextline++; + return c; +} + +static int +skipspace(struct solv_jsonparser *jp) +{ + int c = jp->nextc; + while (c == ' ' || c == '\t' || c == '\r' || c == '\n') + c = nextc(jp); + jp->line = jp->nextline; + return (jp->nextc = c); +} + +static int +parseliteral(struct solv_jsonparser *jp) +{ + size_t nspace = jp->nspace; + int c; + savec(jp, jp->nextc); + for (;;) + { + c = nextc(jp); + if (c < 'a' || c > 'z') + break; + savec(jp, c); + } + jp->nextc = c; + savec(jp, 0); + if (!strcmp(jp->space + nspace, "true")) + return JP_BOOL; + if (!strcmp(jp->space + nspace, "false")) + return JP_BOOL; + if (!strcmp(jp->space + nspace, "null")) + return JP_NULL; + return JP_ERROR; +} + +static int +parsenumber(struct solv_jsonparser *jp) +{ + int c; + savec(jp, jp->nextc); + for (;;) + { + c = nextc(jp); + if ((c < '0' || c > '9') && c != '+' && c != '-' && c != '.' && c != 'e' && c != 'E') + break; + savec(jp, c); + } + jp->nextc = c; + savec(jp, 0); + return JP_NUMBER; +} + +static int +parseutf8(struct solv_jsonparser *jp, int surrogate) +{ + int c, i, r = 0; + /* parse 4-digit hex */ + for (i = 0; i < 4; i++) + { + c = nextc(jp); + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'f') + c -= 'a' - 10; + else if (c >= 'A' && c <= 'F') + c -= 'A' - 10; + else + return -1; + r = (r << 4) | c; + } + if (!surrogate && r >= 0xd800 && r < 0xdc00) + { + /* utf16 surrogate pair encodes 0x10000 - 0x10ffff */ + int r2; + if (nextc(jp) != '\\' || nextc(jp) != 'u' || (r2 = parseutf8(jp, 1)) < 0xdc00 || r2 >= 0xe000) + return -1; + r = 0x10000 + ((r & 0x3ff) << 10 | (r2 & 0x3ff)); + } + return r; +} + +static int +parsestring(struct solv_jsonparser *jp) +{ + int c; + for (;;) + { + if ((c = nextc(jp)) < 32) + return JP_ERROR; + if (c == '"') + break; + if (c == '\\') + { + switch (c = nextc(jp)) + { + case '"': + case '\\': + case '/': + case '\n': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'u': + if ((c = parseutf8(jp, 0)) < 0) + return JP_ERROR; + saveutf8(jp, c); + continue; + default: + return JP_ERROR; + } + } + savec(jp, c); + } + jp->nextc = ' '; + savec(jp, 0); + return JP_STRING; +} + +static int +parsevalue(struct solv_jsonparser *jp) +{ + int c = skipspace(jp); + if (c == '"') + return parsestring(jp); + if ((c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.') + return parsenumber(jp); + if ((c >= 'a' && c <= 'z')) + return parseliteral(jp); + jp->nextc = ' '; + if (c == '[') + return JP_ARRAY; + if (c == '{') + return JP_OBJECT; + if (c == ']') + return JP_ARRAY_END; + if (c == '}') + return JP_OBJECT_END; + return JP_ERROR; +} + +int +jsonparser_parse(struct solv_jsonparser *jp) +{ + int type; + size_t nspace; + + jp->depth = jp->stateq.count; + jp->key = jp->value = 0; + jp->keylen = jp->valuelen = 0; + nspace = jp->nspace = 0; + + if (jp->state == JP_END) + return JP_END; + if (jp->state == JP_START) + jp->state = JP_END; + type = parsevalue(jp); + if (type <= 0) + return JP_ERROR; + if (type == JP_OBJECT_END || type == JP_ARRAY_END) + { + if (jp->state != type - 1) + return JP_ERROR; + jp->state = queue_pop(&jp->stateq); + } + else if (jp->state == JP_OBJECT) + { + nspace = jp->nspace; + if (type != JP_STRING) + return JP_ERROR; + if (skipspace(jp) != ':') + return JP_ERROR; + jp->nextc = ' '; + type = parsevalue(jp); + if (type == JP_OBJECT_END || type == JP_ARRAY_END) + return JP_ERROR; + jp->key = jp->space; + jp->keylen = nspace - 1; + } + if (type == JP_STRING || type == JP_NUMBER || type == JP_BOOL || type == JP_NULL) + { + jp->value = jp->space + nspace; + jp->valuelen = jp->nspace - nspace - 1; + } + if (type == JP_ARRAY || type == JP_OBJECT) + { + queue_push(&jp->stateq, jp->state); + jp->state = type; + } + else if (jp->state == JP_OBJECT || jp->state == JP_ARRAY) + { + int c = skipspace(jp); + if (c == ',') + jp->nextc = ' '; + else if (c != (jp->state == JP_OBJECT ? '}' : ']')) + return JP_ERROR; + } + return type; +} + +int +jsonparser_skip(struct solv_jsonparser *jp, int type) +{ + if (type == JP_ARRAY || type == JP_OBJECT) + { + int depth = jp->depth + 1, endtype = type + 1; + while (type > 0 && (type != endtype || jp->depth != depth)) + type = jsonparser_parse(jp); + } + return type; +} + |