diff options
author | jbj <devnull@localhost> | 2003-02-20 18:55:53 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2003-02-20 18:55:53 +0000 |
commit | a86d93be542c918a10630f6348c29b346c0274f4 (patch) | |
tree | c97a04a14a6c1cb61edd3bbbd0befc6213ba8495 /expat/xmlwf | |
parent | 2c26d43b9898a3b5efb06b34946e7918384c1e55 (diff) | |
download | librpm-tizen-a86d93be542c918a10630f6348c29b346c0274f4.tar.gz librpm-tizen-a86d93be542c918a10630f6348c29b346c0274f4.tar.bz2 librpm-tizen-a86d93be542c918a10630f6348c29b346c0274f4.zip |
Initial revision
CVS patchset: 6041
CVS date: 2003/02/20 18:55:53
Diffstat (limited to 'expat/xmlwf')
-rw-r--r-- | expat/xmlwf/Makefile.in | 15 | ||||
-rw-r--r-- | expat/xmlwf/codepage.c | 65 | ||||
-rw-r--r-- | expat/xmlwf/codepage.h | 7 | ||||
-rw-r--r-- | expat/xmlwf/ct.c | 145 | ||||
-rw-r--r-- | expat/xmlwf/filemap.h | 17 | ||||
-rw-r--r-- | expat/xmlwf/readfilemap.c | 74 | ||||
-rw-r--r-- | expat/xmlwf/unixfilemap.c | 57 | ||||
-rw-r--r-- | expat/xmlwf/wfcheck.c | 953 | ||||
-rw-r--r-- | expat/xmlwf/wfcheck.h | 38 | ||||
-rw-r--r-- | expat/xmlwf/wfcheckmessage.c | 30 | ||||
-rw-r--r-- | expat/xmlwf/win32filemap.c | 95 | ||||
-rw-r--r-- | expat/xmlwf/xmlfile.c | 217 | ||||
-rw-r--r-- | expat/xmlwf/xmlfile.h | 11 | ||||
-rw-r--r-- | expat/xmlwf/xmlmime.c | 161 | ||||
-rw-r--r-- | expat/xmlwf/xmlmime.h | 19 | ||||
-rw-r--r-- | expat/xmlwf/xmltchar.h | 36 | ||||
-rw-r--r-- | expat/xmlwf/xmlurl.h | 13 | ||||
-rw-r--r-- | expat/xmlwf/xmlwf.c | 759 | ||||
-rw-r--r-- | expat/xmlwf/xmlwf.dsp | 136 | ||||
-rw-r--r-- | expat/xmlwf/xmlwin32url.cxx | 371 |
20 files changed, 3219 insertions, 0 deletions
diff --git a/expat/xmlwf/Makefile.in b/expat/xmlwf/Makefile.in new file mode 100644 index 000000000..d7c4cb07f --- /dev/null +++ b/expat/xmlwf/Makefile.in @@ -0,0 +1,15 @@ +LIBDIR= ../lib/.libs +INCDIR= ../lib + +LDFLAGS= @LDFLAGS@ +CFLAGS= @CFLAGS@ -I$(INCDIR) + +FILEMAP_OBJ= @FILEMAP_OBJ@ +OBJS= xmlwf.o xmlfile.o codepage.o $(FILEMAP_OBJ) +LIBS= -L$(LIBDIR) -lexpat + +xmlwf: $(OBJS) + $(CC) -o xmlwf $(LDFLAGS) $(OBJS) $(LIBS) + +clean: + rm -f xmlwf core *.o
\ No newline at end of file diff --git a/expat/xmlwf/codepage.c b/expat/xmlwf/codepage.c new file mode 100644 index 000000000..4ab1344e7 --- /dev/null +++ b/expat/xmlwf/codepage.c @@ -0,0 +1,65 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. +*/ + +#include "codepage.h" + +#ifdef WIN32 +#define STRICT 1 +#define WIN32_LEAN_AND_MEAN 1 + +#include <windows.h> + +int codepageMap(int cp, int *map) +{ + int i; + CPINFO info; + if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2) + return 0; + for (i = 0; i < 256; i++) + map[i] = -1; + if (info.MaxCharSize > 1) { + for (i = 0; i < MAX_LEADBYTES; i++) { + int j, lim; + if (info.LeadByte[i] == 0 && info.LeadByte[i + 1] == 0) + break; + lim = info.LeadByte[i + 1]; + for (j = info.LeadByte[i]; j < lim; j++) + map[j] = -2; + } + } + for (i = 0; i < 256; i++) { + if (map[i] == -1) { + char c = i; + unsigned short n; + if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, + &c, 1, &n, 1) == 1) + map[i] = n; + } + } + return 1; +} + +int codepageConvert(int cp, const char *p) +{ + unsigned short c; + if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, + p, 2, &c, 1) == 1) + return c; + return -1; +} + +#else /* not WIN32 */ + +int codepageMap(int cp, int *map) +{ + return 0; +} + +int codepageConvert(int cp, const char *p) +{ + return -1; +} + +#endif /* not WIN32 */ diff --git a/expat/xmlwf/codepage.h b/expat/xmlwf/codepage.h new file mode 100644 index 000000000..b36a5502d --- /dev/null +++ b/expat/xmlwf/codepage.h @@ -0,0 +1,7 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. 
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/

/* codepage.h: declarations for the helpers defined in codepage.c */
int codepageMap(int cp, int *map);
int codepageConvert(int cp, const char *p);

/* ct.c: small command-line driver that extracts the charset parameter
   from a MIME content type such as:  text/xml; charset="utf-8"  */

#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41

/* Set to 1 by getXMLCharset when the media subtype is exactly "xml". */
static int isXml;

/*
 * Scan the next token of a content-type string.
 *
 * *pp is the scan position and is advanced as input is consumed.
 * Tokens are:
 *   - one of the single characters ';' '/' '=' (*pp is left after it),
 *   - a quoted string, returned including both quotes (*pp is left after
 *     the closing quote),
 *   - an atom, i.e. a run of other characters (*pp is left ON the
 *     character that ended it, which is not consumed).
 * Whitespace between tokens and parenthesised comments are skipped;
 * comments nest, each '(' raising the state by one above `init`.
 * A backslash makes the following character ordinary.
 *
 * Returns a pointer to the first character of the token, or 0 when the
 * input ends without a token, a backslash is the last character, or a
 * ')' appears outside a comment and outside a quoted string.
 */
static
const char *getTok(const char **pp)
{
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may legitimately be the last thing in the string
         (e.g. "charset=utf-8"); it must not be thrown away. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;          /* enter, or nest one level deeper into, a comment */
      break;
    case ')':
      if (state > init)
        --state;          /* leave one comment level */
      else if (state != inString)
        return 0;
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Compare the token [start, end) with `key`, ignoring the case of the
 * token.  `key` must be lowercase ASCII letters.
 *
 * Returns 1 when the token is exactly `key`, 0 otherwise (including when
 * start is null).
 */
static
int matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* token longer than key: stop before reading past key's terminator */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}

/*
 * Extract the character set named by the content type in `buf`.
 *
 * `buf` is a NUL-terminated content-type value; `charset` must have room
 * for CHARSET_MAX bytes.  On return charset holds:
 *   - "" when the media type is neither "text" nor "application", or the
 *     type is not followed by '/',
 *   - the value of the charset parameter when one is present and fits,
 *   - otherwise "us-ascii" for "text" types and "" for "application"
 *     types (a quoted value that is too long also resets it to "").
 * As a side effect isXml records whether the subtype is "xml".
 */
void getXMLCharset(const char *buf, char *charset)
{
  const char *next, *p;

  charset[0] = '\0';
  isXml = 0;
  next = buf;
  p = getTok(&next);
  if (matchkey(p, next, "text"))
    strcpy(charset, "us-ascii");
  else if (!matchkey(p, next, "application"))
    return;
  p = getTok(&next);
  if (!p || *p != '/')
    return;
  p = getTok(&next);
  if (matchkey(p, next, "xml"))
    isXml = 1;
  p = getTok(&next);
  while (p) {
    if (*p == ';') {
      p = getTok(&next);
      if (matchkey(p, next, "charset")) {
        p = getTok(&next);
        if (p && *p == '=') {
          p = getTok(&next);
          if (p) {
            char *s = charset;
            if (*p == '"') {
              /* copy the text between the quotes, dropping backslashes */
              while (++p != next - 1) {
                if (*p == '\\')
                  ++p;
                if (s == charset + CHARSET_MAX - 1) {
                  charset[0] = '\0';
                  break;
                }
                *s++ = *p;
              }
              *s++ = '\0';
            }
            else {
              if (next - p > CHARSET_MAX - 1)
                break;
              while (p != next)
                *s++ = *p++;
              *s = 0;
              break;
            }
          }
        }
      }
    }
    else
      p = getTok(&next);
  }
}

/* Print the charset found in the content type given as first argument. */
int main(int argc, char **argv)
{
  char buf[CHARSET_MAX];
  if (argc < 2) {
    fprintf(stderr, "usage: ct content-type\n");
    return 1;
  }
  getXMLCharset(argv[1], buf);
  printf("charset = \"%s\"\n", buf);
  return 0;
}

/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/

/* filemap.h: hands the whole contents of the named file to `processor`
   together with its length, the file name and `arg`. */

#include <stddef.h>

#ifdef XML_UNICODE
int filemap(const wchar_t *name,
            void (*processor)(const void *, size_t, const wchar_t *, void *arg),
            void *arg);
#else
int filemap(const char *name,
            void (*processor)(const void *, size_t, const char *, void *arg),
            void *arg);
#endif
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>

/* open, read and close */
#if defined(_WIN32) || defined(WIN32)
#include <io.h>
#else
#include <unistd.h>
#endif

#ifndef S_ISREG
#ifndef S_IFREG
#define S_IFREG _S_IFREG
#endif
#ifndef S_IFMT
#define S_IFMT _S_IFMT
#endif
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#endif /* not S_ISREG */

#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0
#endif
#endif

/*
 * Read the regular file `name` into memory and hand it to `processor`.
 *
 * processor is called exactly once on success as
 *   processor(data, length, name, arg)
 * and the data is only valid for the duration of that call.  For an empty
 * file it is called with length 0 and a pointer to a single NUL byte.
 *
 * Returns 1 after processor has been called, 0 on any failure (a message
 * naming the file is written to stderr and processor is not called).
 * The descriptor and the buffer are released on every path.
 */
int filemap(const char *name,
            void (*processor)(const void *, size_t, const char *, void *arg),
            void *arg)
{
  /* upper bound for one read() request; fits the count type everywhere */
  enum { READ_CHUNK_MAX = 1 << 30 };
  static const char empty = '\0';
  size_t nbytes;
  size_t total = 0;
  int fd;
  int ok = 0;
  struct stat sb;
  void *p = NULL;

  fd = open(name, O_RDONLY|O_BINARY);
  if (fd < 0) {
    perror(name);
    return 0;
  }
  if (fstat(fd, &sb) < 0) {
    perror(name);
    goto done;
  }
  if (!S_ISREG(sb.st_mode)) {
    fprintf(stderr, "%s: not a regular file\n", name);
    goto done;
  }
  nbytes = (size_t)sb.st_size;
  /* reject sizes that do not survive the conversion to size_t */
  if (sb.st_size < 0
      || (unsigned long long)sb.st_size != (unsigned long long)nbytes) {
    fprintf(stderr, "%s: file too large to read into memory\n", name);
    goto done;
  }
  /* malloc(0) may return NULL, which must not be mistaken for
     running out of memory */
  if (nbytes == 0) {
    processor(&empty, 0, name, arg);
    ok = 1;
    goto done;
  }
  p = malloc(nbytes);
  if (!p) {
    fprintf(stderr, "%s: out of memory\n", name);
    goto done;
  }
  /* read() may deliver fewer bytes than requested; keep going until the
     size reported by fstat() has arrived or the file ends early */
  while (total < nbytes) {
    size_t want = nbytes - total;
    long got;
    if (want > READ_CHUNK_MAX)
      want = READ_CHUNK_MAX;
    got = (long)read(fd, (char *)p + total, (unsigned)want);
    if (got < 0) {
      perror(name);
      goto done;
    }
    if (got == 0)
      break;
    total += (size_t)got;
  }
  if (total != nbytes) {
    fprintf(stderr, "%s: read unexpected number of bytes\n", name);
    goto done;
  }
  processor(p, nbytes, name, arg);
  ok = 1;

done:
  free(p);
  close(fd);
  return ok;
}
+*/ + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <stdio.h> + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +#include "filemap.h" + +int filemap(const char *name, + void (*processor)(const void *, size_t, const char *, void *arg), + void *arg) +{ + int fd; + size_t nbytes; + struct stat sb; + void *p; + + fd = open(name, O_RDONLY); + if (fd < 0) { + perror(name); + return 0; + } + if (fstat(fd, &sb) < 0) { + perror(name); + close(fd); + return 0; + } + if (!S_ISREG(sb.st_mode)) { + close(fd); + fprintf(stderr, "%s: not a regular file\n", name); + return 0; + } + + nbytes = sb.st_size; + p = (void *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ, + MAP_FILE|MAP_PRIVATE, fd, (off_t)0); + if (p == (void *)-1) { + perror(name); + close(fd); + return 0; + } + processor(p, nbytes, name, arg); + munmap((caddr_t)p, nbytes); + close(fd); + return 1; +} diff --git a/expat/xmlwf/wfcheck.c b/expat/xmlwf/wfcheck.c new file mode 100644 index 000000000..c18abd290 --- /dev/null +++ b/expat/xmlwf/wfcheck.c @@ -0,0 +1,953 @@ +#include <stdlib.h> +#include <string.h> + +#include "wfcheck.h" +#include "hashtable.h" + +#include "xmltok.h" +#include "xmlrole.h" + +typedef struct { + const char *name; + const char *textPtr; + size_t textLen; + const char *docTextPtr; + const char *systemId; + const char *publicId; + const char *notation; + char open; + char wfInContent; + char wfInAttribute; + char magic; +} ENTITY; + +#define INIT_BLOCK_SIZE 1024 + +typedef struct block { + struct block *next; + char s[1]; +} BLOCK; + +typedef struct { + BLOCK *blocks; + const char *end; + char *ptr; + char *start; +} STRING_POOL; + +typedef struct { + HASH_TABLE generalEntities; + HASH_TABLE paramEntities; + STRING_POOL pool; + int containsRef; + int standalone; + char *groupConnector; + size_t groupSize; +} DTD; + +typedef struct { + DTD dtd; + size_t stackSize; + const char **startName; + int attsSize; + 
ATTRIBUTE *atts; +} CONTEXT; + +static void poolInit(STRING_POOL *); +static void poolDestroy(STRING_POOL *); +static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static int poolGrow(STRING_POOL *); +static int dtdInit(DTD *); +static void dtdDestroy(DTD *); +static int contextInit(CONTEXT *); +static void contextDestroy(CONTEXT *); + +#define poolStart(pool) ((pool)->start) +#define poolDiscard(pool) ((pool)->ptr = (pool)->start) +#define poolFinish(pool) ((pool)->start = (pool)->ptr) + +static enum WfCheckResult +checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc); +static enum WfCheckResult +checkContent(size_t level, CONTEXT *context, const ENCODING *enc, + const char *s, const char *end, const char **badPtr); +static enum WfCheckResult +checkGeneralTextEntity(CONTEXT *context, + const char *s, const char *end, + const char **nextPtr, + const ENCODING **enc); +static enum WfCheckResult +checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **); +static enum WfCheckResult +checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts, + const char **badPtr); +static enum WfCheckResult +checkParsedEntities(CONTEXT *context, const char **badPtr); + +static +enum WfCheckResult storeEntity(DTD *dtd, + const ENCODING *enc, + int isParam, + const char *entityNamePtr, + const char *entityNameEnd, + const char *entityTextPtr, + const char *entityTextEnd, + const char **badPtr); + + +enum WfCheckResult +wfCheck(enum EntityType entityType, const char *s, size_t n, + const char **badPtr, unsigned long *badLine, unsigned long *badCol) +{ + CONTEXT context; + const ENCODING *enc; + const char *start = s; + const char *end = s + n; + const char *next = 0; + enum WfCheckResult result; + + if (!contextInit(&context)) { + 
contextDestroy(&context); + return noMemory; + } + if (entityType == documentEntity) { + result = checkProlog(&context.dtd, s, end, &next, &enc); + s = next; + if (!result) { + result = checkParsedEntities(&context, &next); + s = next; + if (!result) { + result = checkContent(0, &context, enc, s, end, &next); + s = next; + } + } + } + else { + result = checkGeneralTextEntity(&context, s, end, &next, &enc); + s = next; + } + if (result && s) { + POSITION pos; + memset(&pos, 0, sizeof(POSITION)); + XmlUpdatePosition(enc, start, s, &pos); + *badPtr = s; + *badLine = pos.lineNumber; + *badCol = pos.columnNumber; + } + contextDestroy(&context); + return result; +} + +static +int contextInit(CONTEXT *p) +{ + p->stackSize = 1024; + p->startName = malloc(p->stackSize * sizeof(char *)); + p->attsSize = 1024; + p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE)); + return dtdInit(&(p->dtd)) && p->atts && p->startName; +} + +static +void contextDestroy(CONTEXT *p) +{ + dtdDestroy(&(p->dtd)); + free((void *)p->startName); + free((void *)p->atts); +} + +static enum WfCheckResult +checkContent(size_t level, CONTEXT *context, const ENCODING *enc, + const char *s, const char *end, const char **badPtr) +{ + size_t startLevel = level; + const char *next; + int tok = XmlContentTok(enc, s, end, &next); + for (;;) { + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + if (startLevel > 0) { + if (level != startLevel) { + *badPtr = s; + return asyncEntity; + } + return wellFormed; + } + *badPtr = s; + return noElements; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + case XML_TOK_PARTIAL: + *badPtr = s; + return unclosedToken; + case XML_TOK_PARTIAL_CHAR: + *badPtr = s; + return partialChar; + case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + break; + case XML_TOK_ENTITY_REF: + { + const char *name = poolStoreString(&context->dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity = (ENTITY *)lookup(&context->dtd.generalEntities, 
name, 0); + poolDiscard(&context->dtd.pool); + if (!entity) { + if (!context->dtd.containsRef || context->dtd.standalone) { + *badPtr = s; + return undefinedEntity; + } + break; + } + if (entity->wfInContent) + break; + if (entity->open) { + *badPtr = s; + return recursiveEntityRef; + } + if (entity->notation) { + *badPtr = s; + return binaryEntityRef; + } + if (entity) { + if (entity->textPtr) { + enum WfCheckResult result; + const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING); + entity->open = 1; + result = checkContent(level, context, internalEnc, + entity->textPtr, entity->textPtr + entity->textLen, + badPtr); + entity->open = 0; + if (result && *badPtr) { + *badPtr = s; + return result; + } + entity->wfInContent = 1; + } + } + break; + } + case XML_TOK_START_TAG_NO_ATTS: + if (level == context->stackSize) { + context->startName + = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *)); + if (!context->startName) + return noMemory; + } + context->startName[level++] = s + enc->minBytesPerChar; + break; + case XML_TOK_START_TAG_WITH_ATTS: + if (level == context->stackSize) { + context->startName = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *)); + if (!context->startName) + return noMemory; + } + context->startName[level++] = s + enc->minBytesPerChar; + /* fall through */ + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: + { + int i; + int n = XmlGetAttributes(enc, s, context->attsSize, context->atts); + if (n > context->attsSize) { + context->attsSize = 2*n; + context->atts = realloc((void *)context->atts, context->attsSize * sizeof(ATTRIBUTE)); + if (!context->atts) + return noMemory; + XmlGetAttributes(enc, s, n, context->atts); + } + for (i = 0; i < n; i++) { + if (!context->atts[i].normalized) { + enum WfCheckResult result + = checkAttributeValue(&context->dtd, enc, + context->atts[i].valuePtr, + context->atts[i].valueEnd, + badPtr); + if (result) + return result; + } + } + if (i > 1) { + 
enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr); + if (result) + return result; + } + } + break; + case XML_TOK_END_TAG: + if (level == startLevel) { + *badPtr = s; + return asyncEntity; + } + --level; + if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) { + *badPtr = s; + return tagMismatch; + } + break; + case XML_TOK_CHAR_REF: + if (XmlCharRefNumber(enc, s) < 0) { + *badPtr = s; + return badCharRef; + } + break; + case XML_TOK_XML_DECL: + *badPtr = s; + return misplacedXmlPi; + } + s = next; + if (level == 0) { + do { + tok = XmlPrologTok(enc, s, end, &next); + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + return wellFormed; + case XML_TOK_PROLOG_S: + case XML_TOK_COMMENT: + case XML_TOK_PI: + s = next; + break; + default: + if (tok > 0) { + *badPtr = s; + return junkAfterDocElement; + } + break; + } + } while (tok > 0); + } + else + tok = XmlContentTok(enc, s, end, &next); + } + /* not reached */ +} + +static +int attcmp(const void *p1, const void *p2) +{ + const ATTRIBUTE *a1 = p1; + const ATTRIBUTE *a2 = p2; + size_t n1 = a1->valuePtr - a1->name; + size_t n2 = a2->valuePtr - a2->name; + + if (n1 == n2) { + int n = memcmp(a1->name, a2->name, n1); + if (n) + return n; + /* Sort identical attribute names by position, so that we always + report the first duplicate attribute. */ + if (a1->name < a2->name) + return -1; + else if (a1->name > a2->name) + return 1; + else + return 0; + } + else if (n1 < n2) + return -1; + else + return 1; +} + +/* Note that this trashes the attribute values. 
*/ + +static enum WfCheckResult +checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts, + const char **badPtr) +{ +#define QSORT_MIN_ATTS 10 + if (nAtts < QSORT_MIN_ATTS) { + int i; + for (i = 1; i < nAtts; i++) { + int j; + for (j = 0; j < i; j++) { + if (XmlSameName(enc, context->atts[i].name, context->atts[j].name)) { + *badPtr = context->atts[i].name; + return duplicateAttribute; + } + } + } + } + else { + int i; + const char *dup = 0; + /* Store the end of the name in valuePtr */ + for (i = 0; i < nAtts; i++) { + ATTRIBUTE *a = context->atts + i; + a->valuePtr = a->name + XmlNameLength(enc, a->name); + } + qsort(context->atts, nAtts, sizeof(ATTRIBUTE), attcmp); + for (i = 1; i < nAtts; i++) { + ATTRIBUTE *a = context->atts + i; + if (XmlSameName(enc, a->name, a[-1].name)) { + if (!dup || a->name < dup) + dup = a->name; + } + } + if (dup) { + *badPtr = dup; + return duplicateAttribute; + } + } + return wellFormed; +} + +static enum WfCheckResult +checkProlog(DTD *dtd, const char *s, const char *end, + const char **nextPtr, const ENCODING **enc) +{ + const char *entityNamePtr, *entityNameEnd; + int entityIsParam; + PROLOG_STATE state; + ENTITY *entity; + INIT_ENCODING initEnc; + XmlInitEncoding(&initEnc, enc); + XmlPrologStateInit(&state); + for (;;) { + const char *next; + int tok = XmlPrologTok(*enc, s, end, &next); + switch (XmlTokenRole(&state, tok, s, next, *enc)) { + case XML_ROLE_XML_DECL: + { + const char *encodingName = 0; + const ENCODING *encoding = 0; + const char *version; + int standalone = -1; + if (!XmlParseXmlDecl(0, + *enc, + s, + next, + nextPtr, + &version, + &encodingName, + &encoding, + &standalone)) + return syntaxError; + if (encoding) { + if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { + *nextPtr = encodingName; + return incorrectEncoding; + } + *enc = encoding; + } + else if (encodingName) { + *nextPtr = encodingName; + return unknownEncoding; + } + if (standalone == 1) + dtd->standalone = 1; + break; + } 
+ case XML_ROLE_DOCTYPE_SYSTEM_ID: + dtd->containsRef = 1; + break; + case XML_ROLE_DOCTYPE_PUBLIC_ID: + case XML_ROLE_ENTITY_PUBLIC_ID: + case XML_ROLE_NOTATION_PUBLIC_ID: + if (!XmlIsPublicId(*enc, s, next, nextPtr)) + return syntaxError; + break; + case XML_ROLE_INSTANCE_START: + *nextPtr = s; + return wellFormed; + case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: + case XML_ROLE_FIXED_ATTRIBUTE_VALUE: + { + const char *tem = 0; + enum WfCheckResult result + = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar, + &tem); + if (result) { + if (tem) + *nextPtr = tem; + return result; + } + break; + } + case XML_ROLE_ENTITY_VALUE: + { + enum WfCheckResult result + = storeEntity(dtd, + *enc, + entityIsParam, + entityNamePtr, + entityNameEnd, + s, + next, + nextPtr); + if (result != wellFormed) + return result; + } + break; + case XML_ROLE_ENTITY_SYSTEM_ID: + { + const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd); + entity = (ENTITY *)lookup(entityIsParam ? 
&dtd->paramEntities : &dtd->generalEntities, + name, sizeof(ENTITY)); + if (entity->name != name) { + poolDiscard(&dtd->pool); + entity = 0; + } + else { + poolFinish(&dtd->pool); + entity->systemId = poolStoreString(&dtd->pool, *enc, + s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar); + poolFinish(&dtd->pool); + } + } + break; + case XML_ROLE_PARAM_ENTITY_REF: + { + const char *name = poolStoreString(&dtd->pool, *enc, + s + (*enc)->minBytesPerChar, + next - (*enc)->minBytesPerChar); + ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + poolDiscard(&dtd->pool); + if (!entity) { + if (!dtd->containsRef || dtd->standalone) { + *nextPtr = s; + return undefinedEntity; + } + } + } + break; + case XML_ROLE_ENTITY_NOTATION_NAME: + if (entity) { + entity->notation = poolStoreString(&dtd->pool, *enc, s, next); + poolFinish(&dtd->pool); + } + break; + case XML_ROLE_GENERAL_ENTITY_NAME: + entityNamePtr = s; + entityNameEnd = next; + entityIsParam = 0; + break; + case XML_ROLE_PARAM_ENTITY_NAME: + entityNamePtr = s; + entityNameEnd = next; + entityIsParam = 1; + break; + case XML_ROLE_ERROR: + *nextPtr = s; + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + return paramEntityRef; + case XML_TOK_INVALID: + *nextPtr = next; + return invalidToken; + case XML_TOK_NONE: + return noElements; + case XML_TOK_PARTIAL: + return unclosedToken; + case XML_TOK_PARTIAL_CHAR: + return partialChar; + case XML_TOK_TRAILING_CR: + *nextPtr = s + (*enc)->minBytesPerChar; + return noElements; + case XML_TOK_XML_DECL: + return misplacedXmlPi; + default: + return syntaxError; + } + case XML_ROLE_GROUP_OPEN: + if (state.level >= dtd->groupSize) { + if (dtd->groupSize) + dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2); + else + dtd->groupConnector = malloc(dtd->groupSize = 32); + if (!dtd->groupConnector) + return noMemory; + } + dtd->groupConnector[state.level] = 0; + break; + case XML_ROLE_GROUP_SEQUENCE: + if (dtd->groupConnector[state.level] == 
'|') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = ','; + break; + case XML_ROLE_GROUP_CHOICE: + if (dtd->groupConnector[state.level] == ',') { + *nextPtr = s; + return syntaxError; + } + dtd->groupConnector[state.level] = '|'; + break; + case XML_ROLE_NONE: + if (tok == XML_TOK_PARAM_ENTITY_REF) + dtd->containsRef = 1; + break; + } + s = next; + } + /* not reached */ +} + +static enum WfCheckResult +checkParsedEntities(CONTEXT *context, const char **badPtr) +{ + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &context->dtd.generalEntities); + for (;;) { + ENTITY *entity = (ENTITY *)hashTableIterNext(&iter); + if (!entity) + break; + if (entity->textPtr && !entity->wfInContent && !entity->magic) { + enum WfCheckResult result; + const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING); + entity->open = 1; + result = checkContent(1, context, internalEnc, + entity->textPtr, entity->textPtr + entity->textLen, + badPtr); + entity->open = 0; + if (result && *badPtr) { + *badPtr = entity->docTextPtr; + return result; + } + entity->wfInContent = 1; + } + } + return wellFormed; +} + +static enum WfCheckResult +checkGeneralTextEntity(CONTEXT *context, + const char *s, const char *end, + const char **nextPtr, + const ENCODING **enc) +{ + INIT_ENCODING initEnc; + const char *next; + int tok; + + XmlInitEncoding(&initEnc, enc); + tok = XmlContentTok(*enc, s, end, &next); + + if (tok == XML_TOK_BOM) { + s = next; + tok = XmlContentTok(*enc, s, end, &next); + } + if (tok == XML_TOK_XML_DECL) { + const char *encodingName = 0; + const ENCODING *encoding = 0; + const char *version; + if (!XmlParseXmlDecl(1, + *enc, + s, + next, + nextPtr, + &version, + &encodingName, + &encoding, + 0)) + return syntaxError; + if (encoding) { + if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) { + *nextPtr = encodingName; + return incorrectEncoding; + } + *enc = encoding; + } + else if (encodingName) { + *nextPtr = encodingName; + return 
unknownEncoding; + } + s = next; + } + context->dtd.containsRef = 1; + return checkContent(1, context, *enc, s, end, nextPtr); +} + +static enum WfCheckResult +checkAttributeValue(DTD *dtd, const ENCODING *enc, + const char *ptr, const char *end, const char **badPtr) +{ + for (;;) { + const char *next; + int tok = XmlAttributeValueTok(enc, ptr, end, &next); + switch (tok) { + case XML_TOK_TRAILING_CR: + case XML_TOK_NONE: + return wellFormed; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + case XML_TOK_PARTIAL: + *badPtr = ptr; + return invalidToken; + case XML_TOK_CHAR_REF: + if (XmlCharRefNumber(enc, ptr) < 0) { + *badPtr = ptr; + return badCharRef; + } + break; + case XML_TOK_DATA_CHARS: + case XML_TOK_DATA_NEWLINE: + break; + case XML_TOK_ENTITY_REF: + { + const char *name = poolStoreString(&dtd->pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + ENTITY *entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + if (!entity) { + if (!dtd->containsRef) { + *badPtr = ptr; + return undefinedEntity; + } + break; + } + if (entity->wfInAttribute) + break; + if (entity->open) { + *badPtr = ptr; + return recursiveEntityRef; + } + if (entity->notation) { + *badPtr = ptr; + return binaryEntityRef; + } + if (entity) { + if (entity->textPtr) { + enum WfCheckResult result; + const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING); + const char *textEnd = entity->textPtr + entity->textLen; + entity->open = 1; + result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr); + entity->open = 0; + if (result && *badPtr) { + *badPtr = ptr; + return result; + } + entity->wfInAttribute = 1; + } + else { + *badPtr = ptr; + return attributeExternalEntityRef; + } + } + break; + } + break; + default: + abort(); + } + ptr = next; + } + /* not reached */ +} + +static +void poolInit(STRING_POOL *pool) +{ + pool->blocks = 0; + pool->start = 0; + pool->ptr = 0; + pool->end = 0; +} + 
+static +void poolDestroy(STRING_POOL *pool) +{ + BLOCK *p = pool->blocks; + while (p) { + BLOCK *tem = p->next; + free(p); + p = tem; + } + pool->blocks = 0; + pool->ptr = 0; + pool->start = 0; + pool->end = 0; +} + +static +const char *poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + for (;;) { + XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end); + if (ptr == end) + break; + if (!poolGrow(pool)) + return 0; + } + return pool->start; +} + +static +const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) +{ + if (!poolAppend(pool, enc, ptr, end)) + return 0; + if (pool->ptr == pool->end && !poolGrow(pool)) + return 0; + *(pool->ptr)++ = 0; + return pool->start; +} + +static +int poolGrow(STRING_POOL *pool) +{ + if (pool->blocks && pool->start == pool->blocks->s) { + size_t blockSize = (pool->end - pool->start)*2; + pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize); + if (!pool->blocks) + return 0; + pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->start = pool->blocks->s; + pool->end = pool->start + blockSize; + } + else { + BLOCK *tem; + size_t blockSize = pool->end - pool->start; + if (blockSize < INIT_BLOCK_SIZE) + blockSize = INIT_BLOCK_SIZE; + else + blockSize *= 2; + tem = malloc(offsetof(BLOCK, s) + blockSize); + if (!tem) + return 0; + tem->next = pool->blocks; + pool->blocks = tem; + memcpy(tem->s, pool->start, pool->ptr - pool->start); + pool->ptr = tem->s + (pool->ptr - pool->start); + pool->start = tem->s; + pool->end = tem->s + blockSize; + } + return 1; +} + +static int dtdInit(DTD *dtd) +{ + static const char *names[] = { "lt", "amp", "gt", "quot", "apos" }; + static const char chars[] = { '<', '&', '>', '"', '\'' }; + int i; + + poolInit(&(dtd->pool)); + hashTableInit(&(dtd->generalEntities)); + for (i = 0; i < 5; i++) { + ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], 
sizeof(ENTITY)); + if (!entity) + return 0; + entity->textPtr = chars + i; + entity->textLen = 1; + entity->magic = 1; + entity->wfInContent = 1; + entity->wfInAttribute = 1; + } + hashTableInit(&(dtd->paramEntities)); + dtd->containsRef = 0; + dtd->groupSize = 0; + dtd->groupConnector = 0; + return 1; +} + +static void dtdDestroy(DTD *dtd) +{ + poolDestroy(&(dtd->pool)); + hashTableDestroy(&(dtd->generalEntities)); + hashTableDestroy(&(dtd->paramEntities)); + free(dtd->groupConnector); +} + +static +enum WfCheckResult storeEntity(DTD *dtd, + const ENCODING *enc, + int isParam, + const char *entityNamePtr, + const char *entityNameEnd, + const char *entityTextPtr, + const char *entityTextEnd, + const char **badPtr) +{ + ENTITY *entity; + const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING); + STRING_POOL *pool = &(dtd->pool); + if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd)) + return noMemory; + entity = (ENTITY *)lookup(isParam ? &(dtd->paramEntities) : &(dtd->generalEntities), + pool->start, + sizeof(ENTITY)); + if (entity->name != pool->start) { + poolDiscard(pool); + entityNamePtr = 0; + } + else + poolFinish(pool); + entityTextPtr += enc->minBytesPerChar; + entityTextEnd -= enc->minBytesPerChar; + entity->docTextPtr = entityTextPtr; + for (;;) { + const char *next; + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + *badPtr = entityTextPtr; + return syntaxError; + case XML_TOK_NONE: + if (entityNamePtr) { + entity->textPtr = pool->start; + entity->textLen = pool->ptr - pool->start; + poolFinish(pool); + } + else + poolDiscard(pool); + return wellFormed; + case XML_TOK_ENTITY_REF: + case XML_TOK_DATA_CHARS: + if (!poolAppend(pool, enc, entityTextPtr, next)) + return noMemory; + break; + case XML_TOK_TRAILING_CR: + next = entityTextPtr + enc->minBytesPerChar; + /* fall through */ + case XML_TOK_DATA_NEWLINE: + if (pool->end == pool->ptr && !poolGrow(pool)) + return 
noMemory; + *(pool->ptr)++ = '\n'; + break; + case XML_TOK_CHAR_REF: + { + char buf[XML_MAX_BYTES_PER_CHAR]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + *badPtr = entityTextPtr; + return badCharRef; + } + n = XmlEncode(utf8, n, buf); + if (!n) { + *badPtr = entityTextPtr; + return badCharRef; + } + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && !poolGrow(pool)) + return noMemory; + *(pool->ptr)++ = buf[i]; + } + } + break; + case XML_TOK_PARTIAL: + *badPtr = entityTextPtr; + return invalidToken; + case XML_TOK_INVALID: + *badPtr = next; + return invalidToken; + default: + abort(); + } + entityTextPtr = next; + } + /* not reached */ +} diff --git a/expat/xmlwf/wfcheck.h b/expat/xmlwf/wfcheck.h new file mode 100644 index 000000000..ea8fe5735 --- /dev/null +++ b/expat/xmlwf/wfcheck.h @@ -0,0 +1,38 @@ + +#include <stddef.h> + +enum WfCheckResult { + wellFormed, + noMemory, + syntaxError, + noElements, + invalidToken, + unclosedToken, + partialChar, + tagMismatch, + duplicateAttribute, + junkAfterDocElement, + paramEntityRef, + undefinedEntity, + recursiveEntityRef, + asyncEntity, + badCharRef, + binaryEntityRef, + attributeExternalEntityRef, + misplacedXmlPi, + unknownEncoding, + incorrectEncoding +}; + +enum EntityType { + documentEntity, + generalTextEntity +}; + +enum WfCheckResult wfCheck(enum EntityType entityType, + const char *s, size_t n, + const char **errorPtr, + unsigned long *errorLineNumber, + unsigned long *errorColNumber); +const char *wfCheckMessage(enum WfCheckResult); + diff --git a/expat/xmlwf/wfcheckmessage.c b/expat/xmlwf/wfcheckmessage.c new file mode 100644 index 000000000..95e811f49 --- /dev/null +++ b/expat/xmlwf/wfcheckmessage.c @@ -0,0 +1,30 @@ +#include "wfcheck.h" + +const char *wfCheckMessage(enum WfCheckResult result) +{ + static const char *message[] = { + 0, + "out of memory", + "syntax error", + "no element found", + "not well-formed", + "unclosed token", + "unclosed token", + "mismatched tag", 
+ "duplicate attribute", + "junk after document element", + "parameter entity reference not allowed within declaration in internal subset", + "undefined entity", + "recursive entity reference", + "asynchronous entity", + "reference to invalid character number", + "reference to binary entity", + "reference to external entity in attribute", + "xml pi not at start of external entity", + "unknown encoding", + "encoding specified in XML declaration is incorrect" + }; + if (result > 0 && result < sizeof(message)/sizeof(message[0])) + return message[result]; + return 0; +} diff --git a/expat/xmlwf/win32filemap.c b/expat/xmlwf/win32filemap.c new file mode 100644 index 000000000..7546fea89 --- /dev/null +++ b/expat/xmlwf/win32filemap.c @@ -0,0 +1,95 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. +*/ + +#define STRICT 1 +#define WIN32_LEAN_AND_MEAN 1 + +#ifdef XML_UNICODE_WCHAR_T +#ifndef XML_UNICODE +#define XML_UNICODE +#endif +#endif + +#ifdef XML_UNICODE +#define UNICODE +#define _UNICODE +#endif /* XML_UNICODE */ +#include <windows.h> +#include <stdio.h> +#include <tchar.h> +#include "filemap.h" + +static void win32perror(const TCHAR *); + +int filemap(const TCHAR *name, + void (*processor)(const void *, size_t, const TCHAR *, void *arg), + void *arg) +{ + HANDLE f; + HANDLE m; + DWORD size; + DWORD sizeHi; + void *p; + + f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, + FILE_FLAG_SEQUENTIAL_SCAN, NULL); + if (f == INVALID_HANDLE_VALUE) { + win32perror(name); + return 0; + } + size = GetFileSize(f, &sizeHi); + if (size == (DWORD)-1) { + win32perror(name); + return 0; + } + if (sizeHi) { + _ftprintf(stderr, _T("%s: bigger than 2Gb\n"), name); + return 0; + } + /* CreateFileMapping barfs on zero length files */ + if (size == 0) { + static const char c = '\0'; + processor(&c, 0, name, arg); + CloseHandle(f); + return 1; + } + m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, 
NULL); + if (m == NULL) { + win32perror(name); + CloseHandle(f); + return 0; + } + p = MapViewOfFile(m, FILE_MAP_READ, 0, 0, 0); + if (p == NULL) { + win32perror(name); + CloseHandle(m); + CloseHandle(f); + return 0; + } + processor(p, size, name, arg); + UnmapViewOfFile(p); + CloseHandle(m); + CloseHandle(f); + return 1; +} + +static +void win32perror(const TCHAR *s) +{ + LPVOID buf; + if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, + GetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &buf, + 0, + NULL)) { + _ftprintf(stderr, _T("%s: %s"), s, buf); + fflush(stderr); + LocalFree(buf); + } + else + _ftprintf(stderr, _T("%s: unknown Windows error\n"), s); +} diff --git a/expat/xmlwf/xmlfile.c b/expat/xmlwf/xmlfile.c new file mode 100644 index 000000000..ba79b23b3 --- /dev/null +++ b/expat/xmlwf/xmlfile.c @@ -0,0 +1,217 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <fcntl.h> +#include "expat.h" +#include "xmlfile.h" +#include "xmltchar.h" +#include "filemap.h" + +#ifdef _MSC_VER +#include <io.h> +#endif + +#ifdef _POSIX_SOURCE +#include <unistd.h> +#endif + +#ifndef O_BINARY +#ifdef _O_BINARY +#define O_BINARY _O_BINARY +#else +#define O_BINARY 0 +#endif +#endif + +#ifdef _DEBUG +#define READ_SIZE 16 +#else +#define READ_SIZE (1024*8) +#endif + + + +typedef struct { + XML_Parser parser; + int *retPtr; +} PROCESS_ARGS; + +static +void reportError(XML_Parser parser, const XML_Char *filename) +{ + int code = XML_GetErrorCode(parser); + const XML_Char *message = XML_ErrorString(code); + if (message) + ftprintf(stdout, T("%s:%d:%d: %s\n"), + filename, + XML_GetErrorLineNumber(parser), + XML_GetErrorColumnNumber(parser), + message); + else + ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code); +} + +static +void processFile(const void *data, + size_t size, + const XML_Char *filename, + void *args) +{ + XML_Parser parser = ((PROCESS_ARGS *)args)->parser; + int *retPtr = ((PROCESS_ARGS *)args)->retPtr; + if (!XML_Parse(parser, data, size, 1)) { + reportError(parser, filename); + *retPtr = 0; + } + else + *retPtr = 1; +} + +#ifdef WIN32 + +static +int isAsciiLetter(XML_Char c) +{ + return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); +} + +#endif /* WIN32 */ + +static +const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree) +{ + XML_Char *s; + *toFree = 0; + if (!base + || *systemId == T('/') +#ifdef WIN32 + || *systemId == T('\\') + || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) +#endif + ) + return systemId; + *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char)); + if (!*toFree) + return systemId; + tcscpy(*toFree, base); + s = *toFree; + if (tcsrchr(s, T('/'))) + s = tcsrchr(s, T('/')) + 1; +#ifdef WIN32 + if (tcsrchr(s, 
T('\\'))) + s = tcsrchr(s, T('\\')) + 1; +#endif + tcscpy(s, systemId); + return *toFree; +} + +static +int externalEntityRefFilemap(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + int result; + XML_Char *s; + const XML_Char *filename; + XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); + PROCESS_ARGS args; + args.retPtr = &result; + args.parser = entParser; + filename = resolveSystemId(base, systemId, &s); + XML_SetBase(entParser, filename); + if (!filemap(filename, processFile, &args)) + result = 0; + free(s); + XML_ParserFree(entParser); + return result; +} + +static +int processStream(const XML_Char *filename, XML_Parser parser) +{ + int fd = topen(filename, O_BINARY|O_RDONLY); + if (fd < 0) { + tperror(filename); + return 0; + } + for (;;) { + int nread; + char *buf = XML_GetBuffer(parser, READ_SIZE); + if (!buf) { + close(fd); + ftprintf(stderr, T("%s: out of memory\n"), filename); + return 0; + } + nread = read(fd, buf, READ_SIZE); + if (nread < 0) { + tperror(filename); + close(fd); + return 0; + } + if (!XML_ParseBuffer(parser, nread, nread == 0)) { + reportError(parser, filename); + close(fd); + return 0; + } + if (nread == 0) { + close(fd); + break;; + } + } + return 1; +} + +static +int externalEntityRefStream(XML_Parser parser, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + XML_Char *s; + const XML_Char *filename; + int ret; + XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); + filename = resolveSystemId(base, systemId, &s); + XML_SetBase(entParser, filename); + ret = processStream(filename, entParser); + free(s); + XML_ParserFree(entParser); + return ret; +} + +int XML_ProcessFile(XML_Parser parser, + const XML_Char *filename, + unsigned flags) +{ + int result; + + if (!XML_SetBase(parser, filename)) { + ftprintf(stderr, T("%s: out of memory"), filename); + 
exit(1); + } + + if (flags & XML_EXTERNAL_ENTITIES) + XML_SetExternalEntityRefHandler(parser, + (flags & XML_MAP_FILE) + ? externalEntityRefFilemap + : externalEntityRefStream); + if (flags & XML_MAP_FILE) { + PROCESS_ARGS args; + args.retPtr = &result; + args.parser = parser; + if (!filemap(filename, processFile, &args)) + result = 0; + } + else + result = processStream(filename, parser); + return result; +} diff --git a/expat/xmlwf/xmlfile.h b/expat/xmlwf/xmlfile.h new file mode 100644 index 000000000..5d6bdce45 --- /dev/null +++ b/expat/xmlwf/xmlfile.h @@ -0,0 +1,11 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. +*/ + +#define XML_MAP_FILE 01 +#define XML_EXTERNAL_ENTITIES 02 + +extern int XML_ProcessFile(XML_Parser parser, + const XML_Char *filename, + unsigned flags); diff --git a/expat/xmlwf/xmlmime.c b/expat/xmlwf/xmlmime.c new file mode 100644 index 000000000..4b32c86fe --- /dev/null +++ b/expat/xmlwf/xmlmime.c @@ -0,0 +1,161 @@ +#include <string.h> +#include "xmlmime.h" + +static +const char *getTok(const char **pp) +{ + /* inComment means one level of nesting; inComment+1 means two levels etc */ + enum { inAtom, inString, init, inComment }; + int state = init; + const char *tokStart = 0; + for (;;) { + switch (**pp) { + case '\0': + if (state == inAtom) + return tokStart; + return 0; + case ' ': + case '\r': + case '\t': + case '\n': + if (state == inAtom) + return tokStart; + break; + case '(': + if (state == inAtom) + return tokStart; + if (state != inString) + state++; + break; + case ')': + if (state > init) + --state; + else if (state != inString) + return 0; + break; + case ';': + case '/': + case '=': + if (state == inAtom) + return tokStart; + if (state == init) + return (*pp)++; + break; + case '\\': + ++*pp; + if (**pp == '\0') + return 0; + break; + case '"': + switch (state) { + case inString: + ++*pp; + return tokStart; + case inAtom: + return tokStart; + case init: + 
tokStart = *pp; + state = inString; + break; + } + break; + default: + if (state == init) { + tokStart = *pp; + state = inAtom; + } + break; + } + ++*pp; + } + /* not reached */ +} + +/* key must be lowercase ASCII */ +/* Returns 1 if the token [start, end) equals key, ignoring the case of the +   token's letters; 0 otherwise or when start is null. */ + +static +int matchkey(const char *start, const char *end, const char *key) +{ + if (!start) + return 0; + for (; start != end; start++, key++) + /* stop as soon as key is exhausted so key is never advanced past its terminator */ + if (*key == '\0' || (*start != *key && *start != 'A' + (*key - 'a'))) + return 0; + return *key == '\0'; +} + +void getXMLCharset(const char *buf, char *charset) +{ + const char *next, *p; + + charset[0] = '\0'; + next = buf; + p = getTok(&next); + if (matchkey(p, next, "text")) + strcpy(charset, "us-ascii"); + else if (!matchkey(p, next, "application")) + return; + p = getTok(&next); + if (!p || *p != '/') + return; + p = getTok(&next); +#if 0 + if (!matchkey(p, next, "xml") && charset[0] == '\0') + return; +#endif + p = getTok(&next); + while (p) { + if (*p == ';') { + p = getTok(&next); + if (matchkey(p, next, "charset")) { + p = getTok(&next); + if (p && *p == '=') { + p = getTok(&next); + if (p) { + char *s = charset; + if (*p == '"') { + while (++p != next - 1) { + if (*p == '\\') + ++p; + if (s == charset + CHARSET_MAX - 1) { + charset[0] = '\0'; + break; + } + *s++ = *p; + } + *s++ = '\0'; + } + else { + if (next - p > CHARSET_MAX - 1) + break; + while (p != next) + *s++ = *p++; + *s = 0; + break; + } + } + } + break; + } + } + else + p = getTok(&next); + } +} + +#ifdef TEST + +#include <stdio.h> + +int main(int argc, char **argv) +{ + char buf[CHARSET_MAX]; + if (argc <= 1) + return 1; + printf("%s\n", argv[1]); + getXMLCharset(argv[1], buf); + printf("charset=\"%s\"\n", buf); + return 0; +} + +#endif /* TEST */ diff --git a/expat/xmlwf/xmlmime.h b/expat/xmlwf/xmlmime.h new file mode 100644 index 000000000..79654b69b --- /dev/null +++ b/expat/xmlwf/xmlmime.h @@ -0,0 +1,19 @@ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Registered charset names are at most 40 characters long. 
*/ + +#define CHARSET_MAX 41 + +/* Figure out the charset to use from the ContentType. +buf contains the body of the header field (the part after "Content-Type:"). +charset gets the charset to use. It must be at least CHARSET_MAX chars long. +charset will be empty if the default charset should be used. */ + +void getXMLCharset(const char *buf, char *charset); + +#ifdef __cplusplus +} +#endif diff --git a/expat/xmlwf/xmltchar.h b/expat/xmlwf/xmltchar.h new file mode 100644 index 000000000..108857551 --- /dev/null +++ b/expat/xmlwf/xmltchar.h @@ -0,0 +1,36 @@ +#ifdef XML_UNICODE +#ifndef XML_UNICODE_WCHAR_T +#error xmlwf requires a 16-bit Unicode-compatible wchar_t +#endif +#define T(x) L ## x +#define ftprintf fwprintf +#define tfopen _wfopen +#define fputts fputws +#define puttc putwc +#define tcscmp wcscmp +#define tcscpy wcscpy +#define tcscat wcscat +#define tcschr wcschr +#define tcsrchr wcsrchr +#define tcslen wcslen +#define tperror _wperror +#define topen _wopen +#define tmain wmain +#define tremove _wremove +#else /* not XML_UNICODE */ +#define T(x) x +#define ftprintf fprintf +#define tfopen fopen +#define fputts fputs +#define puttc putc +#define tcscmp strcmp +#define tcscpy strcpy +#define tcscat strcat +#define tcschr strchr +#define tcsrchr strrchr +#define tcslen strlen +#define tperror perror +#define topen open +#define tmain main +#define tremove remove +#endif /* not XML_UNICODE */ diff --git a/expat/xmlwf/xmlurl.h b/expat/xmlwf/xmlurl.h new file mode 100644 index 000000000..3ab172688 --- /dev/null +++ b/expat/xmlwf/xmlurl.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int XML_URLInit(); +void XML_URLUninit(); +int XML_ProcessURL(XML_Parser parser, + const XML_Char *url, + unsigned flags); + +#ifdef __cplusplus +} +#endif diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c new file mode 100644 index 000000000..891c0db5f --- /dev/null +++ b/expat/xmlwf/xmlwf.c @@ -0,0 +1,759 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source 
Software Center Ltd +See the file COPYING for copying permission. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> + +#include "expat.h" +#include "codepage.h" +#include "xmlfile.h" +#include "xmltchar.h" + +#ifdef _MSC_VER +#include <crtdbg.h> +#endif + +/* This ensures proper sorting. */ + +#define NSSEP T('\001') + +/* Writes s[0..len) to the FILE passed as userData, replacing markup +   characters with entity or character references. */ +static void characterData(void *userData, const XML_Char *s, int len) +{ + FILE *fp = userData; + for (; len > 0; --len, ++s) { + switch (*s) { + case T('&'): + fputts(T("&amp;"), fp); + break; + case T('<'): + fputts(T("&lt;"), fp); + break; + case T('>'): + fputts(T("&gt;"), fp); + break; +#ifdef W3C14N + case 13: + fputts(T("&#xD;"), fp); + break; +#else + case T('"'): + fputts(T("&quot;"), fp); + break; + case 9: + case 10: + case 13: + ftprintf(fp, T("&#%d;"), *s); + break; +#endif + default: + puttc(*s, fp); + break; + } + } +} + +/* Writes ="value" to fp with the value escaped; output stops at the +   terminating 0 or at the first NSSEP. */ +static void attributeValue(FILE *fp, const XML_Char *s) +{ + puttc(T('='), fp); + puttc(T('"'), fp); + for (;;) { + switch (*s) { + case 0: + case NSSEP: + puttc(T('"'), fp); + return; + case T('&'): + fputts(T("&amp;"), fp); + break; + case T('<'): + fputts(T("&lt;"), fp); + break; + case T('"'): + fputts(T("&quot;"), fp); + break; +#ifdef W3C14N + case 9: + fputts(T("&#x9;"), fp); + break; + case 10: + fputts(T("&#xA;"), fp); + break; + case 13: + fputts(T("&#xD;"), fp); + break; +#else + case T('>'): + fputts(T("&gt;"), fp); + break; + case 9: + case 10: + case 13: + ftprintf(fp, T("&#%d;"), *s); + break; +#endif + default: + puttc(*s, fp); + break; + } + s++; + } +} + +/* Lexicographically comparing UTF-8 encoded attribute values, +is equivalent to lexicographically comparing based on the character number. */ + +static int attcmp(const void *att1, const void *att2) +{ + return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); +} + +static void startElement(void *userData, const XML_Char *name, const XML_Char **atts) +{ + int nAtts; + const XML_Char **p; + FILE *fp = userData; + puttc(T('<'), fp); + fputts(name, fp); + + p = atts; + while (*p) + ++p; + nAtts = (p - atts) >> 1; + if (nAtts > 1) + qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp); + while (*atts) { + puttc(T(' '), fp); + fputts(*atts++, fp); + attributeValue(fp, *atts); + atts++; + } + puttc(T('>'), fp); +} + +static void endElement(void *userData, const XML_Char *name) +{ + FILE *fp = userData; + puttc(T('<'), fp); + puttc(T('/'), fp); + fputts(name, fp); + puttc(T('>'), fp); +} + +/* qsort comparator for attribute name/value pairs: names without an NSSEP +   sort before names that contain one; otherwise plain tcscmp order. */ +static int nsattcmp(const void *p1, const void *p2) +{ + const XML_Char *att1 = *(const XML_Char **)p1; + const XML_Char *att2 = *(const XML_Char **)p2; + int sep1 = (tcsrchr(att1, NSSEP) != 0); + int sep2 = (tcsrchr(att2, NSSEP) != 0); + if (sep1 != sep2) + return sep1 - sep2; + return tcscmp(att1, att2); +} + +static void startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) +{ + int nAtts; + int nsi; + const XML_Char **p; + FILE *fp = userData; + const XML_Char *sep; + puttc(T('<'), fp); + + sep = tcsrchr(name, NSSEP); + if (sep) { + fputts(T("n1:"), fp); + fputts(sep + 1, fp); + fputts(T(" xmlns:n1"), fp); + attributeValue(fp, name); + nsi = 2; + } + else { + fputts(name, fp); + nsi = 1; + } + + p = atts; + while (*p) + ++p; + nAtts = (p - atts) >> 1; + if (nAtts > 1) + qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp); + 
while (*atts) { + name = *atts++; + sep = tcsrchr(name, NSSEP); + puttc(T(' '), fp); + if (sep) { + ftprintf(fp, T("n%d:"), nsi); + fputts(sep + 1, fp); + } + else + fputts(name, fp); + attributeValue(fp, *atts); + if (sep) { + ftprintf(fp, T(" xmlns:n%d"), nsi++); + attributeValue(fp, name); + } + atts++; + } + puttc(T('>'), fp); +} + +static void endElementNS(void *userData, const XML_Char *name) +{ + FILE *fp = userData; + const XML_Char *sep; + puttc(T('<'), fp); + puttc(T('/'), fp); + sep = tcsrchr(name, NSSEP); + if (sep) { + fputts(T("n1:"), fp); + fputts(sep + 1, fp); + } + else + fputts(name, fp); + puttc(T('>'), fp); +} + +#ifndef W3C14N + +static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data) +{ + FILE *fp = userData; + puttc(T('<'), fp); + puttc(T('?'), fp); + fputts(target, fp); + puttc(T(' '), fp); + fputts(data, fp); + puttc(T('?'), fp); + puttc(T('>'), fp); +} + +#endif /* not W3C14N */ + +static void defaultCharacterData(XML_Parser parser, const XML_Char *s, int len) +{ + XML_DefaultCurrent(parser); +} + +static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) +{ + XML_DefaultCurrent(parser); +} + +static void defaultEndElement(XML_Parser parser, const XML_Char *name) +{ + XML_DefaultCurrent(parser); +} + +static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) +{ + XML_DefaultCurrent(parser); +} + +static void nopCharacterData(XML_Parser parser, const XML_Char *s, int len) +{ +} + +static void nopStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) +{ +} + +static void nopEndElement(XML_Parser parser, const XML_Char *name) +{ +} + +static void nopProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) +{ +} + +static void markup(XML_Parser parser, const XML_Char *s, int len) +{ + FILE *fp = XML_GetUserData(parser); + for (; len > 0; --len, ++s) + puttc(*s, fp); +} 
+ +static +void metaLocation(XML_Parser parser) +{ + const XML_Char *uri = XML_GetBase(parser); + if (uri) + ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri); + ftprintf(XML_GetUserData(parser), + T(" byte=\"%ld\" nbytes=\"%d\" line=\"%d\" col=\"%d\""), + XML_GetCurrentByteIndex(parser), + XML_GetCurrentByteCount(parser), + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser)); +} + +static +void metaStartDocument(XML_Parser parser) +{ + fputts(T("<document>\n"), XML_GetUserData(parser)); +} + +static +void metaEndDocument(XML_Parser parser) +{ + fputts(T("</document>\n"), XML_GetUserData(parser)); +} + +static +void metaStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) +{ + FILE *fp = XML_GetUserData(parser); + const XML_Char **specifiedAttsEnd + = atts + XML_GetSpecifiedAttributeCount(parser); + const XML_Char **idAttPtr; + int idAttIndex = XML_GetIdAttributeIndex(parser); + if (idAttIndex < 0) + idAttPtr = 0; + else + idAttPtr = atts + idAttIndex; + + ftprintf(fp, T("<starttag name=\"%s\""), name); + metaLocation(parser); + if (*atts) { + fputts(T(">\n"), fp); + do { + ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]); + characterData(fp, atts[1], tcslen(atts[1])); + if (atts >= specifiedAttsEnd) + fputts(T("\" defaulted=\"yes\"/>\n"), fp); + else if (atts == idAttPtr) + fputts(T("\" id=\"yes\"/>\n"), fp); + else + fputts(T("\"/>\n"), fp); + } while (*(atts += 2)); + fputts(T("</starttag>\n"), fp); + } + else + fputts(T("/>\n"), fp); +} + +static +void metaEndElement(XML_Parser parser, const XML_Char *name) +{ + FILE *fp = XML_GetUserData(parser); + ftprintf(fp, T("<endtag name=\"%s\""), name); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) +{ + FILE *fp = XML_GetUserData(parser); + ftprintf(fp, T("<pi target=\"%s\" data=\""), target); + characterData(fp, data, tcslen(data)); + 
puttc(T('"'), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaComment(XML_Parser parser, const XML_Char *data) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<comment data=\""), fp); + characterData(fp, data, tcslen(data)); + puttc(T('"'), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaStartCdataSection(XML_Parser parser) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<startcdata"), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaEndCdataSection(XML_Parser parser) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<endcdata"), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaCharacterData(XML_Parser parser, const XML_Char *s, int len) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<chars str=\""), fp); + characterData(fp, s, len); + puttc(T('"'), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaStartDoctypeDecl(XML_Parser parser, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ + FILE *fp = XML_GetUserData(parser); + ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaEndDoctypeDecl(XML_Parser parser) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<enddoctype"), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); +} + +static +void metaNotationDecl(XML_Parser parser, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + FILE *fp = XML_GetUserData(parser); + ftprintf(fp, T("<notation name=\"%s\""), notationName); + if (publicId) + ftprintf(fp, T(" public=\"%s\""), publicId); + if (systemId) { + fputts(T(" system=\""), fp); + characterData(fp, systemId, tcslen(systemId)); + puttc(T('"'), fp); + } + metaLocation(parser); + fputts(T("/>\n"), fp); +} + + +static +void metaEntityDecl(XML_Parser parser, 
+ const XML_Char *entityName, + int is_param, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + FILE *fp = XML_GetUserData(parser); + + /* An entity with a value is written as an element whose content is the +     escaped value; the other two forms are empty elements with attributes. */ + if (value) { + ftprintf(fp, T("<entity name=\"%s\""), entityName); + metaLocation(parser); + puttc(T('>'), fp); + characterData(fp, value, value_length); + fputts(T("</entity>\n"), fp); + } + else if (notationName) { + ftprintf(fp, T("<entity name=\"%s\""), entityName); + if (publicId) + ftprintf(fp, T(" public=\"%s\""), publicId); + fputts(T(" system=\""), fp); + characterData(fp, systemId, tcslen(systemId)); + puttc(T('"'), fp); + ftprintf(fp, T(" notation=\"%s\""), notationName); + metaLocation(parser); + fputts(T("/>\n"), fp); + } + else { + ftprintf(fp, T("<entity name=\"%s\""), entityName); + if (publicId) + ftprintf(fp, T(" public=\"%s\""), publicId); + fputts(T(" system=\""), fp); + characterData(fp, systemId, tcslen(systemId)); + puttc(T('"'), fp); + metaLocation(parser); + fputts(T("/>\n"), fp); + } +} + +static +void metaStartNamespaceDecl(XML_Parser parser, + const XML_Char *prefix, + const XML_Char *uri) +{ + FILE *fp = XML_GetUserData(parser); + fputts(T("<startns"), fp); + if (prefix) + ftprintf(fp, T(" prefix=\"%s\""), prefix); + if (uri) { + fputts(T(" ns=\""), fp); + characterData(fp, uri, tcslen(uri)); + fputts(T("\"/>\n"), fp); + } + else + fputts(T("/>\n"), fp); +} + +static +void metaEndNamespaceDecl(XML_Parser parser, const XML_Char *prefix) +{ + FILE *fp = XML_GetUserData(parser); + if (!prefix) + fputts(T("<endns/>\n"), fp); + else + ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix); +} + +static +int unknownEncodingConvert(void *data, const char *p) +{ + return codepageConvert(*(int *)data, p); +} + +static +int unknownEncoding(void *userData, + const XML_Char *name, + XML_Encoding *info) +{ + int cp; + static const XML_Char prefixL[] = T("windows-"); + static const XML_Char prefixU[] = 
T("WINDOWS-"); + int i; + + for (i = 0; prefixU[i]; i++) + if (name[i] != prefixU[i] && name[i] != prefixL[i]) + return 0; + + cp = 0; + for (; name[i]; i++) { + static const XML_Char digits[] = T("0123456789"); + const XML_Char *s = tcschr(digits, name[i]); + if (!s) + return 0; + cp *= 10; + cp += s - digits; + if (cp >= 0x10000) + return 0; + } + if (!codepageMap(cp, info->map)) + return 0; + info->convert = unknownEncodingConvert; + /* We could just cast the code page integer to a void *, + and avoid the use of release. */ + info->release = free; + info->data = malloc(sizeof(int)); + if (!info->data) + return 0; + *(int *)info->data = cp; + return 1; +} + +static +int notStandalone(void *userData) +{ + return 0; +} + +static +void usage(const XML_Char *prog) +{ + ftprintf(stderr, T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog); + exit(1); +} + +int tmain(int argc, XML_Char **argv) +{ + int i, j; + const XML_Char *outputDir = 0; + const XML_Char *encoding = 0; + unsigned processFlags = XML_MAP_FILE; + int windowsCodePages = 0; + int outputType = 0; + int useNamespaces = 0; + int requireStandalone = 0; + int paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + +#ifdef _MSC_VER + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); +#endif + + i = 1; + j = 0; + while (i < argc) { + if (j == 0) { + if (argv[i][0] != T('-')) + break; + if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { + i++; + break; + } + j++; + } + switch (argv[i][j]) { + case T('r'): + processFlags &= ~XML_MAP_FILE; + j++; + break; + case T('s'): + requireStandalone = 1; + j++; + break; + case T('n'): + useNamespaces = 1; + j++; + break; + case T('p'): + paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS; + /* fall through */ + case T('x'): + processFlags |= XML_EXTERNAL_ENTITIES; + j++; + break; + case T('w'): + windowsCodePages = 1; + j++; + break; + case T('m'): + outputType = 'm'; + j++; + break; + case T('c'): + outputType = 'c'; + 
useNamespaces = 0; + j++; + break; + case T('t'): + outputType = 't'; + j++; + break; + case T('d'): + if (argv[i][j + 1] == T('\0')) { + if (++i == argc) + usage(argv[0]); + outputDir = argv[i]; + } + else + outputDir = argv[i] + j + 1; + i++; + j = 0; + break; + case T('e'): + if (argv[i][j + 1] == T('\0')) { + if (++i == argc) + usage(argv[0]); + encoding = argv[i]; + } + else + encoding = argv[i] + j + 1; + i++; + j = 0; + break; + case T('\0'): + if (j > 1) { + i++; + j = 0; + break; + } + /* fall through */ + default: + usage(argv[0]); + } + } + if (i == argc) + usage(argv[0]); + for (; i < argc; i++) { + FILE *fp = 0; + XML_Char *outName = 0; + int result; + XML_Parser parser; + if (useNamespaces) + parser = XML_ParserCreateNS(encoding, NSSEP); + else + parser = XML_ParserCreate(encoding); + if (requireStandalone) + XML_SetNotStandaloneHandler(parser, notStandalone); + XML_SetParamEntityParsing(parser, paramEntityParsing); + if (outputType == 't') { + /* This is for doing timings; this gives a more realistic estimate of + the parsing time. 
*/ + outputDir = 0; + XML_SetElementHandler(parser, nopStartElement, nopEndElement); + XML_SetCharacterDataHandler(parser, nopCharacterData); + XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction); + } + else if (outputDir) { + const XML_Char *file = argv[i]; + if (tcsrchr(file, T('/'))) + file = tcsrchr(file, T('/')) + 1; +#ifdef WIN32 + if (tcsrchr(file, T('\\'))) + file = tcsrchr(file, T('\\')) + 1; +#endif + outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char)); + tcscpy(outName, outputDir); + tcscat(outName, T("/")); + tcscat(outName, file); + fp = tfopen(outName, T("wb")); + if (!fp) { + tperror(outName); + exit(1); + } + setvbuf(fp, NULL, _IOFBF, 16384); +#ifdef XML_UNICODE + puttc(0xFEFF, fp); +#endif + XML_SetUserData(parser, fp); + switch (outputType) { + case 'm': + XML_UseParserAsHandlerArg(parser); + XML_SetElementHandler(parser, metaStartElement, metaEndElement); + XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); + XML_SetCommentHandler(parser, metaComment); + XML_SetCdataSectionHandler(parser, metaStartCdataSection, metaEndCdataSection); + XML_SetCharacterDataHandler(parser, metaCharacterData); + XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, metaEndDoctypeDecl); + XML_SetEntityDeclHandler(parser, metaEntityDecl); + XML_SetNotationDeclHandler(parser, metaNotationDecl); + XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, metaEndNamespaceDecl); + metaStartDocument(parser); + break; + case 'c': + XML_UseParserAsHandlerArg(parser); + XML_SetDefaultHandler(parser, markup); + XML_SetElementHandler(parser, defaultStartElement, defaultEndElement); + XML_SetCharacterDataHandler(parser, defaultCharacterData); + XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction); + break; + default: + if (useNamespaces) + XML_SetElementHandler(parser, startElementNS, endElementNS); + else + XML_SetElementHandler(parser, startElement, endElement); + 
XML_SetCharacterDataHandler(parser, characterData); +#ifndef W3C14N + XML_SetProcessingInstructionHandler(parser, processingInstruction); +#endif /* not W3C14N */ + break; + } + } + if (windowsCodePages) + XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); + result = XML_ProcessFile(parser, argv[i], processFlags); + if (outputDir) { + if (outputType == 'm') + metaEndDocument(parser); + fclose(fp); + if (!result) + tremove(outName); + free(outName); + } + XML_ParserFree(parser); + } + return 0; +} diff --git a/expat/xmlwf/xmlwf.dsp b/expat/xmlwf/xmlwf.dsp new file mode 100644 index 000000000..51de958bf --- /dev/null +++ b/expat/xmlwf/xmlwf.dsp @@ -0,0 +1,136 @@ +# Microsoft Developer Studio Project File - Name="xmlwf" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=xmlwf - Win32 Release +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "xmlwf.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "xmlwf.mak" CFG="xmlwf - Win32 Release" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "xmlwf - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "xmlwf - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "xmlwf - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir ".\Release" +# PROP BASE Intermediate_Dir ".\Release" +# PROP BASE Target_Dir "." 
+# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir ".\Release" +# PROP Intermediate_Dir ".\Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "." +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /W3 /GX /O2 /I "..\xmlparse" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllimport) /YX /FD /c +# ADD BASE RSC /l 0x809 /d "NDEBUG" +# ADD RSC /l 0x809 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /out:"..\bin\xmlwf.exe" + +!ELSEIF "$(CFG)" == "xmlwf - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir ".\Debug" +# PROP BASE Intermediate_Dir ".\Debug" +# PROP BASE Target_Dir "." +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir ".\Debug" +# PROP Intermediate_Dir ".\Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "." 
+# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c +# ADD CPP /nologo /MDd /W3 /Gm /GX /ZI /Od /I ".\xmlparse" /I "..\xmlparse" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllimport) /YX /FD /c +# ADD BASE RSC /l 0x809 /d "_DEBUG" +# ADD RSC /l 0x809 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 +# ADD LINK32 setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /out:"..\dbgbin\xmlwf.exe" + +!ENDIF + +# Begin Target + +# Name "xmlwf - Win32 Release" +# Name "xmlwf - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90" +# Begin Source File + +SOURCE=.\codepage.c +# End Source File +# Begin Source File + +SOURCE=.\readfilemap.c +# PROP Exclude_From_Build 1 +# End Source File +# Begin Source File + +SOURCE=.\unixfilemap.c +# PROP Exclude_From_Build 1 +# End Source File +# Begin Source File + +SOURCE=.\win32filemap.c +# End Source File +# Begin Source File + +SOURCE=.\xmlfile.c +# End Source File +# Begin Source File + +SOURCE=.\xmlwf.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd" +# Begin Source File + +SOURCE=.\codepage.h +# End Source File +# Begin Source File + +SOURCE=.\xmlfile.h +# End Source File +# Begin Source File + +SOURCE=.\xmltchar.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git 
a/expat/xmlwf/xmlwin32url.cxx b/expat/xmlwf/xmlwin32url.cxx new file mode 100644 index 000000000..4f33aa8e7 --- /dev/null +++ b/expat/xmlwf/xmlwin32url.cxx @@ -0,0 +1,371 @@ +#include "xmlparse.h" +#ifdef XML_UNICODE +#define UNICODE +#endif +#include <windows.h> +#include <urlmon.h> +#include <wininet.h> +#include <stdio.h> +#include <tchar.h> +#include "xmlurl.h" +#include "xmlmime.h" + +static +int processURL(XML_Parser parser, IMoniker *baseMoniker, const XML_Char *url); + +typedef void (*StopHandler)(void *, HRESULT); + +class Callback : public IBindStatusCallback { +public: + // IUnknown methods + STDMETHODIMP QueryInterface(REFIID,void **); + STDMETHODIMP_(ULONG) AddRef(); + STDMETHODIMP_(ULONG) Release(); + // IBindStatusCallback methods + STDMETHODIMP OnStartBinding(DWORD, IBinding *); + STDMETHODIMP GetPriority(LONG *); + STDMETHODIMP OnLowResource(DWORD); + STDMETHODIMP OnProgress(ULONG, ULONG, ULONG, LPCWSTR); + STDMETHODIMP OnStopBinding(HRESULT, LPCWSTR); + STDMETHODIMP GetBindInfo(DWORD *, BINDINFO *); + STDMETHODIMP OnDataAvailable(DWORD, DWORD, FORMATETC *, STGMEDIUM *); + STDMETHODIMP OnObjectAvailable(REFIID, IUnknown *); + Callback(XML_Parser, IMoniker *, StopHandler, void *); + ~Callback(); + int externalEntityRef(const XML_Char *context, const XML_Char *systemId, const XML_Char *publicId); +private: + XML_Parser parser_; + IMoniker *baseMoniker_; + DWORD totalRead_; + ULONG ref_; + IBinding *pBinding_; + StopHandler stopHandler_; + void *stopArg_; +}; + +STDMETHODIMP_(ULONG) Callback::AddRef() +{ + return ref_++; +} + +STDMETHODIMP_(ULONG) Callback::Release() +{ + if (--ref_ == 0) { + delete this; + return 0; + } + return ref_; +} + +STDMETHODIMP Callback::QueryInterface(REFIID riid, void** ppv) +{ + if (IsEqualGUID(riid, IID_IUnknown)) + *ppv = (IUnknown *)this; + else if (IsEqualGUID(riid, IID_IBindStatusCallback)) + *ppv = (IBindStatusCallback *)this; + else + return E_NOINTERFACE; + ((LPUNKNOWN)*ppv)->AddRef(); + return S_OK; +} + 
+STDMETHODIMP Callback::OnStartBinding(DWORD, IBinding* pBinding) +{ + pBinding_ = pBinding; + pBinding->AddRef(); + return S_OK; +} + +STDMETHODIMP Callback::GetPriority(LONG *) +{ + return E_NOTIMPL; +} + +STDMETHODIMP Callback::OnLowResource(DWORD) +{ + return E_NOTIMPL; +} + +STDMETHODIMP Callback::OnProgress(ULONG, ULONG, ULONG, LPCWSTR) +{ + return S_OK; +} + +STDMETHODIMP Callback::OnStopBinding(HRESULT hr, LPCWSTR szError) +{ + if (pBinding_) { + pBinding_->Release(); + pBinding_ = 0; + } + if (baseMoniker_) { + baseMoniker_->Release(); + baseMoniker_ = 0; + } + stopHandler_(stopArg_, hr); + return S_OK; +} + +STDMETHODIMP Callback::GetBindInfo(DWORD* pgrfBINDF, BINDINFO* pbindinfo) +{ + *pgrfBINDF = BINDF_ASYNCHRONOUS; + return S_OK; +} + +static +void reportError(XML_Parser parser) +{ + int code = XML_GetErrorCode(parser); + const XML_Char *message = XML_ErrorString(code); + if (message) + _ftprintf(stderr, _T("%s:%d:%ld: %s\n"), + XML_GetBase(parser), + XML_GetErrorLineNumber(parser), + XML_GetErrorColumnNumber(parser), + message); + else + _ftprintf(stderr, _T("%s: (unknown message %d)\n"), XML_GetBase(parser), code); +} + +STDMETHODIMP Callback::OnDataAvailable(DWORD grfBSCF, + DWORD dwSize, + FORMATETC *pfmtetc, + STGMEDIUM* pstgmed) +{ + if (grfBSCF & BSCF_FIRSTDATANOTIFICATION) { + IWinInetHttpInfo *hp; + HRESULT hr = pBinding_->QueryInterface(IID_IWinInetHttpInfo, (void **)&hp); + if (SUCCEEDED(hr)) { + char contentType[1024]; + DWORD bufSize = sizeof(contentType); + DWORD flags = 0; + contentType[0] = 0; + hr = hp->QueryInfo(HTTP_QUERY_CONTENT_TYPE, contentType, &bufSize, 0, NULL); + if (SUCCEEDED(hr)) { + char charset[CHARSET_MAX]; + getXMLCharset(contentType, charset); + if (charset[0]) { +#ifdef XML_UNICODE + XML_Char wcharset[CHARSET_MAX]; + XML_Char *p1 = wcharset; + const char *p2 = charset; + while ((*p1++ = (unsigned char)*p2++) != 0) + ; + XML_SetEncoding(parser_, wcharset); +#else + XML_SetEncoding(parser_, charset); +#endif + } + } + 
hp->Release(); + } + } + if (!parser_) + return E_ABORT; + if (pstgmed->tymed == TYMED_ISTREAM) { + while (totalRead_ < dwSize) { +#define READ_MAX (64*1024) + DWORD nToRead = dwSize - totalRead_; + if (nToRead > READ_MAX) + nToRead = READ_MAX; + void *buf = XML_GetBuffer(parser_, nToRead); + if (!buf) { + _ftprintf(stderr, _T("out of memory\n")); + return E_ABORT; + } + DWORD nRead; + HRESULT hr = pstgmed->pstm->Read(buf, nToRead, &nRead); + if (SUCCEEDED(hr)) { + totalRead_ += nRead; + if (!XML_ParseBuffer(parser_, + nRead, + (grfBSCF & BSCF_LASTDATANOTIFICATION) != 0 + && totalRead_ == dwSize)) { + reportError(parser_); + return E_ABORT; + } + } + } + } + return S_OK; +} + +STDMETHODIMP Callback::OnObjectAvailable(REFIID, IUnknown *) +{ + return S_OK; +} + +int Callback::externalEntityRef(const XML_Char *context, const XML_Char *systemId, const XML_Char *publicId) +{ + XML_Parser entParser = XML_ExternalEntityParserCreate(parser_, context, 0); + XML_SetBase(entParser, systemId); + int ret = processURL(entParser, baseMoniker_, systemId); + XML_ParserFree(entParser); + return ret; +} + +Callback::Callback(XML_Parser parser, IMoniker *baseMoniker, StopHandler stopHandler, void *stopArg) +: parser_(parser), + baseMoniker_(baseMoniker), + ref_(0), + pBinding_(0), + totalRead_(0), + stopHandler_(stopHandler), + stopArg_(stopArg) +{ + if (baseMoniker_) + baseMoniker_->AddRef(); +} + +Callback::~Callback() +{ + if (pBinding_) + pBinding_->Release(); + if (baseMoniker_) + baseMoniker_->Release(); +} + +static +int externalEntityRef(void *arg, + const XML_Char *context, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) +{ + return ((Callback *)arg)->externalEntityRef(context, systemId, publicId); +} + + +static +HRESULT openStream(XML_Parser parser, + IMoniker *baseMoniker, + const XML_Char *uri, + StopHandler stopHandler, void *stopArg) +{ + if (!XML_SetBase(parser, uri)) + return E_OUTOFMEMORY; + HRESULT hr; + IMoniker *m; +#ifdef 
XML_UNICODE + hr = CreateURLMoniker(0, uri, &m); +#else + LPWSTR uriw = new wchar_t[strlen(uri) + 1]; + for (int i = 0;; i++) { + uriw[i] = uri[i]; + if (uriw[i] == 0) + break; + } + hr = CreateURLMoniker(baseMoniker, uriw, &m); + delete [] uriw; +#endif + if (FAILED(hr)) + return hr; + IBindStatusCallback *cb = new Callback(parser, m, stopHandler, stopArg); + XML_SetExternalEntityRefHandler(parser, externalEntityRef); + XML_SetExternalEntityRefHandlerArg(parser, cb); + cb->AddRef(); + IBindCtx *b; + if (FAILED(hr = CreateAsyncBindCtx(0, cb, 0, &b))) { + cb->Release(); + m->Release(); + return hr; + } + cb->Release(); + IStream *pStream; + hr = m->BindToStorage(b, 0, IID_IStream, (void **)&pStream); + if (SUCCEEDED(hr)) { + if (pStream) + pStream->Release(); + } + if (hr == MK_S_ASYNCHRONOUS) + hr = S_OK; + m->Release(); + b->Release(); + return hr; +} + +struct QuitInfo { + const XML_Char *url; + HRESULT hr; + int stop; +}; + +static +void winPerror(const XML_Char *url, HRESULT hr) +{ + LPVOID buf; + if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER + | FORMAT_MESSAGE_FROM_HMODULE, + GetModuleHandleA("urlmon.dll"), + hr, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &buf, + 0, + NULL) + || FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER + | FORMAT_MESSAGE_FROM_SYSTEM, + 0, + hr, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &buf, + 0, + NULL)) { + /* The system error messages seem to end with a newline. 
*/ + _ftprintf(stderr, _T("%s: %s"), url, buf); + fflush(stderr); + LocalFree(buf); + } + else + _ftprintf(stderr, _T("%s: error %x\n"), url, hr); +} + +static void threadQuit(void *p, HRESULT hr) +{ + QuitInfo *qi = (QuitInfo *)p; + qi->hr = hr; + qi->stop = 1; +} + +extern "C" +int XML_URLInit() +{ + return SUCCEEDED(CoInitialize(0)); +} + +extern "C" +void XML_URLUninit() +{ + CoUninitialize(); +} + +static +int processURL(XML_Parser parser, IMoniker *baseMoniker, const XML_Char *url) +{ + QuitInfo qi; + qi.stop = 0; + qi.url = url; + + XML_SetBase(parser, url); + HRESULT hr = openStream(parser, baseMoniker, url, threadQuit, &qi); + if (FAILED(hr)) { + winPerror(url, hr); + return 0; + } + else if (FAILED(qi.hr)) { + winPerror(url, qi.hr); + return 0; + } + MSG msg; + while (!qi.stop && GetMessage (&msg, NULL, 0, 0)) { + TranslateMessage (&msg); + DispatchMessage (&msg); + } + return 1; +} + +extern "C" +int XML_ProcessURL(XML_Parser parser, + const XML_Char *url, + unsigned flags) +{ + return processURL(parser, 0, url); +} |