diff options
author | jk7744.park <jk7744.park@samsung.com> | 2015-09-08 22:07:42 +0900 |
---|---|---|
committer | jk7744.park <jk7744.park@samsung.com> | 2015-09-08 22:07:42 +0900 |
commit | 4732847654ed74ed893b7ea088805832fd91cc18 (patch) | |
tree | 393207d68983c3b587733b83714bef2de2457025 /src | |
download | xmlstarlet-accepted/tizen_2.4_mobile.tar.gz xmlstarlet-accepted/tizen_2.4_mobile.tar.bz2 xmlstarlet-accepted/tizen_2.4_mobile.zip |
tizen 2.3.1 releasetizen_2.4_mobile_releasetizen_2.3.1_releasesubmit/tizen_2.4/20151028.063429submit/tizen_2.3.1/20150915.075556accepted/tizen/2.4/mobile/20151029.035146tizen_2.4tizen_2.3.1accepted/tizen_2.4_mobile
Diffstat (limited to 'src')
-rw-r--r-- | src/c14n-usage.txt | 22 | ||||
-rw-r--r-- | src/depyx-usage.txt | 10 | ||||
-rw-r--r-- | src/edit-usage.txt | 29 | ||||
-rw-r--r-- | src/elem-usage.txt | 10 | ||||
-rw-r--r-- | src/escape-usage.txt | 7 | ||||
-rw-r--r-- | src/escape.h | 52 | ||||
-rw-r--r-- | src/format-usage.txt | 19 | ||||
-rw-r--r-- | src/ls-usage.txt | 5 | ||||
-rw-r--r-- | src/pyx-usage.txt | 14 | ||||
-rw-r--r-- | src/select-usage.txt | 77 | ||||
-rw-r--r-- | src/sources.mk | 47 | ||||
-rw-r--r-- | src/trans-usage.txt | 23 | ||||
-rw-r--r-- | src/trans.c | 286 | ||||
-rw-r--r-- | src/trans.h | 96 | ||||
-rw-r--r-- | src/unescape-usage.txt | 7 | ||||
-rw-r--r-- | src/usage.txt | 23 | ||||
-rw-r--r-- | src/validate-usage.txt | 23 | ||||
-rw-r--r-- | src/xml.c | 353 | ||||
-rw-r--r-- | src/xml_C14N.c | 359 | ||||
-rw-r--r-- | src/xml_depyx.c | 242 | ||||
-rw-r--r-- | src/xml_edit.c | 742 | ||||
-rw-r--r-- | src/xml_elem.c | 286 | ||||
-rw-r--r-- | src/xml_escape.c | 343 | ||||
-rw-r--r-- | src/xml_format.c | 392 | ||||
-rw-r--r-- | src/xml_ls.c | 204 | ||||
-rw-r--r-- | src/xml_pyx.c | 293 | ||||
-rw-r--r-- | src/xml_select.c | 1017 | ||||
-rw-r--r-- | src/xml_trans.c | 282 | ||||
-rw-r--r-- | src/xml_validate.c | 471 | ||||
-rw-r--r-- | src/xmlstar.h | 43 |
30 files changed, 5777 insertions, 0 deletions
diff --git a/src/c14n-usage.txt b/src/c14n-usage.txt new file mode 100644 index 0000000..6ffc50d --- /dev/null +++ b/src/c14n-usage.txt @@ -0,0 +1,22 @@ +XMLStarlet Toolkit: XML canonicalization +Usage: PROG c14n [--net] <mode> <xml-file> [<xpath-file>] [<inclusive-ns-list>] +where + <xml-file> - input XML document file name (stdin is used if '-') + <xpath-file> - XML file containing XPath expression for + c14n XML canonicalization + Example: + <?xml version="1.0"?> + <XPath xmlns:n0="http://a.example.com" xmlns:n1="http://b.example"> + (//. | //@* | //namespace::*)[ancestor-or-self::n1:elem1] + </XPath> + + <inclusive-ns-list> - the list of inclusive namespace prefixes + (only for exclusive canonicalization) + Example: 'n1 n2' + + <mode> is one of following: + --with-comments XML file canonicalization w comments (default) + --without-comments XML file canonicalization w/o comments + --exc-with-comments Exclusive XML file canonicalization w comments + --exc-without-comments Exclusive XML file canonicalization w/o comments + diff --git a/src/depyx-usage.txt b/src/depyx-usage.txt new file mode 100644 index 0000000..7400705 --- /dev/null +++ b/src/depyx-usage.txt @@ -0,0 +1,10 @@ +XMLStarlet Toolkit: Convert PYX into XML +Usage: PROG p2x [<pyx-file>] +where + <pyx-file> - input PYX document file name (stdin is used if missing) + +The PYX format is a line-oriented representation of +XML documents that is derived from the SGML ESIS format. +(see ESIS - ISO 8879 Element Structure Information Set spec, +ISO/IEC JTC1/SC18/WG8 N931 (ESIS)) + diff --git a/src/edit-usage.txt b/src/edit-usage.txt new file mode 100644 index 0000000..719e239 --- /dev/null +++ b/src/edit-usage.txt @@ -0,0 +1,29 @@ +XMLStarlet Toolkit: Edit XML document(s) +Usage: PROG ed <global-options> {<action>} [ <xml-file-or-uri> ... 
] +where + <global-options> - global options for editing + <xml-file-or-uri> - input XML document file name/uri (stdin otherwise) + +<global-options> are: + -P, or -S - preserve whitespace nodes. + (or --pf, --ps) Note that space between attributes is not preserved + -O (or --omit-decl) - omit XML declaration (<?xml ...?>) + -L (or --inplace) - edit file inplace + -N <name>=<value> - predefine namespaces (name without 'xmlns:') + ex: xsql=urn:oracle-xsql + Multiple -N options are allowed. + -N options must be last global options. + --net - allow network access + --help or -h - display help + +where <action> + -d or --delete <xpath> + --var <name> <xpath> + -i or --insert <xpath> -t (--type) elem|text|attr -n <name> -v (--value) <value> + -a or --append <xpath> -t (--type) elem|text|attr -n <name> -v (--value) <value> + -s or --subnode <xpath> -t (--type) elem|text|attr -n <name> -v (--value) <value> + -m or --move <xpath1> <xpath2> + -r or --rename <xpath1> -v <new-name> + -u or --update <xpath> -v (--value) <value> + -x (--expr) <xpath> + diff --git a/src/elem-usage.txt b/src/elem-usage.txt new file mode 100644 index 0000000..f1537da --- /dev/null +++ b/src/elem-usage.txt @@ -0,0 +1,10 @@ +XMLStarlet Toolkit: Display element structure of XML document +Usage: PROG el [<options>] <xml-file> +where + <xml-file> - input XML document file name (stdin is used if missing) + <options> is one of: + -a - show attributes as well + -v - show attributes and their values + -u - print out sorted unique lines + -d<n> - print out sorted unique lines up to depth <n> + diff --git a/src/escape-usage.txt b/src/escape-usage.txt new file mode 100644 index 0000000..1ac6ec2 --- /dev/null +++ b/src/escape-usage.txt @@ -0,0 +1,7 @@ +XMLStarlet Toolkit: Escape special XML characters +Usage: PROG esc [<options>] [<string>] +where <options> are + --help - print usage + (TODO: more to be added in future) +if <string> is missing stdin is used instead. 
+ diff --git a/src/escape.h b/src/escape.h new file mode 100644 index 0000000..31e1369 --- /dev/null +++ b/src/escape.h @@ -0,0 +1,52 @@ +/* $Id: escape.h,v 1.2 2005/03/12 03:24:23 mgrouch Exp $ */ + +#ifndef __ESCAPE_H +#define __ESCAPE_H + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2004 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +typedef enum { + XML_C14N_NORMALIZE_ATTR = 0, + XML_C14N_NORMALIZE_COMMENT = 1, + XML_C14N_NORMALIZE_PI = 2, + XML_C14N_NORMALIZE_TEXT = 3, + XML_C14N_NORMALIZE_NOTHING = 4 +} xml_C14NNormalizationMode; + +extern xmlChar *xml_C11NNormalizeString(const xmlChar * input, + xml_C14NNormalizationMode mode); + +#define xml_C11NNormalizeAttr( a ) \ + xml_C11NNormalizeString((a), XML_C14N_NORMALIZE_ATTR) +#define xml_C11NNormalizeComment( a ) \ + xml_C11NNormalizeString((a), XML_C14N_NORMALIZE_COMMENT) +#define xml_C11NNormalizePI( a ) \ + xml_C11NNormalizeString((a), XML_C14N_NORMALIZE_PI) +#define xml_C11NNormalizeText( a ) \ + xml_C11NNormalizeString((a), XML_C14N_NORMALIZE_TEXT) + +#endif /* __ESCAPE_H */ diff --git a/src/format-usage.txt b/src/format-usage.txt new file mode 100644 index 0000000..7a93cf7 --- /dev/null +++ b/src/format-usage.txt @@ -0,0 +1,19 @@ +XMLStarlet Toolkit: Format XML document +Usage: PROG fo [<options>] <xml-file> +where <options> are + -n or --noindent - do not indent + -t or --indent-tab - indent output with tabulation + -s or --indent-spaces <num> - indent output with <num> spaces + -o or --omit-decl - omit xml declaration <?xml version="1.0"?> + --net - allow network access + -R or --recover - try to recover what is parsable + -D or --dropdtd - remove the DOCTYPE of the input docs + -C or --nocdata - replace cdata section with text nodes + -N or --nsclean - remove redundant namespace declarations + -e or --encode <encoding> - output in the given encoding (utf-8, unicode...) +#ifdef LIBXML_HTML_ENABLED + -H or --html - input is HTML +#endif + -Q or --quiet - Suppress errors from libxml2 + -h or --help - print help + diff --git a/src/ls-usage.txt b/src/ls-usage.txt new file mode 100644 index 0000000..d3b9574 --- /dev/null +++ b/src/ls-usage.txt @@ -0,0 +1,5 @@ +XMLStarlet Toolkit: List directory as XML +Usage: PROG ls [ <dir> | --help ] +Lists current directory in XML format. +Time is shown per ISO 8601 spec. 
+ diff --git a/src/pyx-usage.txt b/src/pyx-usage.txt new file mode 100644 index 0000000..d94d277 --- /dev/null +++ b/src/pyx-usage.txt @@ -0,0 +1,14 @@ +XMLStarlet Toolkit: Convert XML into PYX format (based on ESIS - ISO 8879) +Usage: PROG pyx {<xml-file>} +where + <xml-file> - input XML document file name (stdin is used if missing) + +The PYX format is a line-oriented representation of +XML documents that is derived from the SGML ESIS format. +(see ESIS - ISO 8879 Element Structure Information Set spec, +ISO/IEC JTC1/SC18/WG8 N931 (ESIS)) + +A non-validating, ESIS generating tool originally developed for +pyxie project (see http://pyxie.sourceforge.net/) +ESIS Generation by Sean Mc Grath http://www.digitome.com/sean.html + diff --git a/src/select-usage.txt b/src/select-usage.txt new file mode 100644 index 0000000..266f7fa --- /dev/null +++ b/src/select-usage.txt @@ -0,0 +1,77 @@ +XMLStarlet Toolkit: Select from XML document(s) +Usage: PROG sel <global-options> {<template>} [ <xml-file> ... ] +where + <global-options> - global options for selecting + <xml-file> - input XML document file name/uri (stdin is used if missing) + <template> - template for querying XML document with following syntax: + +<global-options> are: + -Q or --quiet - do not write anything to standard output. + -C or --comp - display generated XSLT + -R or --root - print root element <xsl-select> + -T or --text - output is text (default is XML) + -I or --indent - indent output + -D or --xml-decl - do not omit xml declaration line + -B or --noblanks - remove insignificant spaces from XML tree + -E or --encode <encoding> - output in the given encoding (utf-8, unicode...) + -N <name>=<value> - predefine namespaces (name without 'xmlns:') + ex: xsql=urn:oracle-xsql + Multiple -N options are allowed. 
+ --net - allow fetch DTDs or entities over network + --help - display help + +Syntax for templates: -t|--template <options> +where <options> + -c or --copy-of <xpath> - print copy of XPATH expression + -v or --value-of <xpath> - print value of XPATH expression + -o or --output <string> - output string literal + -n or --nl - print new line + -f or --inp-name - print input file name (or URL) + -m or --match <xpath> - match XPATH expression + --var <name> <value> --break or + --var <name>=<value> - declare a variable (referenced by $name) + -i or --if <test-xpath> - check condition <xsl:if test="test-xpath"> + --elif <test-xpath> - check condition if previous conditions failed + --else - check if previous conditions failed + -e or --elem <name> - print out element <xsl:element name="name"> + -a or --attr <name> - add attribute <xsl:attribute name="name"> + -b or --break - break nesting + -s or --sort op xpath - sort in order (used after -m) where + op is X:Y:Z, + X is A - for order="ascending" + X is D - for order="descending" + Y is N - for data-type="numeric" + Y is T - for data-type="text" + Z is U - for case-order="upper-first" + Z is L - for case-order="lower-first" + +There can be multiple --match, --copy-of, --value-of, etc options +in a single template. 
The effect of applying command line templates +can be illustrated with the following XSLT analogue + +PROG sel -t -c "xpath0" -m "xpath1" -m "xpath2" -v "xpath3" \ + -t -m "xpath4" -c "xpath5" + +is equivalent to applying the following XSLT + +<?xml version="1.0"?> +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:template match="/"> + <xsl:call-template name="t1"/> + <xsl:call-template name="t2"/> +</xsl:template> +<xsl:template name="t1"> + <xsl:copy-of select="xpath0"/> + <xsl:for-each select="xpath1"> + <xsl:for-each select="xpath2"> + <xsl:value-of select="xpath3"/> + </xsl:for-each> + </xsl:for-each> +</xsl:template> +<xsl:template name="t2"> + <xsl:for-each select="xpath4"> + <xsl:copy-of select="xpath5"/> + </xsl:for-each> +</xsl:template> +</xsl:stylesheet> + diff --git a/src/sources.mk b/src/sources.mk new file mode 100644 index 0000000..c511ffd --- /dev/null +++ b/src/sources.mk @@ -0,0 +1,47 @@ +usage_texts =\ +src/usage.txt\ +src/c14n-usage.txt\ +src/depyx-usage.txt\ +src/edit-usage.txt\ +src/elem-usage.txt\ +src/escape-usage.txt\ +src/format-usage.txt\ +src/ls-usage.txt\ +src/pyx-usage.txt\ +src/select-usage.txt\ +src/trans-usage.txt\ +src/unescape-usage.txt\ +src/validate-usage.txt + +generated_usage_sources =\ +src/usage.c\ +src/c14n-usage.c\ +src/depyx-usage.c\ +src/edit-usage.c\ +src/elem-usage.c\ +src/escape-usage.c\ +src/format-usage.c\ +src/ls-usage.c\ +src/pyx-usage.c\ +src/select-usage.c\ +src/trans-usage.c\ +src/unescape-usage.c\ +src/validate-usage.c + +xml_SOURCES =\ +src/escape.h\ +src/trans.c\ +src/trans.h\ +src/xml.c\ +src/xml_C14N.c\ +src/xml_depyx.c\ +src/xml_edit.c\ +src/xml_elem.c\ +src/xml_escape.c\ +src/xml_format.c\ +src/xml_ls.c\ +src/xml_pyx.c\ +src/xml_select.c\ +src/xmlstar.h\ +src/xml_trans.c\ +src/xml_validate.c diff --git a/src/trans-usage.txt b/src/trans-usage.txt new file mode 100644 index 0000000..94abec9 --- /dev/null +++ b/src/trans-usage.txt @@ -0,0 +1,23 @@ +XMLStarlet Toolkit: 
Transform XML document(s) using XSLT +Usage: PROG tr [<options>] <xsl-file> {-p|-s <name>=<value>} [<xml-file>...] +where + <xsl-file> - main XSLT stylesheet for transformation + <xml-file> - input XML document file/URL (stdin is used if missing) + <name>=<value> - name and value of the parameter passed to XSLT processor + -p - parameter is XPATH expression ("'string'" to quote string) + -s - parameter is a string literal +<options> are: + --help or -h - display help message + --omit-decl - omit xml declaration <?xml version="1.0"?> + --embed or -E - allow applying embedded stylesheet + --show-ext - show list of extensions + --val - allow validate against DTDs or schemas + --net - allow fetch DTDs or entities over network +#ifdef LIBXML_XINCLUDE_ENABLED + --xinclude - do XInclude processing on document input +#endif + --maxdepth val - increase the maximum depth +#ifdef LIBXML_HTML_ENABLED + --html - input document(s) is(are) in HTML format +#endif + diff --git a/src/trans.c b/src/trans.c new file mode 100644 index 0000000..3603436 --- /dev/null +++ b/src/trans.c @@ -0,0 +1,286 @@
/*  $Id: trans.c,v 1.19 2004/11/22 02:28:21 mgrouch Exp $  */

#include <config.h>
#include "trans.h"
#include "xmlstar.h"

/*
 *  This code is based on xsltproc by Daniel Veillard (daniel@veillard.com)
 *  (see also http://xmlsoft.org/)
 */

/* Last error code recorded by the functions below; returned by xsltRun(). */
int errorno = 0;

/**
 *  Initialize global command line options
 *
 *  Sets every field of @ops to its default: validation and network
 *  access are disabled (noval = 1, nonet = 1), all other switches are off.
 */
void
xsltInitOptions(xsltOptionsPtr ops)
{
    ops->noval = 1;
    ops->nonet = 1;
    ops->omit_decl = 0;
    ops->show_extensions = 0;
    ops->noblanks = 0;
    ops->embed = 0;
#ifdef LIBXML_XINCLUDE_ENABLED
    ops->xinclude = 0;
#endif
#ifdef LIBXML_HTML_ENABLED
    ops->html = 0;
#endif
#ifdef LIBXML_CATALOG_ENABLED
    ops->catalogs = 0;
#endif
}

/**
 *  Initialize LibXML
 *
 *  Applies the options in @ops to the libxml2/libxslt global defaults.
 *  When ops->show_extensions is set, the extension list is dumped to
 *  stderr and the process exits with EXIT_SUCCESS.
 */
void
xsltInitLibXml(xsltOptionsPtr ops)
{
    /*
     * Initialize library memory
     */
    xmlInitMemory();

    LIBXML_TEST_VERSION

    /*
     * Store line numbers in the document tree
     */
    xmlLineNumbersDefault(1);

    /*
     * Register the EXSLT extensions
     */
    exsltRegisterAll();

    /*
     * Register the test module
     */
    xsltRegisterTestModule();

    if (ops->show_extensions)
    {
        xsltDebugDumpExtensions(stderr);
        exit(EXIT_SUCCESS);
    }

    xmlKeepBlanksDefault(1);
    if (ops->noblanks) xmlKeepBlanksDefault(0);
    xmlPedanticParserDefault(0);

    xmlGetWarningsDefaultValue = 1;
    /*xmlDoValidityCheckingDefaultValue = 0;*/
    xmlLoadExtDtdDefaultValue = 1;

    /*
     * DTD validation options
     */
    if (ops->noval == 0)
    {
        xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS;
    }
    else
    {
        xmlLoadExtDtdDefaultValue = 0;
    }

#ifdef LIBXML_XINCLUDE_ENABLED
    /*
     * enable XInclude
     */
    if (ops->xinclude)
        xsltSetXIncludeDefault(1);
#endif

#ifdef LIBXML_CATALOG_ENABLED
    /*
     * enable SGML catalogs
     */
    if (ops->catalogs)
    {
        char *catalogs = getenv("SGML_CATALOG_FILES");
        if (catalogs == NULL)
            fprintf(stderr, "Variable $SGML_CATALOG_FILES not set\n");
        else
            xmlLoadCatalogs(catalogs);
    }
#endif
}

/**
 *  Get result of XSL transformation
 *
 *  Applies stylesheet @cur to @doc with @params and returns the result
 *  document (NULL on failure; the caller frees a non-NULL result).
 *  This function always releases @doc, on every path, so the caller
 *  must not use or free @doc afterwards.
 *  Sets the global errorno to 9 or 10 when the transform context ends
 *  in the error or stopped state.
 */
xmlDocPtr
xsltTransform(xsltOptionsPtr ops, xmlDocPtr doc, const char** params,
              xsltStylesheetPtr cur, const char *filename)
{
    xsltTransformContextPtr ctxt;
    xmlDocPtr res;

    if (ops->omit_decl)
    {
        cur->omitXmlDeclaration = 1;
    }

#ifdef LIBXML_XINCLUDE_ENABLED
    if (ops->xinclude) xmlXIncludeProcess(doc);
#endif

    ctxt = xsltNewTransformContext(cur, doc);
    if (ctxt == NULL)
    {
        /* doc is owned by this function: release it on the early exit
           too, exactly as the normal path below does */
        xmlFreeDoc(doc);
        return NULL;
    }

    res = xsltApplyStylesheetUser(cur, doc, params, NULL, NULL, ctxt);

    if (ctxt->state == XSLT_STATE_ERROR)
        errorno = 9;
    if (ctxt->state == XSLT_STATE_STOPPED)
        errorno = 10;
    xsltFreeTransformContext(ctxt);
    xmlFreeDoc(doc);
    if (res == NULL)
    {
        fprintf(stderr, "no result for %s\n", filename);
    }
    return res;
}

/**
 *  Run stylesheet on XML document
 *
 *  Transforms @doc with @cur and writes the result to stdout.
 *  @doc is consumed (freed inside xsltTransform).  A failure to save
 *  the result sets errorno to EXIT_LIB_ERROR.
 */
void
xsltProcess(xsltOptionsPtr ops, xmlDocPtr doc, const char** params,
            xsltStylesheetPtr cur, const char *filename)
{
    xmlDocPtr res = xsltTransform(ops, doc, params, cur, filename);

    if (res && xsltSaveResultToFile(stdout, res, cur) < 0)
    {
        errorno = EXIT_LIB_ERROR;
    }

    xmlFreeDoc(res);
}

/**
 *  run XSLT on documents
 *
 *  @xsl is the stylesheet file; @docs holds @count input documents
 *  (stdin is read when @count is 0).  With ops->embed set, @xsl and
 *  every entry of @docs are instead treated as documents carrying their
 *  own stylesheet processing instruction.
 *
 *  Returns the global errorno: 4 when the stylesheet cannot be parsed,
 *  5 when it cannot be compiled, 6 when an input document cannot be
 *  parsed, otherwise whatever the transformation steps recorded.
 */
int xsltRun(xsltOptionsPtr ops, char* xsl, const char** params,
            int count, char **docs)
{
    xsltStylesheetPtr cur = NULL;
    xmlDocPtr doc, style;
    int i, options = 0;

    options = XSLT_PARSE_OPTIONS;

    /*
     * Compile XSLT Stylesheet
     */
    style = xmlReadFile((const char *) xsl, NULL, options);
    if (style == NULL)
    {
        fprintf(stderr,  "cannot parse %s\n", xsl);
        cur = NULL;
        errorno = 4;
    }
    else
    {
        if (ops->embed)
        {
            cur = xsltLoadStylesheetPI(style);
            if (cur != NULL)
            {
                /* it is an embedded stylesheet */
                xsltProcess(ops, style, params, cur, xsl);
                xsltFreeStylesheet(cur);
                cur = NULL;
            }
            else
            {
                /* no stylesheet was loaded, so xsltProcess() never took
                   ownership of the document: release it here */
                xmlFreeDoc(style);
            }
            for (i=0; i<count; i++)
            {
                style = xmlReadFile((const char *) docs[i], NULL, options);
                if (style == NULL)
                {
                    fprintf(stderr, "cannot parse %s\n", docs[i]);
                    cur = NULL;
                    /* same code the non-embedded loop uses for an
                       unparseable input, so the failure is visible in
                       the exit status */
                    errorno = 6;
                    goto done;
                }
                cur = xsltLoadStylesheetPI(style);
                if (cur != NULL)
                {
                    /* it is an embedded stylesheet */
                    xsltProcess(ops, style, params, cur, docs[i]);
                    xsltFreeStylesheet(cur);
                    cur = NULL;
                }
                else
                {
                    xmlFreeDoc(style);
                }
            }
            goto done;
        }

        cur = xsltParseStylesheetDoc(style);
        if (cur != NULL)
        {
            if (cur->errors != 0)
            {
                errorno = 5;
                goto done;
            }
            if (cur->indent == 1) xmlIndentTreeOutput = 1;
            else xmlIndentTreeOutput = 0;
        }
        else
        {
            xmlFreeDoc(style);
            errorno = 5;
            goto done;
        }
    }

    /*
     * run XSLT
     */
    if ((cur != NULL) && (cur->errors == 0))
    {
        for (i=0; i<count; i++)
        {
            doc = NULL;
#ifdef LIBXML_HTML_ENABLED
            if (ops->html) doc = htmlReadFile(docs[i], NULL, options);
            else
#endif
            {
                doc = xmlReadFile((const char *) docs[i], NULL, options);
            }

            if (doc == NULL)
            {
                fprintf(stderr, "unable to parse %s\n", docs[i]);
                errorno = 6;
                continue;
            }
            xsltProcess(ops, doc, params, cur, docs[i]);
        }

        if (count == 0)
        {
            /* stdin */
            doc = NULL;
#ifdef LIBXML_HTML_ENABLED
            if (ops->html) doc = htmlParseFile("-", NULL);
            else
#endif
                doc = xmlReadFile("-", NULL, options);

            if (doc == NULL)
            {
                /* treat unparseable stdin like an unparseable file
                   instead of handing a NULL document to the transform */
                fprintf(stderr, "unable to parse %s\n", "-");
                errorno = 6;
            }
            else
            {
                xsltProcess(ops, doc, params, cur, "-");
            }
        }
    }

done:

    /*
     *  Clean up
     */
    if (cur != NULL) xsltFreeStylesheet(cur);

    return(errorno);
}
diff --git a/src/trans.h b/src/trans.h new file mode 100644 index 0000000..016304e --- /dev/null +++ b/src/trans.h @@ -0,0 +1,96 @@ +/* $Id: trans.h,v 1.11 2004/11/21 23:40:40 mgrouch Exp $ */ + +#ifndef __TRANS_H +#define __TRANS_H + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <libxml/xmlmemory.h> +#include <libxml/debugXML.h> +#include <libxml/xmlIO.h> +#include <libxml/HTMLtree.h> +#include <libxml/xinclude.h> +#include <libxml/parserInternals.h> +#include <libxml/uri.h> + +#include <libxslt/xslt.h> +#include <libxslt/xsltInternals.h> +#include <libxslt/transform.h> +#include <libxslt/xsltutils.h> +#include <libxslt/extensions.h> +#include <libexslt/exslt.h> + +#ifdef LIBXML_XINCLUDE_ENABLED +#include <libxml/xinclude.h> +#endif +#ifdef LIBXML_CATALOG_ENABLED +#include <libxml/catalog.h> +#endif + +#define MAX_PARAMETERS 256 +#define MAX_PATHS 256 + +typedef struct _xsltOptions { + int noval; /* do not validate against DTDs or schemas */ + int nonet; /* refuse to fetch DTDs or entities over network */ + int show_extensions; /* display list of extensions */ + int omit_decl; /* omit xml declaration */ + int noblanks; /* Remove insignificant spaces from XML tree */ + int embed; /* Allow applying embedded stylesheet */ +#ifdef LIBXML_XINCLUDE_ENABLED + int xinclude; /* do XInclude processing on input documents */ +#endif +#ifdef LIBXML_HTML_ENABLED + int html; /* inputs are in HTML format */ +#endif +#ifdef LIBXML_CATALOG_ENABLED + int catalogs; /* use SGML catalogs from $SGML_CATALOG_FILES */ +#endif +} xsltOptions; + +typedef xsltOptions *xsltOptionsPtr; + + +extern int errorno; + +void xsltInitOptions(xsltOptionsPtr ops); + +void xsltInitLibXml(xsltOptionsPtr ops); + +void xsltProcess(xsltOptionsPtr ops, xmlDocPtr doc, + const char **params, xsltStylesheetPtr cur, + const char *filename); + +xmlDocPtr xsltTransform(xsltOptionsPtr ops, xmlDocPtr doc, + const char **params, xsltStylesheetPtr cur, + const char *filename); + +int xsltRun(xsltOptionsPtr ops, char* xsl, + const char **params, + int count, char **docs); + +#endif /* __TRANS_H */ diff --git a/src/unescape-usage.txt b/src/unescape-usage.txt new file mode 100644 index 0000000..3fa6d81 --- /dev/null +++ b/src/unescape-usage.txt @@ -0,0 +1,7 @@ +XMLStarlet 
Toolkit: Unescape special XML characters +Usage: PROG unesc [<options>] [<string>] +where <options> are + --help - print usage + (TODO: more to be added in future) +if <string> is missing stdin is used instead. + diff --git a/src/usage.txt b/src/usage.txt new file mode 100644 index 0000000..5b28280 --- /dev/null +++ b/src/usage.txt @@ -0,0 +1,23 @@ +XMLStarlet Toolkit: Command line utilities for XML +Usage: PROG [<options>] <command> [<cmd-options>] +where <command> is one of: + ed (or edit) - Edit/Update XML document(s) + sel (or select) - Select data or query XML document(s) (XPATH, etc) + tr (or transform) - Transform XML document(s) using XSLT + val (or validate) - Validate XML document(s) (well-formed/DTD/XSD/RelaxNG) + fo (or format) - Format XML document(s) + el (or elements) - Display element structure of XML document + c14n (or canonic) - XML canonicalization + ls (or list) - List directory as XML + esc (or escape) - Escape special XML characters + unesc (or unescape) - Unescape special XML characters + pyx (or xmln) - Convert XML into PYX format (based on ESIS - ISO 8879) + p2x (or depyx) - Convert PYX into XML +<options> are: + --version - show version + --help - show help +Wherever file name mentioned in command help it is assumed +that URL can be used instead as well. + +Type: PROG <command> --help <ENTER> for command help + diff --git a/src/validate-usage.txt b/src/validate-usage.txt new file mode 100644 index 0000000..27e8d7d --- /dev/null +++ b/src/validate-usage.txt @@ -0,0 +1,23 @@ +XMLStarlet Toolkit: Validate XML document(s) +Usage: PROG val <options> [ <xml-file-or-uri> ... 
] +where <options> + -w or --well-formed - validate well-formedness only (default) + -d or --dtd <dtd-file> - validate against DTD + --net - allow network access +#ifdef LIBXML_SCHEMAS_ENABLED + -s or --xsd <xsd-file> - validate against XSD schema + -E or --embed - validate using embedded DTD +#endif +#ifdef LIBXML_SCHEMAS_ENABLED + -r or --relaxng <rng-file> - validate against Relax-NG schema +#endif + -e or --err - print verbose error messages on stderr + -b or --list-bad - list only files which do not validate + -g or --list-good - list only files which validate + -q or --quiet - do not list files (return result code only) + +#ifdef LIBXML_SCHEMAS_ENABLED +NOTE: XML Schemas are not fully supported yet due to its incomplete + support in libxml2 (see http://xmlsoft.org) + +#endif diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 0000000..0feda16 --- /dev/null +++ b/src/xml.c @@ -0,0 +1,353 @@ +/* $Id: xml.c,v 1.37 2004/11/11 03:39:34 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <config.h> +#include <version.h> + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include <libxslt/xslt.h> +#include <libxslt/xsltconfig.h> + +#include "xmlstar.h" + +static const xmlChar* XMLSTAR_NS = BAD_CAST "http://xmlstar.sourceforge.net"; +static const xmlChar* XMLSTAR_NS_PREFIX = BAD_CAST "xstar"; + +extern int edMain(int argc, char **argv); +extern int selMain(int argc, char **argv); +extern int trMain(int argc, char **argv); +extern int valMain(int argc, char **argv); +extern int foMain(int argc, char **argv); +extern int elMain(int argc, char **argv); +extern int c14nMain(int argc, char **argv); +extern int lsMain(int argc, char **argv); +extern int pyxMain(int argc, char **argv); +extern int depyxMain(int argc, char **argv); +extern int escMain(int argc, char **argv, int escape); + +const char more_info[] = +"XMLStarlet is a command line toolkit to query/edit/check/transform\n" +"XML documents (for more information see http://xmlstar.sourceforge.net/)\n"; + +const char libxslt_more_info[] = +"\n" +"Current implementation uses libxslt from GNOME codebase as XSLT processor\n" +"(see http://xmlsoft.org/ for more details)\n"; + +/** + * Display usage syntax + */ +void +usage(int argc, char **argv, exit_status status) +{ + extern void fprint_usage(FILE* o, const char* argv0); + FILE* o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * Error reporting function + */ +void reportError(void *ptr, xmlErrorPtr error) +{ + ErrorInfo *errorInfo = (ErrorInfo*) ptr; + assert(errorInfo); + + if (errorInfo->verbose) + { + int msglen; + int domain = error->domain; + const char *filename = + error->file? error->file : + errorInfo? errorInfo->filename : + NULL; + xmlTextReaderPtr reader = errorInfo->xmlReader; + + int line = (!filename)? 0 : + (reader)? xmlTextReaderGetParserLineNumber(reader) : + error->line; + int column = (!filename)? 0 : + (reader)? xmlTextReaderGetParserColumnNumber(reader) : + error->int2; + if (line) + { + fprintf(stderr, "%s:%d.%d: ", filename, line, column); + } + + msglen = strlen(error->message); + if (error->message[msglen-1] == '\n') + error->message[msglen-1] = '\0'; + fprintf(stderr, "%s", error->message); + + /* only print extra info if it's not in message */ + if (error->str1 && strstr(error->message, error->str1) == NULL) { + fprintf(stderr, ": %s", error->str1); + if (error->str2 && strstr(error->message, error->str2) == NULL) { + fprintf(stderr, ", %s", error->str2); + } + if (error->str3 && strstr(error->message, error->str3) == NULL) { + fprintf(stderr, ", %s", error->str3); + } + } + fprintf(stderr, "\n"); + + + if ((domain == XML_FROM_PARSER) || (domain == XML_FROM_HTML) || + (domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) || + (domain == XML_FROM_IO) || (domain == XML_FROM_VALID)) { + xmlParserCtxtPtr ctxt = error->ctxt; + if (ctxt) xmlParserPrintFileContext(ctxt->input); + } + } +} + +#define CHECK_MEM(ret) if (!ret) \ + (fprintf(stderr, "out of memory\n"), exit(EXIT_INTERNAL_ERROR)) + +void* +xmalloc(size_t size) +{ + void *ret = malloc(size); + CHECK_MEM(ret); + return ret; +} +void* +xrealloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + CHECK_MEM(ret); + return ret; +} +char* +xstrdup(const char *str) +{ + char *ret = (char*) 
xmlStrdup(BAD_CAST str); + CHECK_MEM(ret); + return ret; +} + +/** + * This is the main function + */ +int +main(int argc, char **argv) +{ + int ret = 0; + /* by default errors are reported */ + static ErrorInfo errorInfo = { NULL, NULL, VERBOSE }; + + xmlMemSetup(free, xmalloc, xrealloc, xstrdup); + xmlSetStructuredErrorFunc(&errorInfo, reportError); + + if (argc <= 1) + { + usage(argc, argv, EXIT_BAD_ARGS); + } + else if (!strcmp(argv[1], "ed") || !strcmp(argv[1], "edit")) + { + ret = edMain(argc, argv); + } + else if (!strcmp(argv[1], "sel") || !strcmp(argv[1], "select")) + { + ret = selMain(argc, argv); + } + else if (!strcmp(argv[1], "tr") || !strcmp(argv[1], "transform")) + { + ret = trMain(argc, argv); + } + else if (!strcmp(argv[1], "fo") || !strcmp(argv[1], "format")) + { + ret = foMain(argc, argv); + } + else if (!strcmp(argv[1], "val") || !strcmp(argv[1], "validate")) + { + ret = valMain(argc, argv); + } + else if (!strcmp(argv[1], "el") || !strcmp(argv[1], "elements")) + { + ret = elMain(argc, argv); + } + else if (!strcmp(argv[1], "c14n") || !strcmp(argv[1], "canonic")) + { + ret = c14nMain(argc, argv); + } + else if (!strcmp(argv[1], "ls") || !strcmp(argv[1], "list")) + { + ret = lsMain(argc, argv); + } + else if (!strcmp(argv[1], "pyx") || !strcmp(argv[1], "xmln")) + { + ret = pyxMain(argc, argv); + } + else if (!strcmp(argv[1], "depyx") || !strcmp(argv[1], "p2x")) + { + ret = depyxMain(argc, argv); + } + else if (!strcmp(argv[1], "esc") || !strcmp(argv[1], "escape")) + { + ret = escMain(argc, argv, 1); + } + else if (!strcmp(argv[1], "unesc") || !strcmp(argv[1], "unescape")) + { + ret = escMain(argc, argv, 0); + } + else if (!strcmp(argv[1], "--version")) + { + fprintf(stdout, "%s\n" + "compiled against libxml2 %s, linked with %s\n" + "compiled against libxslt %s, linked with %s\n", + VERSION, + LIBXML_DOTTED_VERSION, xmlParserVersion, + LIBXSLT_DOTTED_VERSION, xsltEngineVersion); + ret = EXIT_SUCCESS; + } + else + { + usage(argc, argv, 
strcmp(argv[1], "--help") == 0? + EXIT_SUCCESS : EXIT_BAD_ARGS); + } + + exit(ret); +} + + +void +registerXstarVariable(xmlXPathContextPtr ctxt, + const char* name, xmlXPathObjectPtr value) +{ + xmlXPathRegisterVariableNS(ctxt, BAD_CAST name, XMLSTAR_NS, value); +} + +static xmlXPathObjectPtr varLookupFallbackToXstarNS(void* ctxt_vp, + const xmlChar* name, const xmlChar* ns_uri) +{ + xmlXPathObjectPtr ret; + xmlXPathContextPtr ctxt = ctxt_vp; + + ctxt->varLookupFunc = NULL; /* avoid infinite recursion! */ + + /* first get the default lookup value */ + ret = xmlXPathVariableLookupNS(ctxt, name, ns_uri); + + if (!ret && !ns_uri) { + /* if we didn't find anything, and there was no namespace given, + try looking in XMLStarlet namespace */ + ret = xmlXPathVariableLookupNS(ctxt, name, XMLSTAR_NS); + } + + ctxt->varLookupFunc = &varLookupFallbackToXstarNS; + return ret; +} + +void +registerXstarNs(xmlXPathContextPtr ctxt) +{ + xmlXPathRegisterVariableLookup(ctxt, &varLookupFallbackToXstarNS, ctxt); + xmlXPathRegisterNs(ctxt, XMLSTAR_NS_PREFIX, XMLSTAR_NS); +} + + +static void bad_ns_opt(const char *msg) +{ + fprintf(stderr, "Bad namespace option: %s\n", msg); + exit(EXIT_BAD_ARGS); +} + +#define MAX_NS_ARGS 256 +xmlChar *ns_arr[2 * MAX_NS_ARGS + 1]; + +/** + * Parse command line for -N <prefix>=<namespace> arguments + */ +int +parseNSArr(xmlChar** ns_arr, int* plen, int argc, char **argv) +{ + int i = 0; + *plen = 0; + ns_arr[0] = 0; + + for (i=0; i<argc; i++) + { + int prefix_len; + xmlChar *name, *value; + const xmlChar *equal_sign; + + /* check for end of arguments */ + if (argv[i] == 0 || argv[i][0] != '-') + break; + if (strcmp(argv[i], "-N") != 0) + continue; + + i++; + if (i >= argc) bad_ns_opt("-N without argument"); + + equal_sign = xmlStrchr((const xmlChar*) argv[i], '='); + if (!equal_sign) + bad_ns_opt("namespace should have the form <prefix>=<url>"); + prefix_len = equal_sign - (const xmlChar*) argv[i]; + + name = xmlStrndup((const xmlChar*) argv[i], 
prefix_len); + value = xmlStrdup((const xmlChar*) argv[i]+prefix_len+1); + + if (*plen >= MAX_NS_ARGS) + { + fprintf(stderr, "too many namespaces increase MAX_NS_ARGS\n"); + exit(EXIT_BAD_ARGS); + } + + ns_arr[*plen] = name; + (*plen)++; + ns_arr[*plen] = value; + (*plen)++; + ns_arr[*plen] = 0; + + } + + return i; +} + +/** + * Cleanup memory allocated by namespaces arguments + */ +void +cleanupNSArr(xmlChar **ns_arr) +{ + xmlChar **p = ns_arr; + + while (*p) + { + xmlFree(*p); + p++; + } +} diff --git a/src/xml_C14N.c b/src/xml_C14N.c new file mode 100644 index 0000000..4a0becc --- /dev/null +++ b/src/xml_C14N.c @@ -0,0 +1,359 @@ +/* + * $Id: xml_C14N.c,v 1.12 2004/11/24 03:00:10 mgrouch Exp $ + * + * Canonical XML implementation test program + * (http://www.w3.org/TR/2001/REC-xml-c14n-20010315) + * + * See Copyright for the status of this software. + * + * Author: Aleksey Sanin <aleksey@aleksey.com> + */ + +#include <libxml/xmlversion.h> +#include <config.h> + +#if defined(LIBXML_C14N_ENABLED) + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> + +#include <libxml/c14n.h> + +#include "xmlstar.h" + +static void c14nUsage(const char *name, exit_status status) +{ + extern void fprint_c14n_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_c14n_usage(o, name); + fprintf(o, "%s", more_info); + exit(status); +} + +static xmlXPathObjectPtr +load_xpath_expr (xmlDocPtr parent_doc, const char* filename); + +static xmlChar **parse_list(xmlChar *str); + +#if 0 +static void print_xpath_nodes(xmlNodeSetPtr nodes); +#endif + +static int +run_c14n(const char* xml_filename, int with_comments, int exclusive, + const char* xpath_filename, xmlChar **inclusive_namespaces, + int nonet) { + xmlDocPtr doc; + xmlXPathObjectPtr xpath = NULL; + int ret; + + /* + * build an XML tree from a the file; we need to add default + * attributes and resolve all character and entities references + */ + + doc = xmlReadFile(xml_filename, NULL, + XML_PARSE_NOENT | XML_PARSE_DTDLOAD | + XML_PARSE_DTDATTR | (nonet? XML_PARSE_NONET:0)); + if (doc == NULL) { + fprintf(stderr, "Error: unable to parse file \"%s\"\n", xml_filename); + return(EXIT_BAD_FILE); + } + + /* + * Check the document is of the right kind + */ + if(xmlDocGetRootElement(doc) == NULL) { + fprintf(stderr,"Error: empty document for file \"%s\"\n", xml_filename); + xmlFreeDoc(doc); + return(EXIT_BAD_FILE); + } + + /* + * load xpath file if specified + */ + if(xpath_filename) { + xpath = load_xpath_expr(doc, xpath_filename); + if(xpath == NULL) { + fprintf(stderr,"Error: unable to evaluate xpath expression\n"); + xmlFreeDoc(doc); + return(EXIT_BAD_FILE); + } + } + + /* + * Canonical form + */ + set_stdout_binary(); /* avoid line ending conversion */ + ret = xmlC14NDocSave(doc, + (xpath) ? xpath->nodesetval : NULL, + exclusive, inclusive_namespaces, + with_comments, "-", 0); + if(ret < 0) { + fprintf(stderr,"Error: failed to canonicalize XML file \"%s\" (ret=%d)\n", + xml_filename, ret); + xmlFreeDoc(doc); + return(EXIT_FAILURE); + } + + /* + * Cleanup + */ + if(xpath != NULL) xmlXPathFreeObject(xpath); + xmlFreeDoc(doc); + + return(ret >= 0? 
EXIT_SUCCESS : EXIT_FAILURE); +} + +int c14nMain(int argc, char **argv) { + int ret = -1, nonet = 1; + + /* + * Init libxml + */ + xmlInitParser(); + LIBXML_TEST_VERSION + + /* + * Parse command line and process file + */ + + if (argc > 2 && strcmp(argv[2], "--net") == 0) { + nonet = 0; + /* TODO: parse options properly */ + argc--; + argv++; + } + + if (argc < 4) { + if (argc >= 3) + { + if (strcmp(argv[2], "--help") == 0 || strcmp(argv[2], "-h") == 0) + c14nUsage(argv[0], EXIT_SUCCESS); + } + ret = run_c14n((argc > 2)? argv[2] : "-", 1, 0, NULL, NULL, nonet); + } else if(strcmp(argv[2], "--with-comments") == 0) { + ret = run_c14n(argv[3], 1, 0, (argc > 4) ? argv[4] : NULL, NULL, nonet); + } else if(strcmp(argv[2], "--without-comments") == 0) { + ret = run_c14n(argv[3], 0, 0, (argc > 4) ? argv[4] : NULL, NULL, nonet); + } else if(strcmp(argv[2], "--exc-with-comments") == 0) { + xmlChar **list; + + /* load exclusive namespace from command line */ + list = (argc > 5) ? parse_list((xmlChar *)argv[5]) : NULL; + ret = run_c14n(argv[3], 1, 1, (argc > 4) ? argv[4] : NULL, list, nonet); + if(list != NULL) xmlFree(list); + } else if(strcmp(argv[2], "--exc-without-comments") == 0) { + xmlChar **list; + + /* load exclusive namespace from command line */ + list = (argc > 5) ? parse_list((xmlChar *)argv[5]) : NULL; + ret = run_c14n(argv[3], 0, 1, (argc > 4) ? argv[4] : NULL, list, nonet); + if(list != NULL) xmlFree(list); + } else { + fprintf(stderr, "error: bad arguments.\n"); + c14nUsage(argv[0], EXIT_BAD_ARGS); + } + + /* + * Shutdown libxml + */ + xmlCleanupParser(); + xmlMemoryDump(); + + return ret; +} + +/* + * Macro used to grow the current buffer. 
+ */ +#define growBufferReentrant() { \ + buffer_size *= 2; \ + buffer = (xmlChar **) \ + xmlRealloc(buffer, buffer_size * sizeof(xmlChar*)); \ + if (buffer == NULL) { \ + perror("realloc failed"); \ + return(NULL); \ + } \ +} + +static xmlChar ** +parse_list(xmlChar *str) { + xmlChar **buffer; + xmlChar **out = NULL; + int buffer_size = 0; + int len; + + if(str == NULL) { + return(NULL); + } + + len = xmlStrlen(str); + if((str[0] == '\'') && (str[len - 1] == '\'')) { + str[len - 1] = '\0'; + str++; + len -= 2; + } + /* + * allocate an translation buffer. + */ + buffer_size = 1000; + buffer = xmlMalloc(buffer_size * sizeof(xmlChar*)); + out = buffer; + + while(*str != '\0') { + if (out - buffer > buffer_size - 10) { + int indx = out - buffer; + + growBufferReentrant(); + out = &buffer[indx]; + } + (*out++) = str; + while(*str != ',' && *str != '\0') ++str; + if(*str == ',') *(str++) = '\0'; + } + (*out) = NULL; + return buffer; +} + +static xmlXPathObjectPtr +load_xpath_expr (xmlDocPtr parent_doc, const char* filename) { + xmlXPathObjectPtr xpath; + xmlDocPtr doc; + xmlChar *expr; + xmlXPathContextPtr ctx; + xmlNodePtr node; + xmlNsPtr ns; + + /* + * load XPath expr as a file + */ + xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS; + xmlSubstituteEntitiesDefault(1); + + doc = xmlReadFile(filename, NULL, XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR); + if (doc == NULL) { + fprintf(stderr, "Error: unable to parse file \"%s\"\n", filename); + return(NULL); + } + + /* + * Check the document is of the right kind + */ + if(xmlDocGetRootElement(doc) == NULL) { + fprintf(stderr,"Error: empty document for file \"%s\"\n", filename); + xmlFreeDoc(doc); + return(NULL); + } + + node = doc->children; + while(node != NULL && !xmlStrEqual(node->name, (const xmlChar *)"XPath")) { + node = node->next; + } + + if(node == NULL) { + fprintf(stderr,"Error: XPath element expected in the file \"%s\"\n", filename); + xmlFreeDoc(doc); + return(NULL); + } + + expr = 
xmlNodeGetContent(node); + if(expr == NULL) { + fprintf(stderr,"Error: XPath content element is NULL \"%s\"\n", filename); + xmlFreeDoc(doc); + return(NULL); + } + + ctx = xmlXPathNewContext(parent_doc); + if(ctx == NULL) { + fprintf(stderr,"Error: unable to create new context\n"); + xmlFree(expr); + xmlFreeDoc(doc); + return(NULL); + } + + /* + * Register namespaces + */ + ns = node->nsDef; + while(ns != NULL) { + if(xmlXPathRegisterNs(ctx, ns->prefix, ns->href) != 0) { + fprintf(stderr,"Error: unable to register NS with prefix=\"%s\" and href=\"%s\"\n", ns->prefix, ns->href); + xmlFree(expr); + xmlXPathFreeContext(ctx); + xmlFreeDoc(doc); + return(NULL); + } + ns = ns->next; + } + + /* + * Evaluate xpath + */ + xpath = xmlXPathEvalExpression(expr, ctx); + if(xpath == NULL) { + fprintf(stderr,"Error: unable to evaluate xpath expression\n"); + xmlFree(expr); + xmlXPathFreeContext(ctx); + xmlFreeDoc(doc); + return(NULL); + } + + /* print_xpath_nodes(xpath->nodesetval); */ + + xmlFree(expr); + xmlXPathFreeContext(ctx); + xmlFreeDoc(doc); + return(xpath); +} + +#if 0 +static void +print_xpath_nodes(xmlNodeSetPtr nodes) { + xmlNodePtr cur; + int i; + + if(nodes == NULL ){ + fprintf(stderr, "Error: no nodes set defined\n"); + return; + } + + fprintf(stderr, "Nodes Set:\n-----\n"); + for(i = 0; i < nodes->nodeNr; ++i) { + if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { + xmlNsPtr ns; + + ns = (xmlNsPtr)nodes->nodeTab[i]; + cur = (xmlNodePtr)ns->next; + fprintf(stderr, "namespace \"%s\"=\"%s\" for node %s:%s\n", + ns->prefix, ns->href, + (cur->ns) ? cur->ns->prefix : BAD_CAST "", cur->name); + } else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE) { + cur = nodes->nodeTab[i]; + fprintf(stderr, "element node \"%s:%s\"\n", + (cur->ns) ? 
cur->ns->prefix : BAD_CAST "", cur->name); + } else { + cur = nodes->nodeTab[i]; + fprintf(stderr, "node \"%s\": type %d\n", cur->name, cur->type); + } + } +} +#endif + +#else +#include <stdio.h> +int c14nMain(int argc, char **argv) { + printf("%s : XPath/Canonicalization support not compiled in\n", argv[0]); + return 2; +} +#endif /* LIBXML_C14N_ENABLED */ + diff --git a/src/xml_depyx.c b/src/xml_depyx.c new file mode 100644 index 0000000..1935fc0 --- /dev/null +++ b/src/xml_depyx.c @@ -0,0 +1,242 @@ +/* $Id: xml_depyx.c,v 1.8 2005/03/12 03:24:23 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include <libxml/xmlmemory.h> + +#include "xmlstar.h" +#include "escape.h" + +#define INSZ 4*1024 + +static void +depyxUsage(int argc, char **argv, exit_status status) +{ + extern void fprint_depyx_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? stdout : stderr; + fprint_depyx_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * Decode PYX string + * + */ +void +pyxDecode(char *str, xml_C14NNormalizationMode mode) +{ + while (*str) + { + if ((*str == '\\') && (*(str+1) == 'n')) + { + printf("\n"); + str++; + } + else if ((*str == '\\') && (*(str+1) == 't')) + { + printf("\t"); + str++; + } + else if ((*str == '\\') && (*(str+1) == '\\')) + { + printf("\\"); + str++; + } + else + { + if ((*str == '<') && ((mode == XML_C14N_NORMALIZE_ATTR) || + (mode == XML_C14N_NORMALIZE_TEXT))) { + printf("<"); + } + else if ((*str == '>') && (mode == XML_C14N_NORMALIZE_TEXT)) { + printf(">"); + } + else if ((*str == '&') && ((mode == XML_C14N_NORMALIZE_ATTR) || + (mode == XML_C14N_NORMALIZE_TEXT))) { + printf("&"); + } + else if ((*str == '"') && (mode == XML_C14N_NORMALIZE_ATTR)) { + printf("""); + } + else { + printf("%c", *str); + } + } + str++; + } +} + +/** + * Decode PYX file + * + */ +int +pyxDePyx(char *file) +{ + static char line[INSZ]; + FILE *in = stdin; + + if (strcmp(file, "-")) + { + in = fopen(file, "r"); + if (in == NULL) + { + fprintf(stderr, "error: could not open: %s\n", file); + exit(EXIT_BAD_FILE); + } + } + + while (!feof(in)) + { + if (fgets(line, INSZ - 1, in)) + { + if(line[strlen(line)-1] == '\n') line[strlen(line)-1] = '\0'; + + while (line[0] == '(') + { + printf("<%s", line+1); + if (!feof(in)) + { + if (fgets(line, INSZ - 1, in)) + { + if(line[strlen(line)-1] == '\n') line[strlen(line)-1] = '\0'; + + while(line[0] == 'A') /* attribute */ + { + char *value; + + printf(" "); + value = line+1; + 
while(*value && (*value != ' ')) + { + printf("%c", *value); + value++; + } + if (*value == ' ') + { + value++; + printf("=\""); + pyxDecode(value, XML_C14N_NORMALIZE_ATTR); /* attribute value */ + printf("\""); + } + if (!feof(in)) + { + if (fgets(line, INSZ - 1, in)) + { + if(line[strlen(line)-1] == '\n') line[strlen(line)-1] = '\0'; + } + } + } + printf(">"); + } + } + } + + if (line[0] == '-') + { + /* text */ + pyxDecode(line+1, XML_C14N_NORMALIZE_TEXT); + } + else if (line[0] == '?') + { + /* processing instruction */ + printf("<?"); + pyxDecode(line+1, XML_C14N_NORMALIZE_TEXT); + printf("?>"); + printf("\n"); /* is this correct? */ + } + else if (line[0] == 'D') + { + /* processing instruction */ + printf("<!DOCTYPE"); + pyxDecode(line+1, XML_C14N_NORMALIZE_TEXT); + printf(">"); + printf("\n"); /* is this correct? */ + } + else if (line[0] == 'C') + { + /* comment */ + printf("<!--"); + pyxDecode(line+1, XML_C14N_NORMALIZE_TEXT); + printf("-->"); + printf("\n"); /* is this correct? */ + } + else if (line[0] == '[') + { + /* CDATA */ + printf("<![CDATA["); + pyxDecode(line+1, XML_C14N_NORMALIZE_NOTHING); + printf("]]>"); + printf("\n"); /* is this correct? 
*/ + } + else if (line[0] == ')') + { + printf("</%s>", line+1); + } + } + } + + return EXIT_SUCCESS; +} + +/** + * Main function for 'de-PYX' + * + */ +int +depyxMain(int argc, char **argv) +{ + int ret = EXIT_SUCCESS; + + if ((argc >= 3) && (!strcmp(argv[2], "-h") || !strcmp(argv[2], "--help"))) + { + depyxUsage(argc, argv, EXIT_SUCCESS); + } + else if (argc == 3) + { + ret = pyxDePyx(argv[2]); + } + else if (argc == 2) + { + ret = pyxDePyx("-"); + } + else + { + depyxUsage(argc, argv, EXIT_BAD_ARGS); + } + + printf("\n"); + + return ret; +} + diff --git a/src/xml_edit.c b/src/xml_edit.c new file mode 100644 index 0000000..d564f3c --- /dev/null +++ b/src/xml_edit.c @@ -0,0 +1,742 @@ +/* $Id: xml_edit.c,v 1.45 2005/01/08 00:07:03 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
*/

#include <config.h>

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include <libxml/xmlmemory.h>
#include <libxml/debugXML.h>
#include <libxml/xmlsave.h>
#include <libxml/HTMLtree.h>
#include <libxml/xinclude.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxml/xpointer.h>
#include <libxml/parserInternals.h>
#include <libxml/uri.h>
#include <libexslt/exslt.h>

#include "xmlstar.h"

/*
   TODO:
          1. Should this be allowed ?
             ./xml ed -m /xml /xml/table/rec/object ../examples/xml/tab-obj.xml
*/

/* Options given before the first edit action; they apply to every input. */
typedef struct _edOptions {   /* Global 'edit' options */
    int noblanks;             /* Remove insignificant spaces from XML tree */
    int preserveFormat;       /* Preserve original XML formatting */
    int omit_decl;            /* Omit XML declaration line <?xml version="1.0"?> */
    int inplace;              /* Edit file inplace (no output on stdout) */
    int nonet;                /* Disallow network access */
} edOptions;

typedef edOptions *edOptionsPtr;

/* Kind of edit action; the option names are those parsed by edMain(). */
typedef enum _XmlEdOp {
    XML_ED_DELETE,            /* -d, --delete */
    XML_ED_VAR,               /* --var */
    XML_ED_INSERT,            /* -i, --insert: new node becomes previous sibling */
    XML_ED_APPEND,            /* -a, --append: new node becomes next sibling */
    XML_ED_UPDATE,            /* -u, --update */
    XML_ED_RENAME,            /* -r, --rename */
    XML_ED_MOVE,              /* -m, --move */
    XML_ED_SUBNODE            /* -s, --subnode: new node becomes a child */
} XmlEdOp;

/* TODO ???
*/ +typedef enum _XmlNodeType { + XML_UNDEFINED, + XML_ATTR, + XML_ELEM, + XML_TEXT, + XML_COMT, + XML_CDATA, + XML_EXPR +} XmlNodeType; + +typedef struct { + char shortOpt; + const char* longOpt; /* include "--" */ + XmlNodeType type; +} OptionSpec; + +static const OptionSpec + OPT_VAL_OR_EXP[] = { + {'x', "--expr", XML_EXPR}, + {'v', "--value", XML_TEXT} + }, + OPT_JUST_VAL[] = { + {'v', "--value", XML_TEXT} + }, + OPT_JUST_TYPE[] = { + {'t', "--type"} + }, + OPT_NODE_TYPE[] = { + {0, "elem", XML_ELEM}, + {0, "attr", XML_ATTR}, + {0, "text", XML_TEXT} + }, + OPT_JUST_NAME[] = { + {'n', "--name"} + }; + + +typedef const char* XmlEdArg; + +typedef struct _XmlEdAction { + XmlEdOp op; + XmlEdArg arg1; + XmlEdArg arg2; + XmlEdArg arg3; + XmlNodeType type; +} XmlEdAction; + +/** + * display short help message + */ +static void +edUsage(const char *argv0, exit_status status) +{ + extern void fprint_edit_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_edit_usage(o, argv0); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * Initialize global command line options + */ +static void +edInitOptions(edOptionsPtr ops) +{ + ops->noblanks = 1; + ops->omit_decl = 0; + ops->preserveFormat = 0; + ops->inplace = 0; + ops->nonet = 1; +} + +/** + * Parse global command line options + */ +static int +edParseOptions(edOptionsPtr ops, int argc, char **argv) +{ + int i; + + i = 2; + while((i < argc) && (argv[i][0] == '-')) + { + if (!strcmp(argv[i], "-S") || !strcmp(argv[i], "--ps")) + { + ops->noblanks = 0; /* preserve spaces */ + } + else if (!strcmp(argv[i], "-P") || !strcmp(argv[i], "--pf")) + { + ops->preserveFormat = 1; /* preserve format */ + } + else if (!strcmp(argv[i], "-O") || !strcmp(argv[i], "--omit-decl")) + { + ops->omit_decl = 1; + } + else if (!strcmp(argv[i], "-L") || !strcmp(argv[i], "--inplace")) + { + ops->inplace = 1; + } + else if (!strcmp(argv[i], "--net")) + { + ops->nonet = 0; + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h") || + !strcmp(argv[i], "-?") || !strcmp(argv[i], "-Z")) + { + edUsage(argv[0], EXIT_SUCCESS); + } + else + { + break; + } + i++; + } + + return i; +} + +/** + * register the namespace from @ns_arr to @ctxt + */ +static void +nsarr_xpath_register(xmlXPathContextPtr ctxt) +{ + int ns; + for (ns = 0; ns_arr[ns]; ns += 2) { + xmlXPathRegisterNs(ctxt, ns_arr[ns], ns_arr[ns+1]); + } +} + +/** + * register top-level namespace definitions from @doc to @ctxt + */ +static void +extract_ns_defs(xmlDocPtr doc, xmlXPathContextPtr ctxt) +{ + xmlNsPtr nsDef; + xmlNodePtr root = xmlDocGetRootElement(doc); + if (!root) return; + + for (nsDef = root->nsDef; nsDef; nsDef = nsDef->next) { + if (nsDef->prefix != NULL) /* can only register ns with prefix */ + xmlXPathRegisterNs(ctxt, nsDef->prefix, nsDef->href); + } +} + +static void +update_string(xmlDocPtr doc, xmlNodePtr dest, const xmlChar* newstr) +{ + /* TODO: do we need 
xmlEncodeEntitiesReentrant() too/instead? */ + xmlChar* string = xmlEncodeSpecialChars(doc, newstr); + xmlNodeSetContent(dest, string); + xmlFree(string); +} + +/** + * 'update' operation + */ +static void +edUpdate(xmlDocPtr doc, xmlNodeSetPtr nodes, const char *val, + XmlNodeType type, xmlXPathContextPtr ctxt) +{ + int i; + xmlXPathCompExprPtr xpath = NULL; + + if (type == XML_EXPR) { + xpath = xmlXPathCompile((const xmlChar*) val); + if (!xpath) return; + } + + for (i = 0; i < nodes->nodeNr; i++) + { + /* update node */ + if (type == XML_EXPR) { + xmlXPathObjectPtr res; + + ctxt->node = nodes->nodeTab[i]; + res = xmlXPathCompiledEval(xpath, ctxt); + if (res->type == XPATH_NODESET || res->type == XPATH_XSLT_TREE) { + int j; + xmlNodePtr oldChild; + xmlNodeSetPtr oldChildren = xmlXPathNodeSetCreate(NULL); + /* NOTE: newChildren can be NULL for empty result set */ + xmlNodeSetPtr newChildren = res->nodesetval; + + /* NOTE: nodes can be both oldChildren and newChildren */ + + /* unlink the old children */ + for (oldChild = nodes->nodeTab[i]->children; oldChild; oldChild = oldChild->next) { + xmlUnlinkNode(oldChild); + /* we can't free it now because an oldChild can also be + newChild! just put it in the list */ + xmlXPathNodeSetAdd(oldChildren, oldChild); + } + + /* add the new children */ + for (j = 0; newChildren && j < newChildren->nodeNr; j++) { + xmlNodePtr node = newChildren->nodeTab[j]; + xmlAddChild(nodes->nodeTab[i], + /* if node is linked to this doc we need to copy */ + (node->doc == doc)? 
xmlDocCopyNode(node, doc, 1) : node); + newChildren->nodeTab[j] = NULL; + } + newChildren->nodeNr = 0; + + /* NOTE: if any oldChildren were newChildren, they've been + copied so we can free them all now */ + for (j = 0; j < oldChildren->nodeNr; j++) { + xmlFreeNode(oldChildren->nodeTab[j]); + oldChildren->nodeTab[j] = NULL; + } + oldChildren->nodeNr = 0; + xmlXPathFreeNodeSet(oldChildren); + } else { + res = xmlXPathConvertString(res); + update_string(doc, nodes->nodeTab[i], res->stringval); + } + xmlXPathFreeObject(res); + } else { + update_string(doc, nodes->nodeTab[i], (const xmlChar*) val); + } + } + + xmlXPathFreeCompExpr(xpath); +} + +/* holds the node that was last inserted */ +static xmlNodeSetPtr previous_insertion; + +/** + * We must not keep free'd nodes in @previous_insertion. + * This is a callback from xmlFreeNode() + */ +static void +removeNodeFromPrev(xmlNodePtr node) +{ + xmlXPathNodeSetDel(previous_insertion, node); +} + +/** + * 'insert' operation + */ +static void +edInsert(xmlDocPtr doc, xmlNodeSetPtr nodes, const char *val, const char *name, + XmlNodeType type, int mode) +{ + int i; + + xmlXPathEmptyNodeSet(previous_insertion); + + for (i = 0; i < nodes->nodeNr; i++) + { + xmlNodePtr node; + + if (nodes->nodeTab[i] == (void*) doc && mode != 0) { + fprintf(stderr, "The document node cannot have siblings.\n"); + exit(EXIT_INTERNAL_ERROR); + } + + /* update node */ + if (type == XML_ATTR) + { + node = (xmlNodePtr) xmlNewProp(nodes->nodeTab[i], BAD_CAST name, BAD_CAST val); + } + else if (type == XML_ELEM) + { + node = xmlNewDocNode(doc, NULL /* TODO: NS */, BAD_CAST name, BAD_CAST val); + if (mode > 0) + xmlAddNextSibling(nodes->nodeTab[i], node); + else if (mode < 0) + xmlAddPrevSibling(nodes->nodeTab[i], node); + else + xmlAddChild(nodes->nodeTab[i], node); + } + else if (type == XML_TEXT) + { + node = xmlNewDocText(doc, BAD_CAST val); + if (mode > 0) + xmlAddNextSibling(nodes->nodeTab[i], node); + else if (mode < 0) + 
xmlAddPrevSibling(nodes->nodeTab[i], node); + else + xmlAddChild(nodes->nodeTab[i], node); + } + xmlXPathNodeSetAdd(previous_insertion, node); + } +} + +/** + * 'rename' operation + */ +static void +edRename(xmlDocPtr doc, xmlNodeSetPtr nodes, const char *val, XmlNodeType type) +{ + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + if (nodes->nodeTab[i] == (void*) doc) { + fprintf(stderr, "The document node cannot be renamed.\n"); + exit(EXIT_INTERNAL_ERROR); + } + xmlNodeSetName(nodes->nodeTab[i], BAD_CAST val); + } +} + +/** + * 'delete' operation + */ +static void +edDelete(xmlDocPtr doc, xmlNodeSetPtr nodes) +{ + int i; + for (i = nodes->nodeNr - 1; i >= 0; i--) + { + if (nodes->nodeTab[i] == (void*) doc) { + fprintf(stderr, "The document node cannot be deleted.\n"); + exit(EXIT_INTERNAL_ERROR); + } + + if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { + fprintf(stderr, "FIXME: can't delete namespace nodes\n"); + exit(EXIT_INTERNAL_ERROR); + } + /* delete node */ + xmlUnlinkNode(nodes->nodeTab[i]); + + /* Free node and children */ + xmlFreeNode(nodes->nodeTab[i]); + nodes->nodeTab[i] = NULL; + } +} + +/** + * 'move' operation + */ +static void +edMove(xmlDocPtr doc, xmlNodeSetPtr nodes, xmlNodePtr to) +{ + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + if (nodes->nodeTab[i] == (void*) doc) { + fprintf(stderr, "The document node cannot be moved.\n"); + exit(EXIT_INTERNAL_ERROR); + } + + if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { + fprintf(stderr, "FIXME: can't move namespace nodes\n"); + exit(EXIT_INTERNAL_ERROR); + } + /* move node */ + xmlUnlinkNode(nodes->nodeTab[i]); + xmlAddChild(to, nodes->nodeTab[i]); + } +} + +/** + * Loop through array of operations and perform them + */ +static void +edProcess(xmlDocPtr doc, const XmlEdAction* ops, int ops_count) +{ + int k; + xmlXPathContextPtr ctxt = xmlXPathNewContext(doc); + /* NOTE: later registrations override earlier ones */ + registerXstarNs(ctxt); + + /* variables */ + previous_insertion = 
xmlXPathNodeSetCreate(NULL); + registerXstarVariable(ctxt, "prev", + xmlXPathWrapNodeSet(previous_insertion)); + xmlDeregisterNodeDefault(&removeNodeFromPrev); + +#if HAVE_EXSLT_XPATH_REGISTER + /* register extension functions */ + exsltDateXpathCtxtRegister(ctxt, BAD_CAST "date"); + exsltMathXpathCtxtRegister(ctxt, BAD_CAST "math"); + exsltSetsXpathCtxtRegister(ctxt, BAD_CAST "set"); + exsltStrXpathCtxtRegister(ctxt, BAD_CAST "str"); +#endif + /* namespaces from doc */ + extract_ns_defs(doc, ctxt); + /* namespaces from command line */ + nsarr_xpath_register(ctxt); + + for (k = 0; k < ops_count; k++) + { + xmlXPathObjectPtr res; + xmlNodeSetPtr nodes; + + /* NOTE: to make relative paths match as if from "/", set context to + document; setting to root would match as if from "/node()/" */ + ctxt->node = (xmlNodePtr) doc; + + if (ops[k].op == XML_ED_VAR) { + res = xmlXPathEvalExpression(BAD_CAST ops[k].arg2, ctxt); + xmlXPathRegisterVariable(ctxt, BAD_CAST ops[k].arg1, res); + continue; + } + + res = xmlXPathEvalExpression(BAD_CAST ops[k].arg1, ctxt); + if (!res || res->type != XPATH_NODESET || !res->nodesetval) continue; + nodes = res->nodesetval; + + switch (ops[k].op) + { + case XML_ED_DELETE: + edDelete(doc, nodes); + break; + case XML_ED_MOVE: { + xmlXPathObjectPtr res_to; + ctxt->node = (xmlNodePtr) doc; + res_to = xmlXPathEvalExpression(BAD_CAST ops[k].arg2, ctxt); + if (!res_to + || res_to->type != XPATH_NODESET + || res_to->nodesetval->nodeNr != 1) { + fprintf(stderr, "move destination is not a single node\n"); + continue; + } + edMove(doc, nodes, res_to->nodesetval->nodeTab[0]); + xmlXPathFreeObject(res_to); + break; + } + case XML_ED_UPDATE: + edUpdate(doc, nodes, ops[k].arg2, ops[k].type, ctxt); + break; + case XML_ED_RENAME: + edRename(doc, nodes, ops[k].arg2, ops[k].type); + break; + case XML_ED_INSERT: + edInsert(doc, nodes, ops[k].arg2, ops[k].arg3, ops[k].type, -1); + break; + case XML_ED_APPEND: + edInsert(doc, nodes, ops[k].arg2, ops[k].arg3, 
ops[k].type, 1); + break; + case XML_ED_SUBNODE: + edInsert(doc, nodes, ops[k].arg2, ops[k].arg3, ops[k].type, 0); + break; + default: + break; + } + xmlXPathFreeObject(res); + } + /* NOTE: free()ing ctxt also free()s previous_insertion */ + previous_insertion = NULL; + xmlDeregisterNodeDefault(NULL); + + xmlXPathFreeContext(ctxt); +} + +/** + * Output document + */ +static void +edOutput(const char* filename, const XmlEdAction* ops, int ops_count, + const edOptions* g_ops) +{ + xmlDocPtr doc; + int save_options = +#if LIBXML_VERSION >= 20708 + (g_ops->noblanks? 0 : XML_SAVE_WSNONSIG) | +#endif + (g_ops->preserveFormat? 0 : XML_SAVE_FORMAT) | + (g_ops->omit_decl? XML_SAVE_NO_DECL : 0); + int read_options = + (g_ops->nonet? XML_PARSE_NONET : 0); + xmlSaveCtxtPtr save; + + doc = xmlReadFile(filename, NULL, read_options); + if (!doc) + { + cleanupNSArr(ns_arr); + xmlCleanupParser(); + xmlCleanupGlobals(); + exit(EXIT_BAD_FILE); + } + + edProcess(doc, ops, ops_count); + + /* avoid getting ASCII CRs in UTF-16/UCS-(2,4) text */ + if ((xmlStrcasestr(doc->encoding, BAD_CAST "UTF") == 0 + && xmlStrcasestr(doc->encoding, BAD_CAST "16") == 0) + || + (xmlStrcasestr(doc->encoding, BAD_CAST "UCS") == 0 + && (xmlStrcasestr(doc->encoding, BAD_CAST "2") == 0 + || + xmlStrcasestr(doc->encoding, BAD_CAST "4") == 0))) + { + set_stdout_binary(); + } + + save = xmlSaveToFilename(g_ops->inplace? 
filename : "-", NULL, save_options); + xmlSaveDoc(save, doc); + xmlSaveClose(save); + + xmlFreeDoc(doc); +} + +/** + * get next command line arg, or print error exit and exit if there isn't one + * @returns pointer to the arg + * @argi is incremented + */ +static const char* +nextArg(char *const*const argv, int *argi) +{ + const char *arg = argv[*argi]; + if (arg == NULL) + { + edUsage(argv[0], EXIT_BAD_ARGS); + } + *argi += 1; + return arg; +} + +/** + * like nextArg(), but additionally look for next arg in @choices + */ +static XmlNodeType +parseNextArg(char *const*const argv, int *argi, + const OptionSpec choices[], int choices_count) +{ + const char* arg = nextArg(argv, argi); + int i; + for (i = 0; i < choices_count; i++) { + if ((arg[0] == '-' && arg[1] == choices[i].shortOpt) || + (strcmp(arg, choices[i].longOpt) == 0)) + return choices[i].type; + } + edUsage(argv[0], EXIT_BAD_ARGS); + return 0; /* never reach here */ +} +#define parseNextArg(argv, argi, choices) \ + parseNextArg(argv, argi, choices, COUNT_OF(choices)) + + +/** --insert, --append, and --subnode all take the same arguments */ +static void +parseInsertionArgs(XmlEdOp op_type, XmlEdAction* op, + char *const*const argv, int *argi) +{ + op->op = op_type; + op->arg1 = nextArg(argv, argi); + parseNextArg(argv, argi, OPT_JUST_TYPE); + op->type = parseNextArg(argv, argi, OPT_NODE_TYPE); + parseNextArg(argv, argi, OPT_JUST_NAME); + op->arg3 = nextArg(argv, argi); + parseNextArg(argv, argi, OPT_JUST_VAL); + op->arg2 = nextArg(argv, argi); +} + +/** + * This is the main function for 'edit' option + */ +int +edMain(int argc, char **argv) +{ + int i, ops_count, max_ops_count = 8, n, start = 0; + XmlEdAction* ops = xmlMalloc(sizeof(XmlEdAction) * max_ops_count); + static edOptions g_ops; + int nCount = 0; + + if (argc < 3) edUsage(argv[0], EXIT_BAD_ARGS); + + edInitOptions(&g_ops); + start = edParseOptions(&g_ops, argc, argv); + + parseNSArr(ns_arr, &nCount, argc-start, argv+start); + + /* + * Parse 
command line and fill array of operations + */ + ops_count = 0; + i = start + nCount; + + while (i < argc) + { + const char *arg = nextArg(argv, &i); + if (arg[0] == '-') + { + if (ops_count >= max_ops_count) + { + max_ops_count *= 2; + ops = xmlRealloc(ops, sizeof(XmlEdAction) * max_ops_count); + } + ops[ops_count].type = XML_UNDEFINED; + + if (!strcmp(arg, "-d") || !strcmp(arg, "--delete")) + { + ops[ops_count].op = XML_ED_DELETE; + ops[ops_count].arg1 = nextArg(argv, &i); + ops[ops_count].arg2 = 0; + } + else if (!strcmp(arg, "--var")) + { + ops[ops_count].op = XML_ED_VAR; + ops[ops_count].arg1 = nextArg(argv, &i); + ops[ops_count].arg2 = nextArg(argv, &i); + } + else if (!strcmp(arg, "-m") || !strcmp(arg, "--move")) + { + ops[ops_count].op = XML_ED_MOVE; + ops[ops_count].arg1 = nextArg(argv, &i); + ops[ops_count].arg2 = nextArg(argv, &i); + } + else if (!strcmp(arg, "-u") || !strcmp(arg, "--update")) + { + ops[ops_count].op = XML_ED_UPDATE; + ops[ops_count].arg1 = nextArg(argv, &i); + ops[ops_count].type = parseNextArg(argv, &i, OPT_VAL_OR_EXP); + ops[ops_count].arg2 = nextArg(argv, &i); + } + else if (!strcmp(arg, "-r") || !strcmp(arg, "--rename")) + { + ops[ops_count].op = XML_ED_RENAME; + ops[ops_count].arg1 = nextArg(argv, &i); + ops[ops_count].type = parseNextArg(argv, &i, OPT_JUST_VAL); + ops[ops_count].arg2 = nextArg(argv, &i); + } + else if (!strcmp(arg, "-i") || !strcmp(arg, "--insert")) + { + parseInsertionArgs(XML_ED_INSERT, &ops[ops_count], argv, &i); + } + else if (!strcmp(arg, "-a") || !strcmp(arg, "--append")) + { + parseInsertionArgs(XML_ED_APPEND, &ops[ops_count], argv, &i); + } + else if (!strcmp(arg, "-s") || !strcmp(arg, "--subnode")) + { + parseInsertionArgs(XML_ED_SUBNODE, &ops[ops_count], argv, &i); + } + else + { + fprintf(stderr, "Warning: unrecognized option '%s'\n", arg); + } + ops_count++; + } + else + { + i--; /* it was a filename, we didn't use it */ + break; + } + } + + xmlKeepBlanksDefault(0); + + if ((!g_ops.noblanks) || 
g_ops.preserveFormat) xmlKeepBlanksDefault(1); + + if (i >= argc) + { + edOutput("-", ops, ops_count, &g_ops); + } + + for (n=i; n<argc; n++) + { + edOutput(argv[n], ops, ops_count, &g_ops); + } + + xmlFree(ops); + cleanupNSArr(ns_arr); + xmlCleanupParser(); + xmlCleanupGlobals(); + return 0; +} diff --git a/src/xml_elem.c b/src/xml_elem.c new file mode 100644 index 0000000..024e62a --- /dev/null +++ b/src/xml_elem.c @@ -0,0 +1,286 @@ +/* $Id: xml_elem.c,v 1.23 2004/11/21 23:40:40 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <config.h> + +#include <libxml/xmlstring.h> +#include <libxml/hash.h> +#include <stdlib.h> +#include <string.h> + +#include "xmlstar.h" +#include "escape.h" + +/* TODO: + + 2. 
Option to display this only for nodes matching + an XPATH expression + + -p <xpath> + + so it will be able to deal with subtrees as well + +*/ + +typedef struct _elOptions { + int show_attr; /* show attributes */ + int show_attr_and_val; /* show attributes and values */ + int sort_uniq; /* do sort and uniq on output */ + int check_depth; /* limit depth */ +} elOptions; + + +static elOptions elOps; +static xmlHashTablePtr uniq = NULL; +static xmlChar *curXPath = NULL; + +/** + * Display usage syntax + */ +void +elUsage(int argc, char **argv, exit_status status) +{ + extern void fprint_elem_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? stdout : stderr; + fprint_elem_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * read file and print element paths + */ +int +parse_xml_file(const char *filename) +{ + int ret, prev_depth = 0; + xmlTextReaderPtr reader; + + for (reader = xmlReaderForFile(filename, NULL, 0);;) + { + int depth; + const xmlChar *name; + xmlReaderTypes type; + + if (!reader) { + fprintf(stderr, "couldn't read file '%s'\n", filename); + exit(EXIT_BAD_FILE); + } + + ret = xmlTextReaderRead(reader); + if (ret <= 0) break; + type = xmlTextReaderNodeType(reader); + depth = xmlTextReaderDepth(reader); + name = xmlTextReaderConstName(reader); + + if (type != XML_READER_TYPE_ELEMENT) + continue; + + while (curXPath && depth <= prev_depth) + { + xmlChar *slash = BAD_CAST strrchr((char*) curXPath, '/'); + if (slash) *slash = '\0'; + prev_depth--; + } + prev_depth = depth; + + if (depth > 0) curXPath = xmlStrcat(curXPath, BAD_CAST "/"); + curXPath = xmlStrcat(curXPath, name); + + if (elOps.show_attr) + { + int have_attr; + + fprintf(stdout, "%s\n", curXPath); + for (have_attr = xmlTextReaderMoveToFirstAttribute(reader); + have_attr; + have_attr = xmlTextReaderMoveToNextAttribute(reader)) + { + const xmlChar *aname = xmlTextReaderConstName(reader); + fprintf(stdout, "%s/@%s\n", 
curXPath, aname); + } + } + else if (elOps.show_attr_and_val) + { + fprintf(stdout, "%s", curXPath); + if (xmlTextReaderHasAttributes(reader)) + { + int have_attr, first = 1; + fprintf(stdout, "["); + for (have_attr = xmlTextReaderMoveToFirstAttribute(reader); + have_attr; + have_attr = xmlTextReaderMoveToNextAttribute(reader)) + { + const xmlChar *aname = xmlTextReaderConstName(reader), + *avalue = xmlTextReaderConstValue(reader); + char quote; + if (!first) + fprintf(stdout, " and "); + first = 0; + + quote = xmlStrchr(avalue, '\'')? '"' : '\''; + fprintf(stdout, "@%s=%c%s%c", aname, quote, avalue, quote); + } + fprintf(stdout, "]"); + } + fprintf(stdout, "\n"); + } + else if (elOps.sort_uniq) + { + if ((elOps.check_depth == 0) || (elOps.check_depth != 0 && depth < elOps.check_depth)) + { + xmlHashAddEntry(uniq, curXPath, (void*) 1); + } + } + else fprintf(stdout, "%s\n", curXPath); + + } + + return ret == -1? EXIT_LIB_ERROR : ret; +} + +/** + * Initialize options values + */ +void +elInitOptions(elOptions *ops) +{ + ops->show_attr = 0; + ops->show_attr_and_val = 0; + ops->sort_uniq = 0; + ops->check_depth = 0; +} + +typedef struct { + xmlChar **array; + int offset; +} ArrayDest; + +/** + * put @name into @data->array[@data->offset] + */ +static void +hash_key_put(void *payload, void *data, xmlChar *name) +{ + ArrayDest *dest = data; + dest->array[dest->offset++] = name; +} + +/** + * a compare function for qsort + * takes pointers to 2 xmlChar* and compares them + */ +static int +compare_string_ptr(const void *p1, const void *p2) +{ + typedef xmlChar const *const xmlCChar; + xmlCChar *str1 = p1, *str2 = p2; + return xmlStrcmp(*str1, *str2); +} + +/** + * This is the main function for 'el' option + */ +int +elMain(int argc, char **argv) +{ + int errorno = 0; + char* inp_file = "-"; + + if (argc <= 1) elUsage(argc, argv, EXIT_BAD_ARGS); + + elInitOptions(&elOps); + + if (argc == 2) + errorno = parse_xml_file("-"); + else + { + if (!strcmp(argv[2], "--help") || 
!strcmp(argv[2], "-h") || + !strcmp(argv[2], "-?") || !strcmp(argv[2], "-Z")) + { + elUsage(argc, argv, EXIT_SUCCESS); + } + else if (!strcmp(argv[2], "-a")) + { + elOps.show_attr = 1; + if (argc >= 4) inp_file = argv[3]; + errorno = parse_xml_file(inp_file); + } + else if (!strcmp(argv[2], "-v")) + { + elOps.show_attr_and_val = 1; + if (argc >= 4) inp_file = argv[3]; + errorno = parse_xml_file(inp_file); + } + else if (!strcmp(argv[2], "-u")) + { + elOps.sort_uniq = 1; + if (argc >= 4) inp_file = argv[3]; + uniq = xmlHashCreate(0); + errorno = parse_xml_file(inp_file); + } + else if (!strncmp(argv[2], "-d", 2)) + { + elOps.check_depth = atoi(argv[2]+2); + /* printf("Checking depth (%d)\n", elOps.check_depth); */ + elOps.sort_uniq = 1; + if (argc >= 4) inp_file = argv[3]; + uniq = xmlHashCreate(0); + errorno = parse_xml_file(inp_file); + } + else if (argv[2][0] != '-') + { + errorno = parse_xml_file(argv[2]); + } + else + elUsage(argc, argv, EXIT_BAD_ARGS); + } + + if (uniq) + { + int i; + ArrayDest lines; + lines.array = xmlMalloc(sizeof(xmlChar*) * xmlHashSize(uniq)); + lines.offset = 0; + xmlHashScan(uniq, hash_key_put, &lines); + + qsort(lines.array, lines.offset, sizeof(xmlChar*), compare_string_ptr); + + for (i = 0; i < lines.offset; i++) + { + printf("%s\n", lines.array[i]); + } + + xmlFree(lines.array); + xmlHashFree(uniq, NULL); + } + + return errorno; +} + diff --git a/src/xml_escape.c b/src/xml_escape.c new file mode 100644 index 0000000..d13b9d7 --- /dev/null +++ b/src/xml_escape.c @@ -0,0 +1,343 @@ +/* $Id: xml_escape.c,v 1.11 2004/11/21 23:40:40 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <config.h> + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include <libxml/xmlmemory.h> +#include <libxml/debugXML.h> +#include <libxml/xmlIO.h> +#include <libxml/parserInternals.h> + +#include "xmlstar.h" +#include "escape.h" + +#define INSZ 4*1024 + +/* + * TODO: 1. stdin input + * 2. exit values on errors + */ + +/** + * Print small help for command line options + */ +void +escUsage(int argc, char **argv, int escape, exit_status status) +{ + extern void fprint_escape_usage(FILE* o, const char* argv0); + extern void fprint_unescape_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + if (escape) fprint_escape_usage(o, argv[0]); + else fprint_unescape_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +struct xmlPredefinedChar { + const char *name; + char value; + int name_len; +}; + +static struct xmlPredefinedChar xmlPredefinedCharValues[] = { + { "lt", '<', 2 }, + { "gt", '>', 2 }, + { "apos", '\'', 4 }, + { "quot", '\"', 4 }, + { "amp", '&', 3 }, + { NULL, '\0', 0 } +}; + +/* + * Macro used to grow the current buffer. + */ +#define grow_BufferReentrant() { \ + buffer_size *= 2; \ + buffer = (xmlChar *) \ + xmlRealloc(buffer, buffer_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + fprintf(stderr, "growing buffer error"); \ + abort(); \ + } \ +} + +/** + * xml_C11NNormalizeString: + * @input: the input string + * @mode: the normalization mode (attribute, comment, PI or text) + * + * Converts a string to a canonical (normalized) format. The code is stolen + * from xmlEncodeEntitiesReentrant(). Added normalization of \x09, \x0a, \x0A + * and the @mode parameter + * + * Returns a normalized string (caller is responsible for calling xmlFree()) + * or NULL if an error occurs + */ +xmlChar * +xml_C11NNormalizeString(const xmlChar * input, + xml_C14NNormalizationMode mode) +{ + const xmlChar *cur = input; + xmlChar *buffer = NULL; + xmlChar *out = NULL; + int buffer_size = 0; + + if (input == NULL) + return (NULL); + + /* + * allocate an translation buffer. 
+ */ + buffer_size = 1000; + buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + fprintf(stderr, "allocating buffer error"); + abort(); + } + out = buffer; + + while (*cur != '\0') { + if ((out - buffer) > (buffer_size - 10)) { + int indx = out - buffer; + + grow_BufferReentrant(); + out = &buffer[indx]; + } + + if ((*cur == '<') && ((mode == XML_C14N_NORMALIZE_ATTR) || + (mode == XML_C14N_NORMALIZE_TEXT))) { + *out++ = '&'; + *out++ = 'l'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '>') && (mode == XML_C14N_NORMALIZE_TEXT)) { + *out++ = '&'; + *out++ = 'g'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '&') && ((mode == XML_C14N_NORMALIZE_ATTR) || + (mode == XML_C14N_NORMALIZE_TEXT))) { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'm'; + *out++ = 'p'; + *out++ = ';'; + } else if ((*cur == '"') && (mode == XML_C14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = 'q'; + *out++ = 'u'; + *out++ = 'o'; + *out++ = 't'; + *out++ = ';'; + } else if ((*cur == '\x09') && (mode == XML_C14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = '9'; + *out++ = ';'; + } else if ((*cur == '\x0A') && (mode == XML_C14N_NORMALIZE_ATTR)) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = 'A'; + *out++ = ';'; + } else if ((*cur == '\x0D') && ((mode == XML_C14N_NORMALIZE_ATTR) || + (mode == XML_C14N_NORMALIZE_TEXT) || + (mode == XML_C14N_NORMALIZE_COMMENT) || + (mode == XML_C14N_NORMALIZE_PI))) { + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + *out++ = 'D'; + *out++ = ';'; + } else { + /* + * Works because on UTF-8, all extended sequences cannot + * result in bytes in the ASCII range. 
+ */ + *out++ = *cur; + } + cur++; + } + *out++ = 0; + return (buffer); +} + +/* TODO: CHECK THIS PROCEDURE IT'S PROB FULL OF BUGS */ +char * +xml_unescape(char* str) +{ + char *p = str, *p2 = NULL; + char *ret = NULL; + + ret = (char*) xmlCharStrdup(str); + p2 = ret; + + while(*p) + { + if (*p == '&') + { + struct xmlPredefinedChar *pair = xmlPredefinedCharValues; + + p++; + if (*p == '\0') break; + + + if (*p == '#') + { + int num; + p++; + if (*p == '\0') break; + num = atoi(p); + + while((*p >= '0') && (*p <= '9')) p++; + + if (*p == ';') + { + *p2 = (char) num; + p2++; + p++; + } + continue; + } + else + { + while(pair->name) + { + if (!strncmp(p, pair->name, pair->name_len)) + { + if (*(p+pair->name_len) == ';') + { + *p2 = pair->value; + p2++; + p += (pair->name_len + 1); + break; + } + } + pair++; + } + continue; + } + } + + *p2 = *p; + p2++; + p++; + } + + *p2 = '\0'; + + return ret; +} + +/** + * This is the main function for 'escape/unescape' options + */ +int +escMain(int argc, char **argv, int escape) +{ + int ret = 0; + int readStdIn = 0; + + char* inp = NULL; + xmlChar* outBuf = NULL; + + if (argc < 2) escUsage(argc, argv, escape, EXIT_BAD_ARGS); + + inp = argv[2]; + + if (argc > 2) + { + if (!strcmp(argv[2], "--help") || !strcmp(argv[2], "-h") || + !strcmp(argv[2], "-?") || !strcmp(argv[2], "-Z")) + escUsage(argc, argv, escape, EXIT_SUCCESS); + if (!strcmp(argv[2], "-")) readStdIn = 1; + } + else + { + readStdIn = 1; + } + + if (readStdIn) + { + static char line[INSZ]; + + while (!feof(stdin)) + { + if (fgets(line, INSZ - 1, stdin)) + { + if (escape) + { + outBuf = xmlEncodeEntitiesReentrant(NULL, (xmlChar*) line); + if (outBuf) + { + fprintf(stdout, "%s", outBuf); + xmlFree(outBuf); + } + } + else + { + outBuf = (xmlChar*) xml_unescape(line); + if (outBuf) + { + fprintf(stdout, "%s", outBuf); + free(outBuf); + } + } + } + } + + return ret; + } + + if (escape) + { + outBuf = xmlEncodeEntitiesReentrant(NULL, (xmlChar*) inp); + if (outBuf) + { + 
fprintf(stdout, "%s\n", outBuf); + xmlFree(outBuf); + } + } + else + { + outBuf = (xmlChar*) xml_unescape(inp); + if (outBuf) + { + fprintf(stdout, "%s\n", outBuf); + free(outBuf); + } + } + + return ret; +} diff --git a/src/xml_format.c b/src/xml_format.c new file mode 100644 index 0000000..4d6fe31 --- /dev/null +++ b/src/xml_format.c @@ -0,0 +1,392 @@ +/* $Id: xml_format.c,v 1.25 2005/01/07 02:33:40 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <config.h> + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include <libxml/xmlmemory.h> +#include <libxml/debugXML.h> +#include <libxml/xmlIO.h> +#include <libxml/HTMLtree.h> +#include <libxml/xinclude.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <libxml/xpointer.h> +#include <libxml/parserInternals.h> +#include <libxml/uri.h> + +#include "xmlstar.h" + +/* + * TODO: 1. Attribute formatting options (as every attribute on a new line) + * 2. exit values on errors + */ + +typedef struct _foOptions { + int indent; /* indent output */ + int indent_tab; /* indent output with tab */ + int indent_spaces; /* num spaces for indentation */ + int omit_decl; /* omit xml declaration */ + int recovery; /* try to recover what is parsable */ + int dropdtd; /* remove the DOCTYPE of the input docs */ + int options; /* global parsing flags */ +#ifdef LIBXML_HTML_ENABLED + int html; /* inputs are in HTML format */ +#endif + int quiet; /* quiet mode */ +} foOptions; + +typedef foOptions *foOptionsPtr; + +const char *encoding = NULL; +static char *spaces = NULL; + +/** + * Print small help for command line options + */ +void +foUsage(int argc, char **argv, exit_status status) +{ + extern void fprint_format_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_format_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * Initialize global command line options + */ +void +foInitOptions(foOptionsPtr ops) +{ + ops->indent = 1; + ops->indent_tab = 0; + ops->indent_spaces = 2; + ops->omit_decl = 0; + ops->recovery = 0; + ops->dropdtd = 0; + ops->options = XML_PARSE_NONET; +#ifdef LIBXML_HTML_ENABLED + ops->html = 0; +#endif + ops->quiet = 0; +} + +/** + * Initialize LibXML + */ +void +foInitLibXml(foOptionsPtr ops) +{ + /* + * Initialize library memory + */ + xmlInitMemory(); + + LIBXML_TEST_VERSION + + /* + * Store line numbers in the document tree + */ + xmlLineNumbersDefault(1); + + xmlSubstituteEntitiesDefault(1); + xmlKeepBlanksDefault(0); + xmlPedanticParserDefault(0); + + xmlGetWarningsDefaultValue = 1; + xmlDoValidityCheckingDefaultValue = 0; + xmlLoadExtDtdDefaultValue = 0; + + xmlTreeIndentString = NULL; + if (ops->indent) + { + xmlIndentTreeOutput = 1; + if (ops->indent_tab) + { + xmlTreeIndentString = "\t"; + } + else if (ops->indent_spaces > 0) + { + spaces = xmlMalloc(ops->indent_spaces + 1); + xmlTreeIndentString = spaces; + memset(spaces, ' ', ops->indent_spaces); + spaces[ops->indent_spaces] = '\0'; + } + } + else + xmlIndentTreeOutput = 0; +} + +/** + * Parse global command line options + */ +int +foParseOptions(foOptionsPtr ops, int argc, char **argv) +{ + int i; + + i = 2; + while(i < argc) + { + if (!strcmp(argv[i], "--noindent") || !strcmp(argv[i], "-n")) + { + ops->indent = 0; + i++; + } + else if (!strcmp(argv[i], "--encode") || !strcmp(argv[i], "-e")) + { + i++; + encoding = argv[i]; + i++; + } + else if (!strcmp(argv[i], "--indent-tab") || !strcmp(argv[i], "-t")) + { + ops->indent_tab = 1; + i++; + } + else if (!strcmp(argv[i], "--omit-decl") || !strcmp(argv[i], "-o")) + { + ops->omit_decl = 1; + i++; + } + else if (!strcmp(argv[i], "--dropdtd") || !strcmp(argv[i], "-D")) + { + ops->dropdtd = 1; + i++; + } + else if (!strcmp(argv[i], "--recover") || 
!strcmp(argv[i], "-R")) + { + ops->recovery = 1; + ops->options |= XML_PARSE_RECOVER; + i++; + } + else if (!strcmp(argv[i], "--nocdata") || !strcmp(argv[i], "-C")) + { + ops->options |= XML_PARSE_NOCDATA; + i++; + } + else if (!strcmp(argv[i], "--nsclean") || !strcmp(argv[i], "-N")) + { + ops->options |= XML_PARSE_NSCLEAN; + i++; + } + else if (!strcmp(argv[i], "--indent-spaces") || !strcmp(argv[i], "-s")) + { + int value; + i++; + if (i >= argc) foUsage(argc, argv, EXIT_BAD_ARGS); + if (sscanf(argv[i], "%d", &value) == 1) + { + if (value > 0) ops->indent_spaces = value; + } + else + { + foUsage(argc, argv, EXIT_BAD_ARGS); + } + ops->indent_tab = 0; + i++; + } + else if (!strcmp(argv[i], "--quiet") || !strcmp(argv[i], "-Q")) + { + ops->quiet = 1; + i++; + } +#ifdef LIBXML_HTML_ENABLED + else if (!strcmp(argv[i], "--html") || !strcmp(argv[i], "-H")) + { + ops->html = 1; + i++; + } +#endif + else if (!strcmp(argv[i], "--net")) + { + ops->options &= ~XML_PARSE_NONET; + i++; + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + foUsage(argc, argv, EXIT_SUCCESS); + } + else if (!strcmp(argv[i], "-")) + { + i++; + break; + } + else if (argv[i][0] == '-') + { + foUsage(argc, argv, EXIT_BAD_ARGS); + } + else + { + i++; + break; + } + } + + return i-1; +} + +void my_error_func(void* ctx, const char * msg, ...) 
{ + /* do nothing */ +} + +void my_structured_error_func(void * userData, xmlErrorPtr error) { + /* do nothing */ +} + +/** + * 'process' xml document(s) + */ +int +foProcess(foOptionsPtr ops, int start, int argc, char **argv) +{ + int ret = 0; + xmlDocPtr doc = NULL; + char *fileName = "-"; + + if ((start > 1) && (start < argc) && (argv[start][0] != '-') && + strcmp(argv[start-1], "--indent-spaces") && + strcmp(argv[start-1], "-s")) + { + fileName = argv[start]; + } +/* + if (ops->recovery) + { + doc = xmlRecoverFile(fileName); + } + else +*/ + if (ops->quiet) { + xmlSetGenericErrorFunc(NULL, my_error_func); + xmlSetStructuredErrorFunc(NULL, my_structured_error_func); + } + +#ifdef LIBXML_HTML_ENABLED + if (ops->html) + { + doc = htmlReadFile(fileName, NULL, ops->options); + } + else +#endif + doc = xmlReadFile(fileName, NULL, ops->options); + + if (doc == NULL) + { + /*fprintf(stderr, "%s:: error: XML parse error\n", fileName);*/ + return 2; + } + + /* + * Remove DOCTYPE nodes + */ + if (ops->dropdtd) { + xmlDtdPtr dtd; + + dtd = xmlGetIntSubset(doc); + if (dtd != NULL) { + xmlUnlinkNode((xmlNodePtr)dtd); + xmlFreeDtd(dtd); + } + } + + if (!ops->omit_decl) + { + if (encoding != NULL) + { + xmlSaveFormatFileEnc("-", doc, encoding, 1); + } + else + { + xmlSaveFormatFile("-", doc, 1); + } + } + else + { + int format = 1; + xmlOutputBufferPtr buf = NULL; + xmlCharEncodingHandlerPtr handler = NULL; + buf = xmlOutputBufferCreateFile(stdout, handler); + + if (doc->children != NULL) + { + xmlNodePtr child = doc->children; + while (child != NULL) + { + xmlNodeDumpOutput(buf, doc, child, 0, format, encoding); + xmlOutputBufferWriteString(buf, "\n"); + child = child->next; + } + } + ret = xmlOutputBufferClose(buf); + } + + xmlFreeDoc(doc); + return ret; +} + +/** + * Cleanup memory + */ +void +foCleanup() +{ + free(spaces); + spaces = NULL; + xmlCleanupParser(); +#if 0 + xmlMemoryDump(); +#endif +} + +/** + * This is the main function for 'format' option + */ +int 
+foMain(int argc, char **argv) +{ + int ret = 0; + int start; + static foOptions ops; + + if (argc <=1) foUsage(argc, argv, EXIT_BAD_ARGS); + foInitOptions(&ops); + start = foParseOptions(&ops, argc, argv); + if (argc-start > 1) foUsage(argc, argv, EXIT_BAD_ARGS); + foInitLibXml(&ops); + ret = foProcess(&ops, start, argc, argv); + foCleanup(); + + return ret; +} diff --git a/src/xml_ls.c b/src/xml_ls.c new file mode 100644 index 0000000..c7341b9 --- /dev/null +++ b/src/xml_ls.c @@ -0,0 +1,204 @@ +/* $Id: xml_ls.c,v 1.17 2005/03/19 01:18:02 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <config.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <dirent.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include <libxml/xmlmemory.h> +#include <libxml/c14n.h> + +#include "xmlstar.h" +#include "escape.h" + +#if !HAVE_LSTAT +# if HAVE_STAT +# define lstat stat +# else +/* TODO: #ifdef out code that uses stat instead */ +# error "lstat() or stat() required" +# endif +#endif + +#ifndef S_ISLNK +# define S_ISLNK(m) 0 +#endif + +#ifndef S_ISSOCK +# define S_ISSOCK(m) 0 +#endif + +/** + * Print small help for command line options + */ +void +lsUsage(int argc, char **argv, exit_status status) +{ + extern void fprint_ls_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? stdout : stderr; + fprint_ls_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + + +const char * +get_file_type(mode_t mode) +{ + if (S_ISREG(mode)) return "f"; /* regular file */ + else if (S_ISDIR(mode)) return "d"; /* directory */ + else if (S_ISCHR(mode)) return "c"; /* character device */ + else if (S_ISBLK(mode)) return "b"; /* block device */ + else if (S_ISLNK(mode)) return "l"; /* symlink */ + else if (S_ISFIFO(mode)) return "p"; /* fifo */ + else if (S_ISSOCK(mode)) return "s"; /* socket */ + else return "u"; /* unknown */ +} + +const char * +get_file_perms(mode_t mode) +{ + int i; + static char perms[10]; + + strcpy(perms, "---------"); + + for(i=0; i < sizeof perms - 1; i+=3) + { + if(mode &(S_IRUSR>>i)) + perms[i+0] = 'r'; + + if(mode &(S_IWUSR>>i)) + perms[i+1] = 'w'; + + if(mode &(S_IXUSR>>i)) + perms[i+2] = 'x'; + } + +#ifdef S_ISUID + if((mode & S_ISUID)) + perms[2] = 's'; +#endif + +#ifdef S_ISGID + if((mode & S_ISGID)) + perms[5] = 's'; +#endif + +#ifdef S_ISVTX + if((mode & S_ISVTX)) + perms[8] = 't'; +#endif + + return(perms); +} + +int +xml_print_dir(const char* dir) +{ + DIR *dirp; + struct dirent *d; + struct 
stat stats; + int num_files = 0; + + if((dirp = opendir(dir)) == NULL) + return(-1); + + chdir(dir); + + while((d = readdir(dirp)) != NULL) + { + xmlChar *xml_str; + char atime[20]; + char mtime[20]; + int size_len; + + if ((d->d_name == NULL) || !strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + + if(lstat(d->d_name, &stats) != 0) + { + fprintf(stderr, "couldn't stat: %s\n", d->d_name); + } + +#if defined (__MINGW32__) + /* somehow atime is -1 on Windows XP when the atime is in future */ + if (stats.st_atime < 0) stats.st_atime = 0; + /* somehow mtime is -1 on Windows XP when the mtime is in future */ + if (stats.st_mtime < 0) stats.st_mtime = 0; +#endif + + /* format time as per ISO 8601 */ + strftime(atime, sizeof atime, "%Y%m%dT%H%M%SZ", gmtime(&stats.st_atime)); + strftime(mtime, sizeof mtime, "%Y%m%dT%H%M%SZ", gmtime(&stats.st_mtime)); + + xml_str = xml_C11NNormalizeAttr((const xmlChar *) d->d_name); + printf("<%s p=\"%s\" a=\"%s\" m=\"%s\" s=\"", + get_file_type(stats.st_mode), get_file_perms(stats.st_mode), + atime, mtime); + size_len = printf("%lu", (unsigned long) stats.st_size); + printf("\"%.*s", 16-size_len, " "); + printf(" n=\"%s\"/>\n", xml_str); + num_files++; + xmlFree(xml_str); + + } /* end of for loop */ + + closedir(dirp); + return num_files; +} + +int +lsMain(int argc, char** argv) +{ + const char *dir = "."; + int files; + + if (argc == 3) { + if (strcmp(argv[2], "--help") == 0) + lsUsage(argc, argv, EXIT_SUCCESS); + else + dir = argv[2]; + } else if (argc > 3) { + lsUsage(argc, argv, EXIT_BAD_ARGS); + } + + printf("<dir>\n"); + files = xml_print_dir(dir); + printf("</dir>\n"); + return (files >= 0)? 
EXIT_SUCCESS : EXIT_FAILURE; +} + diff --git a/src/xml_pyx.c b/src/xml_pyx.c new file mode 100644 index 0000000..e97dbd2 --- /dev/null +++ b/src/xml_pyx.c @@ -0,0 +1,293 @@ +/* $Id: xml_pyx.c,v 1.9 2005/03/12 03:24:23 mgrouch Exp $ */ + +/** + * Based on xmln from pyxie project + * + * The PYX format is a line-oriented representation of + * XML documents that is derived from the SGML ESIS format. + * (see ESIS - ISO 8879 Element Structure Information Set spec, + * ISO/IEC JTC1/SC18/WG8 N931 (ESIS)) + * + * A non-validating, ESIS generating tool + * ESIS Generation by Sean Mc Grath http://www.digitome.com/sean.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <libxml/parser.h> +#include <libxml/parserInternals.h> + +#include "xmlstar.h" + +/** + * Output newline and tab characters as escapes + * Required both for attribute values and character data (#PCDATA) + */ +static void +SanitizeData(const xmlChar *s, int len) +{ + while (len--) + { + switch (*s) + { + case 10: + printf("\\n"); + break; + case 13: + break; + case 9: + printf ("\\t"); + break; + case '\\': + printf ("\\\\"); + break; + default: + putchar (*s); + } + s++; + } +} + +static void +print_qname(const xmlChar *prefix, const xmlChar *localname) +{ + if (prefix) + printf("%s:", prefix); + printf("%s", localname); +} + +int +CompareAttributes(const void *a1,const void *a2) +{ + typedef xmlChar const *const xmlCStr; + xmlCStr *attr1 = a1, *attr2 = a2; + return xmlStrcmp(*attr1, *attr2); +} + +void +pyxStartElement (void * ctx, + const xmlChar * localname, + const xmlChar * prefix, + const xmlChar * URI, + int nb_namespaces, + const xmlChar ** namespaces, + int nb_attributes, + int nb_defaulted, + const xmlChar ** attributes) +{ + int i; + fprintf(stdout,"("); + print_qname(prefix, localname); + fprintf(stdout, "\n"); + + + if (nb_attributes > 1) + /* Sort the pairs based on the name part of the pair */ + qsort ((void *)attributes, + nb_attributes, + sizeof(xmlChar *)*5, 
+ CompareAttributes); + + for (i = 0; i < nb_namespaces; i++) { + int aidx = i * 2; + const xmlChar + *prefix = namespaces[aidx], + *uri = namespaces[aidx+1]; + /* namespace definitions take the form xmlns:prefix=uri*/ + putchar('A'); + if (xmlStrlen(prefix) > 0) + print_qname(BAD_CAST "xmlns", prefix); + else + fputs("xmlns", stdout); + putchar(' '); + SanitizeData(uri, xmlStrlen(uri)); + putchar('\n'); + } + + for (i = 0; i < nb_attributes; i++) { + int aidx = i * 5; + const xmlChar *localname = attributes[aidx], + *prefix = attributes[aidx+1], + /* *nsURI = attributes[aidx+2], */ + *valueBegin = attributes[aidx+3], + *valueEnd = attributes[aidx+4]; + int valueLen = valueEnd - valueBegin; + + /* Attribute Name */ + putchar('A'); + print_qname(prefix, localname); + putchar(' '); + /* value - can contain literal "\n" so escape */ + SanitizeData(valueBegin, valueLen); + putchar('\n'); + } +} + +void +pyxEndElement(void *userData, const xmlChar *localname, const xmlChar *prefix, + const xmlChar *URI) +{ + fprintf(stdout,")"); + print_qname(prefix, localname); + putchar('\n'); +} + +void +pyxCharacterData(void *userData, const xmlChar *s, int len) +{ + fprintf(stdout, "-"); + SanitizeData(s, len); + putchar('\n'); +} + +void +pyxProcessingInstruction(void *userData, + const xmlChar *target, + const xmlChar *data) +{ + fprintf(stdout,"?%s ",target); + SanitizeData(data, xmlStrlen(data)); + fprintf(stdout,"\n"); +} + +void +pyxUnparsedEntityDeclHandler(void *userData, + const xmlChar *entityName, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName) +{ + fprintf(stdout, "U%s %s %s%s%s\n", + (char *)entityName, (char *)notationName, (char *)systemId, + (publicId == NULL? "": " "), + (publicId == NULL? 
"": (char *) publicId)); +} + +void +pyxNotationDeclHandler(void *userData, + const xmlChar *notationName, + const xmlChar *publicId, + const xmlChar *systemId) +{ + fprintf(stdout, "N%s %s%s%s\n", (char*) notationName, (char*) systemId, + (publicId == NULL? "": " "), + (publicId == NULL? "": (const char*) publicId)); +} + +void +pyxExternalEntityReferenceHandler(void* userData, + const xmlChar *name) +{ + const xmlChar *p = name; + fprintf (stdout, "&"); + /* Up to space is the name of the referenced entity */ + while (*p && (*p != ' ')) { + putchar (*p); + p++; + } +} + +static void +pyxExternalSubsetHandler(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + fprintf(stdout, "D %s PUBLIC", name); /* TODO: re-check */ + if (ExternalID == NULL) + fprintf(stdout, " "); + else + fprintf(stdout, " \"%s\"", ExternalID); + if (SystemID == NULL) + fprintf(stdout, "\n"); + else + fprintf(stdout, " \"%s\"\n", SystemID); +} + +static void +pyxCommentHandler(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value) +{ + fprintf(stdout,"C"); + SanitizeData(value, xmlStrlen(value)); + fprintf(stdout,"\n"); +} + +static void +pyxCdataBlockHandler(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value, int len) +{ + fprintf(stdout,"["); + SanitizeData(value, len); + fprintf(stdout,"\n"); +} + +static void +pyxUsage(const char *argv0, exit_status status) +{ + extern void fprint_pyx_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_pyx_usage(o, argv0); + fprintf(o, "%s", more_info); + exit(status); +} + +int +pyx_process_file(const char *filename) +{ + int ret; + xmlParserCtxtPtr ctxt; + + xmlInitParser(); + ctxt = xmlCreateFileParserCtxt(filename); + + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + + /* Establish Event Handlers */ + ctxt->sax->initialized = XML_SAX2_MAGIC; + ctxt->sax->startElementNs = pyxStartElement; + ctxt->sax->endElementNs = pyxEndElement; + ctxt->sax->processingInstruction = pyxProcessingInstruction; + ctxt->sax->characters = pyxCharacterData; + ctxt->sax->notationDecl = pyxNotationDeclHandler; + ctxt->sax->reference = pyxExternalEntityReferenceHandler; + ctxt->sax->unparsedEntityDecl = pyxUnparsedEntityDeclHandler; + ctxt->sax->externalSubset = pyxExternalSubsetHandler; + ctxt->sax->comment = pyxCommentHandler; + ctxt->sax->cdataBlock = pyxCdataBlockHandler; + + ret = xmlParseDocument(ctxt); + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + + return ret; +} + +int +pyxMain(int argc,const char *argv[]) +{ + int status = 0; + + if ((argc > 2) && + ( + (strcmp(argv[2],"-h") == 0) || + (strcmp(argv[2],"-H") == 0) || + (strcmp(argv[2],"-Z") == 0) || + (strcmp(argv[2],"-?") == 0) || + (strcmp(argv[2],"--help") == 0) + )) + { + pyxUsage(argv[0], EXIT_SUCCESS); + } + if (argc == 2) { + status = pyx_process_file("-"); + } + else { + argv++; + argc--; + for (++argv; argc>1; argc--,argv++) { + int ret = pyx_process_file(*argv); + if (ret != 0) status = ret; + } + } + return status; +} diff --git a/src/xml_select.c b/src/xml_select.c new file mode 100644 index 0000000..890582b --- /dev/null +++ b/src/xml_select.c @@ -0,0 +1,1017 @@ +/* $Id: xml_select.c,v 1.67 2005/01/07 02:02:13 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <config.h> + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <ctype.h> + +#include <libxml/tree.h> +#include <libxslt/templates.h> + +#include "xmlstar.h" +#include "trans.h" + +/* max length of xmlstarlet supplied (ie not from command line) namespaces + * currently xalanredirect is longest, at 13 characters*/ +#define MAX_NS_PREFIX_LEN 20 + +typedef struct { + const xmlChar *href, *prefix; +} NsEntry; + +static const NsEntry ns_entries[] = { + { BAD_CAST "http://exslt.org/common", BAD_CAST "exslt" }, + { BAD_CAST "http://exslt.org/math", BAD_CAST "math" }, + { BAD_CAST "http://exslt.org/dates-and-times", BAD_CAST "date" }, + { BAD_CAST "http://exslt.org/functions", BAD_CAST "func" }, + { BAD_CAST "http://exslt.org/sets", BAD_CAST "set" }, + { BAD_CAST "http://exslt.org/strings", BAD_CAST "str" }, + { BAD_CAST "http://exslt.org/dynamic", BAD_CAST "dyn" }, + { BAD_CAST "http://icl.com/saxon", BAD_CAST "saxon" }, + { BAD_CAST "org.apache.xalan.xslt.extensions.Redirect", + BAD_CAST "xalanredirect"}, /* see MAX_NS_PREFIX_LEN */ + { BAD_CAST "http://www.jclark.com/xt", BAD_CAST "xt" }, + { BAD_CAST "http://xmlsoft.org/XSLT/namespace", BAD_CAST "libxslt" }, + { BAD_CAST "http://xmlsoft.org/XSLT/", BAD_CAST "test" }, +}; + +static const NsEntry* +lookup_ns_entry(const char *prefix, int len) { + int i; + for (i = 0; i < COUNT_OF(ns_entries); i++) { + if (xmlStrncmp(BAD_CAST prefix, ns_entries[i].prefix, len) == 0) + return &ns_entries[i]; + } + return NULL; +} + + +typedef struct _selOptions { + int quiet; /* No output */ + int printXSLT; /* Display prepared XSLT */ + int printRoot; /* Print root element in output (if XML) */ + int outText; /* Output is text */ + int indent; /* Indent output */ + int noblanks; /* Remove insignificant spaces from XML tree */ + int no_omit_decl; /* Print XML declaration line <?xml version="1.0"?> */ + int nonet; /* refuse to fetch DTDs or entities over 
network */ + const xmlChar *encoding; /* the "encoding" attribute on the stylesheet's <xsl:output/> */ +} selOptions; + +typedef selOptions *selOptionsPtr; + +typedef enum { TARG_NONE = 0, TARG_SORT_OP, TARG_XPATH, + TARG_ATTR_STRING, TARG_STRING, TARG_VAR, + /* template args below don't consume any command line args */ + TARG_NEWLINE, TARG_NO_CMDLINE = TARG_NEWLINE, + TARG_INP_NAME, TARG_STR_NAME_SELECT +} template_argument_type; +typedef struct { + const xmlChar *attrname; + template_argument_type type; +} template_option_argument; + +#define TEMPLATE_OPT_MAX_ARGS 2 + +typedef struct { + char shortopt; + const char *longopt; + const xmlChar *xslname; + template_option_argument arguments[TEMPLATE_OPT_MAX_ARGS]; + int nest; +} template_option; + +static const template_option + OPT_TEMPLATE = { 't', "template" }, + OPT_COPY_OF = { 'c', "copy-of", BAD_CAST "copy-of", {{BAD_CAST "select", TARG_XPATH}}, 0 }, + OPT_VALUE_OF = { 'v', "value-of", BAD_CAST "with-param", {{BAD_CAST "name", TARG_STR_NAME_SELECT}, + {BAD_CAST "select", TARG_XPATH}}, -1 }, + OPT_OUTPUT = { 'o', "output", BAD_CAST "text", {{NULL, TARG_STRING}}, 0 }, + OPT_NL = { 'n', "nl", BAD_CAST "value-of", {{NULL, TARG_NEWLINE}}, 0 }, + OPT_INP_NAME = { 'f', "inp-name", BAD_CAST "copy-of", {{NULL, TARG_INP_NAME}}, 0 }, + OPT_MATCH = { 'm', "match", BAD_CAST "for-each", {{BAD_CAST "select", TARG_XPATH}}, 1 }, + OPT_IF = { 'i', "if", BAD_CAST"when", {{BAD_CAST "test", TARG_XPATH}}, 1 }, + OPT_ELIF = { 0, "elif", BAD_CAST "when", {{BAD_CAST "test", TARG_XPATH}}, 1 }, + OPT_ELSE = { 0, "else", BAD_CAST "otherwise", {{NULL}}, 1 }, + OPT_ELEM = { 'e', "elem", BAD_CAST "element", {{BAD_CAST "name", TARG_ATTR_STRING}}, 1 }, + OPT_ATTR = { 'a', "attr", BAD_CAST "attribute", {{BAD_CAST "name", TARG_ATTR_STRING}}, 1 }, + OPT_BREAK = { 'b', "break", NULL, {{NULL}}, -1 }, + OPT_SORT = { 's', "sort", BAD_CAST "sort", {{NULL, TARG_SORT_OP}, {BAD_CAST "select", TARG_XPATH}}, 0 }, + OPT_VAR = { 0, "var", BAD_CAST 
"variable", {{BAD_CAST "name", TARG_VAR}}, 1}, + + *TEMPLATE_OPTIONS[] = { + &OPT_TEMPLATE, + &OPT_COPY_OF, + &OPT_VALUE_OF, + &OPT_OUTPUT, + &OPT_NL, + &OPT_INP_NAME, + &OPT_MATCH, + &OPT_IF, + &OPT_ELIF, + &OPT_ELSE, + &OPT_ELEM, + &OPT_ATTR, + &OPT_BREAK, + &OPT_SORT, + &OPT_VAR + }; + +void +caseSortFunction(xsltTransformContextPtr ctxt, xmlNodePtr *sorts, + int nbsorts); + +/** + * Print small help for command line options + */ +void +selUsage(const char *argv0, exit_status status) +{ + extern void fprint_select_usage(FILE* out, const char* argv0); + extern const char more_info[]; + extern const char libxslt_more_info[]; + FILE *o = (status == EXIT_SUCCESS)? stdout : stderr; + fprint_select_usage(o, argv0); + fprintf(o, "%s", more_info); + fprintf(o, "%s", libxslt_more_info); + exit(status); +} + +/** + * Initialize global command line options + */ +void +selInitOptions(selOptionsPtr ops) +{ + ops->quiet = 0; + ops->printXSLT = 0; + ops->printRoot = 0; + ops->outText = 0; + ops->indent = 0; + ops->noblanks = 0; + ops->no_omit_decl = 0; + ops->nonet = 1; + ops->encoding = NULL; +} + +/** + * Parse global command line options + */ +int +selParseOptions(selOptionsPtr ops, int argc, char **argv) +{ + int i; + + i = 2; + while((i < argc) && (strcmp(argv[i], "-t")) && strcmp(argv[i], "--template")) + { + if (!strcmp(argv[i], "-C")) + { + ops->printXSLT = 1; + } + else if (!strcmp(argv[i], "-Q") || !strcmp(argv[i], "--quiet")) + { + ops->quiet = 1; + } + else if (!strcmp(argv[i], "-B") || !strcmp(argv[i], "--noblanks")) + { + ops->noblanks = 1; + } + else if (!strcmp(argv[i], "-T") || !strcmp(argv[i], "--text")) + { + ops->outText = 1; + } + else if (!strcmp(argv[i], "-R") || !strcmp(argv[i], "--root")) + { + ops->printRoot = 1; + } + else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "--indent")) + { + ops->indent = 1; + } + else if (!strcmp(argv[i], "-D") || !strcmp(argv[i], "--xml-decl")) + { + ops->no_omit_decl = 1; + } + else if (!strcmp(argv[i], "-E") || 
!strcmp(argv[i], "--encode")) + { + if ((i+1) < argc) + { + if (argv[i + 1][0] == '-') + { + fprintf(stderr, "-E option requires argument <encoding> ex: (utf-8, unicode...)\n"); + exit(EXIT_BAD_ARGS); + } + else + { + ops->encoding = BAD_CAST argv[i + 1]; + } + } + else + { + fprintf(stderr, "-E option requires argument <encoding> ex: (utf-8, unicode...)\n"); + exit(EXIT_BAD_ARGS); + } + + } + else if (!strcmp(argv[i], "--net")) + { + ops->nonet = 0; + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h") || + !strcmp(argv[i], "-?") || !strcmp(argv[i], "-Z")) + { + selUsage(argv[0], EXIT_SUCCESS); + } + i++; + } + + return i; +} + + +/** + * Search for namespace references in @xpath and add them to @root. Note that we + * might pickup things that aren't actually namespace references because we + * don't have a full XPath parser. That's okay, an extra namespace definition + * won't hurt anyone. + */ +static void +checkNsRefs(xmlNodePtr root, const char *xpath) +{ + const char *colon; + for (colon = xpath; colon; colon++) { + int ns_idx = -1; + + colon = strchr(colon, ':'); + if (!colon) break; + + for (;; ns_idx--) { + if (&colon[ns_idx] < xpath + ||!isalnum(colon[ns_idx])) { + const NsEntry *ns; + ns_idx++; + ns = lookup_ns_entry(&colon[ns_idx], -ns_idx); + if (ns) xmlNewNs(root, ns->href, ns->prefix); + break; + } + if (-ns_idx >= MAX_NS_PREFIX_LEN) break; + } + } +} + +/** + * Prepare XSLT template based on command line options + * Assumes start points to -t option + */ +int +selGenTemplate(xmlNodePtr root, xmlNodePtr template_node, + xmlNsPtr xslns, selOptionsPtr ops, int* use_inputfile, int* use_value_of, + int* lastTempl, int start, int argc, char **argv) +{ + int i; + int templateEmpty; + int nextTempl; + const template_option *targ = NULL; + xmlNodePtr node = template_node; + + if (strcmp(argv[start], "-t") != 0 && + strcmp(argv[start], "--template") != 0) + { + fprintf(stderr, "not at the beginning of template\n"); + abort(); + } + + *lastTempl 
= 0; + templateEmpty = 1; + nextTempl = 0; + i = start + 1; + + while(i < argc) + { + xmlNodePtr newnode = NULL; + const template_option *newtarg = NULL; + int j; + int nesting; + + if (argv[i][0] == '-' && argv[i][1] != '\0') + { + for (j = 0; j < sizeof(TEMPLATE_OPTIONS)/sizeof(*TEMPLATE_OPTIONS); j++) + { + newtarg = TEMPLATE_OPTIONS[j]; + if (argv[i][1] == '-' && strcmp(newtarg->longopt, &argv[i][2]) == 0) + goto found_option; /* long option */ + else if(newtarg->shortopt == argv[i][1]) + goto found_option; /* short option */ + } + fprintf(stderr, "unrecognized option: %s\n", argv[i]); + exit(EXIT_BAD_ARGS); + } + else + { + break; + } + + found_option: + if (newtarg == &OPT_SORT && (targ != &OPT_MATCH && targ != &OPT_SORT)) + { + fprintf(stderr, "sort(s) must follow match\n"); + exit(EXIT_BAD_ARGS); + } + else if (newtarg == &OPT_TEMPLATE) + { + nextTempl = 1; + i--; + break; + } + else if (newtarg == &OPT_IF) + { + node = xmlNewChild(node, xslns, BAD_CAST "choose", NULL); + node->_private = (void*) &OPT_IF; + } + else if (newtarg == &OPT_ELIF || newtarg == &OPT_ELSE) + { + node = node->parent; + if (node->_private != &OPT_IF) { + fprintf(stderr, "else without if\n"); + exit(EXIT_BAD_ARGS); + } + } + else if (newtarg == &OPT_VALUE_OF) + { + node = xmlNewChild(node, xslns, BAD_CAST "call-template", NULL); + xmlNewProp(node, BAD_CAST "name", BAD_CAST "value-of-template"); + node->_private = (void*) &OPT_VALUE_OF; + *use_value_of = 1; + /* value-of-template uses exslt:node-set */ + checkNsRefs(root, "exslt:node-set"); + } + + i++; + templateEmpty = 0; + nesting = newtarg->nest; + + if (newtarg->xslname) + newnode = xmlNewChild(node, xslns, newtarg->xslname, NULL); + + for (j = 0; j < TEMPLATE_OPT_MAX_ARGS && newtarg->arguments[j].type; j++) + { + if (i >= argc && newtarg->arguments[j].type < TARG_NO_CMDLINE) + selUsage(argv[0], EXIT_BAD_ARGS); + switch (newtarg->arguments[j].type) + { + case TARG_VAR: { + char *equals = strchr(argv[i], '='); + if (equals) { + 
*equals = '\0'; + xmlNewProp(newnode, BAD_CAST "select", BAD_CAST (&equals[1])); + nesting = 0; + } + xmlNewProp(newnode, newtarg->arguments[j].attrname, BAD_CAST argv[i]); + break; + } + + case TARG_XPATH: + checkNsRefs(root, argv[i]); + case TARG_ATTR_STRING: + xmlNewProp(newnode, newtarg->arguments[j].attrname, BAD_CAST argv[i]); + break; + + case TARG_STRING: + xmlNodeAddContent(newnode, BAD_CAST argv[i]); + break; + + case TARG_NEWLINE: + xmlNewProp(newnode, BAD_CAST "select", BAD_CAST "'\n'"); + break; + case TARG_STR_NAME_SELECT: + xmlNewProp(newnode, BAD_CAST "name", BAD_CAST "select"); + break; + + case TARG_INP_NAME: + *use_inputfile = 1; + xmlNewProp(newnode, BAD_CAST "select", BAD_CAST "$inputFile"); + break; + + case TARG_SORT_OP: { + char order, data_type, case_order; + int nread; + nread = sscanf(argv[i], "%c:%c:%c", &order, &data_type, &case_order); + if (nread != 3) selUsage(argv[0], EXIT_BAD_ARGS); /* TODO: allow missing letters */ + + if (order == 'A' || order == 'D') + xmlNewProp(newnode, BAD_CAST "order", + BAD_CAST (order == 'A'? "ascending" : "descending")); + if (data_type == 'N' || data_type == 'T') + xmlNewProp(newnode, BAD_CAST "data-type", + BAD_CAST (data_type == 'N'? "number" : "text")); + if (case_order == 'U' || case_order == 'L') + xmlNewProp(newnode, BAD_CAST "case-order", + BAD_CAST (case_order == 'U'? 
"upper-first" : "lower-first")); + } break; + + default: + assert(0); + } + if (newtarg->arguments[j].type < TARG_NO_CMDLINE) i++; + } + + switch (nesting) { + case -1: + do { node = node->parent; } while(node->_private); + break; + case 0: + break; + case 1: + node = newnode; + break; + default: + assert(0); + } + targ = newtarg; + } + + if (templateEmpty) + { + fprintf(stderr, "error in arguments:"); + fprintf(stderr, " -t or --template option must be followed by"); + fprintf(stderr, " --match or other options\n"); + exit(EXIT_BAD_ARGS); + } + + if (!nextTempl) + { + if (i >= argc || argv[i][0] != '-' || strcmp(argv[i], "-") == 0) + { + *lastTempl = 1; + return i; /* return index of next input filename */ + } + } + + /* return index to beginning of the next template */ + return ++i; +} + +/** + * Prepare XSLT stylesheet based on command line options + */ +int +selPrepareXslt(xmlDocPtr style, selOptionsPtr ops, xmlChar *ns_arr[], + int start, int argc, char **argv) +{ + int i, t, ns, use_inputfile = 0, use_value_of = 0; + xmlNodePtr root, root_template = NULL; + xmlNsPtr xslns; + xmlBufferPtr attr_buf; + + root = xmlNewDocRawNode(style, NULL, BAD_CAST "stylesheet", NULL); + xmlDocSetRootElement(style, root); + xmlNewProp(root, BAD_CAST "version", BAD_CAST "1.0"); + xslns = xmlNewNs(root, XSLT_NAMESPACE, BAD_CAST "xsl"); + xmlSetNs(root, xslns); + + ns = 0; + while(ns_arr[ns]) + { + xmlNewNs(root, ns_arr[ns+1], xmlStrlen(ns_arr[ns])?ns_arr[ns] : NULL); + ns += 2; + } + cleanupNSArr(ns_arr); + + { + xmlNodePtr output; + output = xmlNewChild(root, xslns, BAD_CAST "output", NULL); + xmlNewProp(output, BAD_CAST "omit-xml-declaration", + BAD_CAST ((ops->no_omit_decl)?"no":"yes")); + xmlNewProp(output, BAD_CAST "indent", + BAD_CAST ((ops->indent)?"yes":"no")); + if (ops->encoding) xmlNewProp(output, BAD_CAST "encoding", ops->encoding); + if (ops->outText) xmlNewProp(output, BAD_CAST "method", BAD_CAST "text"); + } + + for (i = start, t = 0; i < argc; i++) + 
if(!strcmp(argv[i], "-t") || !strcmp(argv[i], "--template")) + t++; + + /* + * At least one -t option must be found + */ + if (t == 0) + { + fprintf(stderr, "error in arguments:"); + fprintf(stderr, " no -t or --template options found\n"); + exit(EXIT_BAD_ARGS); + } + + if (t > 1) + root_template = xmlNewChild(root, xslns, BAD_CAST "template", NULL); + + t = 0; + i = start; + while(i < argc) + { + if(!strcmp(argv[i], "-t") || !strcmp(argv[i], "--template")) + { + xmlNodePtr call_template, template; + int lastTempl = 0; + t++; + template = xmlNewChild(root, xslns, BAD_CAST "template", NULL); + + if (root_template) { + xmlChar num_buf[1+10+1]; /* t+maxnumber+NUL */ + xmlStrPrintf(num_buf, sizeof num_buf, BAD_CAST "t%d", t); + + call_template = xmlNewChild(root_template, xslns, + BAD_CAST "call-template", NULL); + xmlNewProp(call_template, BAD_CAST "name", num_buf); + xmlNewProp(template, BAD_CAST "name", num_buf); + } else { + root_template = template; + } + + i = selGenTemplate(root, template, + xslns, ops, &use_inputfile, &use_value_of, + &lastTempl, i, argc, argv); + if (lastTempl) break; + } + } + + if (!ops->outText && ops->printRoot) { + xmlNodePtr result_root = root_template; + xmlNodeSetName(result_root, BAD_CAST "xsl-select"); + xmlSetNs(result_root, NULL); + xmlUnlinkNode(result_root); + + root_template = xmlNewChild(root, xslns, BAD_CAST "template", NULL); + xmlAddChild(root_template, result_root); + } + + xmlNewProp(root_template, BAD_CAST "match", BAD_CAST "/"); + + attr_buf = xmlBufferCreate(); + for (ns = 0; ns < COUNT_OF(ns_entries); ns++) { + if (xmlSearchNs(NULL, root, ns_entries[ns].prefix)) { + if (xmlBufferLength(attr_buf) != 0) + xmlBufferWriteChar(attr_buf, " "); + xmlBufferCat(attr_buf, ns_entries[ns].prefix); + } + } + if (xmlBufferLength(attr_buf) != 0) + xmlNewProp(root, BAD_CAST "extension-element-prefixes", + xmlBufferContent(attr_buf)); + + xmlBufferFree(attr_buf); + + if (use_inputfile) { + xmlNodePtr param; + param = xmlNewChild(root, 
xslns, BAD_CAST "param", BAD_CAST "-"); + xmlNewProp(param, BAD_CAST "name", BAD_CAST "inputFile"); + } + + if (use_value_of) { + xmlNodePtr value_of_template, for_each, value_of, param; + /* <xsl:template name='value-of-template' xmlns:xsl=XSLT_NS> */ + value_of_template = xmlNewChild(root, xslns, BAD_CAST "template", NULL); + xmlNewProp(value_of_template, BAD_CAST "name", BAD_CAST "value-of-template"); + /* <xsl:param name='select'/> */ + param = xmlNewChild(value_of_template, xslns, BAD_CAST "param", NULL); + xmlNewProp(param, BAD_CAST "name", BAD_CAST "select"); + /* <xsl:value-of select='$select'/> */ + value_of = xmlNewChild(value_of_template, xslns, BAD_CAST "value-of", NULL); + xmlNewProp(value_of, BAD_CAST "select", BAD_CAST "$select"); + /* <xsl:for-each select='exslt:node-set($select)[position()>1]'> */ + for_each = xmlNewChild(value_of_template, xslns, BAD_CAST "for-each", NULL); + xmlNewProp(for_each, BAD_CAST "select", BAD_CAST "exslt:node-set($select)[position()>1]"); + /* <xsl:value-of select='\" \"'/> */ + value_of = xmlNewChild(for_each, xslns, BAD_CAST "value-of", NULL); + xmlNewProp(value_of, BAD_CAST "select", BAD_CAST "'\n'"); + /* <xsl:value-of select='.'/> */ + value_of = xmlNewChild(for_each, xslns, BAD_CAST "value-of", NULL); + xmlNewProp(value_of, BAD_CAST "select", BAD_CAST "."); + } + + return i; +} + +/** + * copy namespace definitions from @root to @style_tree + */ +static void +extract_ns_defs(xmlNodePtr root, xmlDocPtr style_tree) +{ + xmlNsPtr nsDef; + xmlNodePtr style_root = xmlDocGetRootElement(style_tree); + if (!root) return; + + for (nsDef = root->nsDef; nsDef; nsDef = nsDef->next) { + xmlNewNs(style_root, nsDef->href, nsDef->prefix); + } +} + +static void +do_file(const char *filename, xmlDocPtr style_tree, + int xml_options, const selOptions *ops, xsltOptions *xsltOps, + int *status) +{ + xmlChar *value; + xmlDocPtr doc; + + /* Pass input file name as predefined parameter 'inputFile' */ + const char *params[2+1] = { 
"inputFile" }; + value = xmlStrdup((const xmlChar *)"'"); + value = xmlStrcat(value, (const xmlChar *)filename); + value = xmlStrcat(value, (const xmlChar *)"'"); + params[1] = (char *) value; + + + doc = xmlReadFile(filename, NULL, xml_options); + if (doc != NULL) { + xmlDocPtr res; + + static xsltStylesheetPtr style = NULL; + if (!style) { + extract_ns_defs(xmlDocGetRootElement(doc), style_tree); + /* Parse XSLT stylesheet */ + style = xsltParseStylesheetDoc(style_tree); + if (!style) exit(EXIT_LIB_ERROR); + } + + res = xsltTransform(xsltOps, doc, params, style, filename); + if (!ops->quiet && (!res || xsltSaveResultToFile(stdout, res, style) < 0)) + { + *status = EXIT_LIB_ERROR; + } + else if ((ops->quiet || *status == EXIT_FAILURE) && res && res->children) + { + *status = EXIT_SUCCESS; + if (ops->quiet) exit(EXIT_SUCCESS); + } + } else { + *status = EXIT_BAD_FILE; + } + + xmlFree(value); +} + +/** + * This is the main function for 'select' option + */ +int +selMain(int argc, char **argv) +{ + static xsltOptions xsltOps; + static selOptions ops; + int start, i, n, status = EXIT_FAILURE; + int nCount = 0; + xmlDocPtr style_tree; + int xml_options = 0; + + if (argc <= 2) selUsage(argv[0], EXIT_BAD_ARGS); + + selInitOptions(&ops); + xsltInitOptions(&xsltOps); + start = selParseOptions(&ops, argc, argv); + xml_options |= XML_PARSE_NOENT; /* substitute entities */ + xml_options |= ops.nonet? 
XML_PARSE_NONET : 0; + xsltOps.nonet = ops.nonet; + xsltOps.noblanks = ops.noblanks; + xsltInitLibXml(&xsltOps); + xsltSetSortFunc(caseSortFunction); + + /* set parameters */ + parseNSArr(ns_arr, &nCount, start, argv+2); + + style_tree = xmlNewDoc(NULL); + i = selPrepareXslt(style_tree, &ops, ns_arr, start, argc, argv); + + if (ops.printXSLT) + { + if (i < argc) { + xmlTextReaderPtr reader = xmlReaderForFile(argv[i], NULL, xml_options); + xmlTextReaderRead(reader); + extract_ns_defs(xmlTextReaderCurrentNode(reader), style_tree); + xmlTextReaderClose(reader); + } + xmlDocFormatDump(stdout, style_tree, 1); + exit(EXIT_SUCCESS); + } + + for (n=i; n<argc; n++) + do_file(argv[n], style_tree, xml_options, &ops, &xsltOps, &status); + + if (i == argc) + do_file("-", style_tree, xml_options, &ops, &xsltOps, &status); + + /* + * Shutdown libxml + */ + xsltCleanupGlobals(); + xmlCleanupParser(); + + return status; +} + + + + +/****************************************************************************/ + +/** + * @number: compare numerically? 
+ * @returns: negative if @obj1 compares less than @obj2 + */ +static int +compareFunction(xmlXPathObjectPtr obj1, xmlXPathObjectPtr obj2, + int number, int lower_first, int descending) +{ + int tst; + + if (number) { + /* We make NaN smaller than number in accordance + with XSLT spec */ + if (xmlXPathIsNaN(obj1->floatval)) { + if (xmlXPathIsNaN(obj2->floatval)) + tst = 0; + else + tst = -1; + } else if (xmlXPathIsNaN(obj2->floatval)) + tst = 1; + else if (obj1->floatval == obj2->floatval) + tst = 0; + else if (obj1->floatval > obj2->floatval) + tst = 1; + else tst = -1; + } else { + tst = xmlStrcasecmp(obj1->stringval, obj2->stringval); + if (tst == 0) { + tst = xmlStrcmp(obj1->stringval, obj2->stringval); + if (lower_first) + tst = -tst; + } + } + if (descending) + tst = -tst; + + return tst; +} + +/** + * xsltSortFunction: + * @ctxt: a XSLT process context + * @sorts: array of sort nodes + * @nbsorts: the number of sorts in the array + * + * reorder the current node list accordingly to the set of sorting + * requirement provided by the arry of nodes. 
+ * + * like xsltDefaultSortFunction, but respect case-order attribute + */ +void +caseSortFunction(xsltTransformContextPtr ctxt, xmlNodePtr *sorts, + int nbsorts) { +#ifdef XSLT_REFACTORED + xsltStyleItemSortPtr comp; +#else + xsltStylePreCompPtr comp; +#endif + xmlXPathObjectPtr *resultsTab[XSLT_MAX_SORT]; + xmlXPathObjectPtr *results = NULL, *res; + xmlNodeSetPtr list = NULL; + int descending, number, desc, numb; + int len = 0; + int i, j, incr; + int tst; + int depth; + xmlNodePtr node; + xmlXPathObjectPtr tmp; + int tempstype[XSLT_MAX_SORT], temporder[XSLT_MAX_SORT], + tempcaseorder[XSLT_MAX_SORT]; + + if ((ctxt == NULL) || (sorts == NULL) || (nbsorts <= 0) || + (nbsorts >= XSLT_MAX_SORT)) + return; + if (sorts[0] == NULL) + return; + comp = sorts[0]->psvi; + if (comp == NULL) + return; + + list = ctxt->nodeList; + if ((list == NULL) || (list->nodeNr <= 1)) + return; /* nothing to do */ + + for (j = 0; j < nbsorts; j++) { + comp = sorts[j]->psvi; + tempstype[j] = 0; + if ((comp->stype == NULL) && (comp->has_stype != 0)) { + comp->stype = + xsltEvalAttrValueTemplate(ctxt, sorts[j], + (const xmlChar *) "data-type", + XSLT_NAMESPACE); + if (comp->stype != NULL) { + tempstype[j] = 1; + if (xmlStrEqual(comp->stype, (const xmlChar *) "text")) + comp->number = 0; + else if (xmlStrEqual(comp->stype, (const xmlChar *) "number")) + comp->number = 1; + else { + xsltTransformError(ctxt, NULL, sorts[j], + "xsltDoSortFunction: no support for data-type = %s\n", + comp->stype); + comp->number = 0; /* use default */ + } + } + } + temporder[j] = 0; + if ((comp->order == NULL) && (comp->has_order != 0)) { + comp->order = xsltEvalAttrValueTemplate(ctxt, sorts[j], + (const xmlChar *) "order", + XSLT_NAMESPACE); + if (comp->order != NULL) { + temporder[j] = 1; + if (xmlStrEqual(comp->order, (const xmlChar *) "ascending")) + comp->descending = 0; + else if (xmlStrEqual(comp->order, + (const xmlChar *) "descending")) + comp->descending = 1; + else { + xsltTransformError(ctxt, NULL, 
sorts[j], + "xsltDoSortFunction: invalid value %s for order\n", + comp->order); + comp->descending = 0; /* use default */ + } + } + } + + tempcaseorder[j] = 0; + if ((comp->case_order == NULL) /* && (comp->has_case_order != 0) */) { + comp->case_order = xsltEvalAttrValueTemplate(ctxt, sorts[j], + (const xmlChar *) "case-order", XSLT_NAMESPACE); + if (comp->case_order != NULL) { + tempcaseorder[j] = 1; + if (xmlStrEqual(comp->case_order, BAD_CAST "upper-first")) + comp->lower_first = 0; + else if (xmlStrEqual(comp->case_order, BAD_CAST "lower-first")) + comp->lower_first = 1; + else { + xsltTransformError(ctxt, NULL, sorts[j], + "xsltDoSortFunction: invalid value %s for case-order\n", + comp->case_order); + comp->lower_first = 0; /* use default */ + } + } + } + } + + len = list->nodeNr; + + resultsTab[0] = xsltComputeSortResult(ctxt, sorts[0]); + for (i = 1;i < XSLT_MAX_SORT;i++) + resultsTab[i] = NULL; + + results = resultsTab[0]; + + comp = sorts[0]->psvi; + descending = comp->descending; + number = comp->number; + if (results == NULL) + return; + + /* Shell's sort of node-set */ + for (incr = len / 2; incr > 0; incr /= 2) { + for (i = incr; i < len; i++) { + j = i - incr; + if (results[i] == NULL) + continue; + + while (j >= 0) { + if (results[j] == NULL) + tst = 1; + else + tst = compareFunction(results[j], results[j + incr], + number, comp->lower_first, descending); + + if (tst == 0) { + /* + * Okay we need to use multi level sorts + */ + depth = 1; + while (depth < nbsorts) { + if (sorts[depth] == NULL) + break; + comp = sorts[depth]->psvi; + if (comp == NULL) + break; + desc = comp->descending; + numb = comp->number; + + /* + * Compute the result of the next level for the + * full set, this might be optimized ... 
or not + */ + if (resultsTab[depth] == NULL) + resultsTab[depth] = xsltComputeSortResult(ctxt, + sorts[depth]); + res = resultsTab[depth]; + if (res == NULL) + break; + if (res[j] == NULL) { + if (res[j+incr] != NULL) + tst = 1; + } else { + tst = compareFunction(res[j], res[j+incr], + numb, comp->lower_first, desc); + } + + /* + * if we still can't differenciate at this level + * try one level deeper. + */ + if (tst != 0) + break; + depth++; + } + } + if (tst == 0) { + tst = results[j]->index > results[j + incr]->index; + } + if (tst > 0) { + tmp = results[j]; + results[j] = results[j + incr]; + results[j + incr] = tmp; + node = list->nodeTab[j]; + list->nodeTab[j] = list->nodeTab[j + incr]; + list->nodeTab[j + incr] = node; + depth = 1; + while (depth < nbsorts) { + if (sorts[depth] == NULL) + break; + if (resultsTab[depth] == NULL) + break; + res = resultsTab[depth]; + tmp = res[j]; + res[j] = res[j + incr]; + res[j + incr] = tmp; + depth++; + } + j -= incr; + } else + break; + } + } + } + + for (j = 0; j < nbsorts; j++) { + comp = sorts[j]->psvi; + if (tempstype[j] == 1) { + /* The data-type needs to be recomputed each time */ + xmlFree((void *)(comp->stype)); + comp->stype = NULL; + } + if (temporder[j] == 1) { + /* The order needs to be recomputed each time */ + xmlFree((void *)(comp->order)); + comp->order = NULL; + } + if (tempcaseorder[j] == 1) { + /* The case-order needs to be recomputed each time */ + xmlFree((void *)(comp->case_order)); + comp->case_order = NULL; + } + if (resultsTab[j] != NULL) { + for (i = 0;i < len;i++) + xmlXPathFreeObject(resultsTab[j][i]); + xmlFree(resultsTab[j]); + } + } +} diff --git a/src/xml_trans.c b/src/xml_trans.c new file mode 100644 index 0000000..b5f8355 --- /dev/null +++ b/src/xml_trans.c @@ -0,0 +1,282 @@ +/* $Id: xml_trans.c,v 1.38 2005/01/07 02:40:59 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. 
All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#include <config.h> + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "xmlstar.h" +#include "trans.h" + +/* + * TODO: + * 1. proper command line arguments handling + * 2. review and clean up all code (free memory) + * 3. check embedded stylesheet support + * 4. exit values on errors + */ + +/** + * Display usage syntax + */ +void +trUsage(const char *argv0, exit_status status) +{ + extern void fprint_trans_usage(FILE* o, const char* argv0); + extern const char more_info[]; + extern const char libxslt_more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_trans_usage(o, argv0); + fprintf(o, "%s", more_info); + fprintf(o, "%s", libxslt_more_info); + exit(status); +} + +/** + * Parse global command line options + */ +int +trParseOptions(xsltOptionsPtr ops, int argc, char **argv) +{ + int i; + + if (argc <= 2) return argc; + for (i=2; i<argc; i++) + { + if (argv[i][0] == '-') + { + if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + trUsage(argv[0], EXIT_SUCCESS); + } + else if (!strcmp(argv[i], "--show-ext")) + { + ops->show_extensions = 1; + } + else if (!strcmp(argv[i], "--val")) + { + ops->noval = 0; + } + else if (!strcmp(argv[i], "--net")) + { + ops->nonet = 0; + } + else if (!strcmp(argv[i], "-E") || !strcmp(argv[i], "--embed")) + { + ops->embed = 1; + } + else if (!strcmp(argv[i], "--omit-decl")) + { + ops->omit_decl = 1; + } + else if (!strcmp(argv[i], "--maxdepth")) + { + int value; + i++; + if (i >= argc) trUsage(argv[0], EXIT_BAD_ARGS); + if (sscanf(argv[i], "%d", &value) == 1) + if (value > 0) xsltMaxDepth = value; + } +#ifdef LIBXML_XINCLUDE_ENABLED + else if (!strcmp(argv[i], "--xinclude")) + { + ops->xinclude = 1; + } +#endif +#ifdef LIBXML_HTML_ENABLED + else if (!strcmp(argv[i], "--html")) + { + ops->html = 1; + } +#endif + } + else + break; + } + + return i; +} + +/** + * Cleanup memory + */ +void +trCleanup() +{ + xsltCleanupGlobals(); + xmlCleanupParser(); +#if 0 + xmlMemoryDump(); +#endif +} + +/** + * Parse command line for XSLT parameters + */ +int +trParseParams(const char** params, int* plen, + int count, char **argv) +{ + int i; + *plen = 0; + params[0] = 0; + + for (i=0; i<count; i++) + { + if (argv[i][0] == '-') + { + if (!strcmp(argv[i], "-p")) + { + int j; + xmlChar *name, *value; + + i++; + if (i >= count) trUsage(argv[0], EXIT_BAD_ARGS); + + for(j=0; argv[i][j] && (argv[i][j] != '='); j++); + if (argv[i][j] != '=') trUsage(argv[0], EXIT_BAD_ARGS); + + name = xmlStrndup((const xmlChar *) argv[i], j); + value = xmlStrdup((const xmlChar *) 
argv[i]+j+1); + + if (*plen >= MAX_PARAMETERS) + { + fprintf(stderr, "too many params increase MAX_PARAMETERS\n"); + exit(EXIT_INTERNAL_ERROR); + } + + params[*plen] = (char *)name; + (*plen)++; + params[*plen] = (char *)value; + (*plen)++; + params[*plen] = 0; + } + else if (!strcmp(argv[i], "-s")) + { + int j; + const xmlChar *string; + xmlChar *name, *value; + + i++; + if (i >= count) trUsage(argv[0], EXIT_BAD_ARGS); + + for(j=0; argv[i][j] && (argv[i][j] != '='); j++); + if (argv[i][j] != '=') trUsage(argv[0], EXIT_BAD_ARGS); + + name = xmlStrndup((const xmlChar *)argv[i], j); + string = (const xmlChar *)(argv[i]+j+1); + + if (xmlStrchr(string, '"')) + { + if (xmlStrchr(string, '\'')) + { + fprintf(stderr, + "string parameter contains both quote and double-quotes\n"); + exit(EXIT_INTERNAL_ERROR); + } + value = xmlStrdup((const xmlChar *)"'"); + value = xmlStrcat(value, string); + value = xmlStrcat(value, (const xmlChar *)"'"); + } + else + { + value = xmlStrdup((const xmlChar *)"\""); + value = xmlStrcat(value, string); + value = xmlStrcat(value, (const xmlChar *)"\""); + } + + if (*plen >= MAX_PARAMETERS) + { + fprintf(stderr, "too many params increase MAX_PARAMETERS\n"); + exit(EXIT_INTERNAL_ERROR); + } + + params[*plen] = (char *)name; + (*plen)++; + params[*plen] = (char *)value; + (*plen)++; + params[*plen] = 0; + } + } + else + break; + } + + return i; +} + +/** + * Cleanup memory allocated by XSLT parameters + */ +void +trCleanupParams(const char **xsltParams) +{ + const char **p = xsltParams; + + while (*p) + { + xmlFree((char *)*p); + p++; + } +} + +/** + * This is the main function for 'tr' option + */ +int +trMain(int argc, char **argv) +{ + static xsltOptions ops; + static const char *xsltParams[2 * MAX_PARAMETERS + 1]; + + int errorno = 0; + int start, xslt_ind; + int pCount; + + if (argc <= 2) trUsage(argv[0], EXIT_BAD_ARGS); + + xsltInitOptions(&ops); + start = trParseOptions(&ops, argc, argv); + xslt_ind = start; + xsltInitLibXml(&ops); + + /* 
set parameters */ + start += trParseParams(xsltParams, &pCount, argc-start-1, argv+start+1); + + /* run transformation */ + errorno = xsltRun(&ops, argv[xslt_ind], xsltParams, + argc-start-1, argv+start+1); + + /* free resources */ + trCleanupParams(xsltParams); + trCleanup(); + + return errorno; +} diff --git a/src/xml_validate.c b/src/xml_validate.c new file mode 100644 index 0000000..0882baa --- /dev/null +++ b/src/xml_validate.c @@ -0,0 +1,471 @@ +/* $Id: xml_validate.c,v 1.36 2005/01/07 01:52:43 mgrouch Exp $ */ + +/* + +XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents + +Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#include <config.h> + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "xmlstar.h" +#include "trans.h" + +#ifdef LIBXML_SCHEMAS_ENABLED +#include <libxml/xmlschemas.h> +#include <libxml/xmlschemastypes.h> +#endif + +#ifdef LIBXML_SCHEMAS_ENABLED +#include <libxml/relaxng.h> +#endif + +#include <libxml/xmlreader.h> + +/* + * TODO: Use cases + * 1. find malfomed XML documents in a given set of XML files + * 2. find XML documents which do not match DTD/XSD in a given set of XML files + * 3. precompile DTD once + */ + +typedef struct _valOptions { + char *dtd; /* External DTD URL or file name */ + char *schema; /* External Schema URL or file name */ + char *relaxng; /* External Relax-NG Schema URL or file name */ + int err; /* Allow stderr messages */ + int embed; /* Validate using embeded DTD */ + int wellFormed; /* Check if well formed only */ + int listGood; /* >0 list good, <0 list bad */ + int show_val_res; /* display file names and valid/invalid message */ + int nonet; /* disallow network access */ +} valOptions; + +typedef valOptions *valOptionsPtr; + +/** + * display short help message + */ +void +valUsage(int argc, char **argv, exit_status status) +{ + extern void fprint_validate_usage(FILE* o, const char* argv0); + extern const char more_info[]; + FILE *o = (status == EXIT_SUCCESS)? 
stdout : stderr; + fprint_validate_usage(o, argv[0]); + fprintf(o, "%s", more_info); + exit(status); +} + +/** + * Initialize global command line options + */ +void +valInitOptions(valOptionsPtr ops) +{ + ops->wellFormed = 1; + ops->listGood = -1; + ops->err = 0; + ops->embed = 0; + ops->dtd = NULL; + ops->schema = NULL; + ops->relaxng = NULL; + ops->show_val_res = 1; + ops->nonet = 1; +} + +/** + * Parse global command line options + */ +int +valParseOptions(valOptionsPtr ops, int argc, char **argv) +{ + int i; + + i = 2; + while(i < argc) + { + if (!strcmp(argv[i], "--well-formed") || !strcmp(argv[i], "-w")) + { + ops->wellFormed = 1; + i++; + } + else if (!strcmp(argv[i], "--err") || !strcmp(argv[i], "-e")) + { + ops->err = 1; + i++; + } + else if (!strcmp(argv[i], "--embed") || !strcmp(argv[i], "-E")) + { + ops->embed = 1; + i++; + } + else if (!strcmp(argv[i], "--list-good") || !strcmp(argv[i], "-g")) + { + ops->listGood = 1; + ops->show_val_res = 0; + i++; + } + else if (!strcmp(argv[i], "--list-bad") || !strcmp(argv[i], "-b")) + { + ops->listGood = -1; + ops->show_val_res = 0; + i++; + } + else if (!strcmp(argv[i], "--quiet") || !strcmp(argv[i], "-q")) + { + ops->listGood = 0; + ops->show_val_res = 0; + i++; + } + else if (!strcmp(argv[i], "--dtd") || !strcmp(argv[i], "-d")) + { + i++; + if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS); + ops->dtd = argv[i]; + i++; + } + else if (!strcmp(argv[i], "--xsd") || !strcmp(argv[i], "-s")) + { + i++; + if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS); + ops->schema = argv[i]; + i++; + } + else if (!strcmp(argv[i], "--relaxng") || !strcmp(argv[i], "-r")) + { + i++; + if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS); + ops->relaxng = argv[i]; + i++; + } + else if (!strcmp(argv[i], "--net")) + { + ops->nonet = 0; + i++; + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + valUsage(argc, argv, EXIT_SUCCESS); + } + else if (!strcmp(argv[i], "-")) + { + i++; + break; + } + else if (argv[i][0] 
== '-') + { + valUsage(argc, argv, EXIT_BAD_ARGS); + } + else + { + i++; + break; + } + } + + return i-1; +} + +/** + * Validate XML document against DTD + */ +int +valAgainstDtd(valOptionsPtr ops, char* dtdvalid, xmlDocPtr doc, char* filename) +{ + int result = 0; + + if (dtdvalid != NULL) + { + xmlDtdPtr dtd; + +#if !defined(LIBXML_VALID_ENABLED) + xmlGenericError(xmlGenericErrorContext, + "libxml2 has no validation support"); + return 2; +#endif + dtd = xmlParseDTD(NULL, (const xmlChar *)dtdvalid); + if (dtd == NULL) + { + xmlGenericError(xmlGenericErrorContext, + "Could not parse DTD %s\n", dtdvalid); + result = 2; + } + else + { + xmlValidCtxtPtr cvp; + + if ((cvp = xmlNewValidCtxt()) == NULL) + { + xmlGenericError(xmlGenericErrorContext, + "Couldn't allocate validation context\n"); + exit(-1); + } + + if (ops->err) + { + cvp->userData = (void *) stderr; + cvp->error = (xmlValidityErrorFunc) fprintf; + cvp->warning = (xmlValidityWarningFunc) fprintf; + } + else + { + cvp->userData = (void *) NULL; + cvp->error = (xmlValidityErrorFunc) NULL; + cvp->warning = (xmlValidityWarningFunc) NULL; + } + + if (!xmlValidateDtd(cvp, doc, dtd)) + { + if ((ops->listGood < 0) && !ops->show_val_res) + { + fprintf(stdout, "%s\n", filename); + } + else if (ops->listGood == 0) + xmlGenericError(xmlGenericErrorContext, + "%s: does not match %s\n", + filename, dtdvalid); + result = 3; + } + else + { + if ((ops->listGood > 0) && !ops->show_val_res) + { + fprintf(stdout, "%s\n", filename); + } + } + xmlFreeDtd(dtd); + xmlFreeValidCtxt(cvp); + } + } + + return result; +} + +/** + * This is the main function for 'validate' option + */ +int +valMain(int argc, char **argv) +{ + int start; + static valOptions ops; + static ErrorInfo errorInfo; + int invalidFound = 0; + int options = XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR; + + if (argc <= 2) valUsage(argc, argv, EXIT_BAD_ARGS); + valInitOptions(&ops); + start = valParseOptions(&ops, argc, argv); + if (ops.nonet) options |= XML_PARSE_NONET; 
+ + errorInfo.verbose = ops.err; + xmlSetStructuredErrorFunc(&errorInfo, reportError); + xmlLineNumbersDefault(1); + + if (ops.dtd) + { + /* xmlReader doesn't work with external dtd, have to use SAX + * interface */ + int i; + + for (i=start; i<argc; i++) + { + xmlDocPtr doc; + int ret; + + ret = 0; + doc = NULL; + + errorInfo.filename = argv[i]; + doc = xmlReadFile(argv[i], NULL, options); + if (doc) + { + /* TODO: precompile DTD once */ + ret = valAgainstDtd(&ops, ops.dtd, doc, argv[i]); + xmlFreeDoc(doc); + } + else + { + ret = 1; /* Malformed XML or could not open file */ + if ((ops.listGood < 0) && !ops.show_val_res) + { + fprintf(stdout, "%s\n", argv[i]); + } + } + if (ret) invalidFound = 1; + + if (ops.show_val_res) + { + if (ret == 0) + fprintf(stdout, "%s - valid\n", argv[i]); + else + fprintf(stdout, "%s - invalid\n", argv[i]); + } + } + } + else if (ops.schema || ops.relaxng || ops.embed || ops.wellFormed) + { + int i; + xmlTextReaderPtr reader = NULL; + +#ifdef LIBXML_SCHEMAS_ENABLED + xmlSchemaPtr schema = NULL; + xmlSchemaParserCtxtPtr schemaParserCtxt = NULL; + xmlSchemaValidCtxtPtr schemaCtxt = NULL; + + xmlRelaxNGPtr relaxng = NULL; + xmlRelaxNGParserCtxtPtr relaxngParserCtxt = NULL; + /* there is no xmlTextReaderRelaxNGValidateCtxt() !? 
*/ + + /* TODO: Do not print debug stuff */ + if (ops.schema) + { + schemaParserCtxt = xmlSchemaNewParserCtxt(ops.schema); + if (!schemaParserCtxt) + { + invalidFound = 2; + goto schemaCleanup; + } + errorInfo.filename = ops.schema; + schema = xmlSchemaParse(schemaParserCtxt); + if (!schema) + { + invalidFound = 2; + goto schemaCleanup; + } + + xmlSchemaFreeParserCtxt(schemaParserCtxt); + schemaCtxt = xmlSchemaNewValidCtxt(schema); + if (!schemaCtxt) + { + invalidFound = 2; + goto schemaCleanup; + } + + } + else if (ops.relaxng) + { + relaxngParserCtxt = xmlRelaxNGNewParserCtxt(ops.relaxng); + if (!relaxngParserCtxt) + { + invalidFound = 2; + goto schemaCleanup; + } + + errorInfo.filename = ops.relaxng; + relaxng = xmlRelaxNGParse(relaxngParserCtxt); + if (!relaxng) + { + invalidFound = 2; + goto schemaCleanup; + } + + } +#endif /* LIBXML_SCHEMAS_ENABLED */ + + for (i=start; i<argc; i++) + { + int ret = 0; + if (ops.embed) options |= XML_PARSE_DTDVALID; + + if (!reader) + { + reader = xmlReaderForFile(argv[i], NULL, options); + } + else + { + ret = xmlReaderNewFile(reader, argv[i], NULL, options); + } + + errorInfo.xmlReader = reader; + errorInfo.filename = argv[i]; + + if (reader && ret == 0) + { +#ifdef LIBXML_SCHEMAS_ENABLED + if (schemaCtxt) + { + ret = xmlTextReaderSchemaValidateCtxt(reader, + schemaCtxt, 0); + } + else if (relaxng) + { + ret = xmlTextReaderRelaxNGSetSchema(reader, + relaxng); + } +#endif /* LIBXML_SCHEMAS_ENABLED */ + + if (ret == 0) + { + do + { + ret = xmlTextReaderRead(reader); + } while (ret == 1); + if (ret != -1 && (schema || relaxng || ops.embed)) + ret = !xmlTextReaderIsValid(reader); + } + } + else + { + if (ops.err) + fprintf(stderr, "couldn't read file '%s'\n", errorInfo.filename); + ret = 1; /* could not open file */ + } + if (ret) invalidFound = 1; + + if (!ops.show_val_res) + { + if ((ops.listGood > 0) && (ret == 0)) + fprintf(stdout, "%s\n", argv[i]); + if ((ops.listGood < 0) && (ret != 0)) + fprintf(stdout, "%s\n", argv[i]); + 
} + else + { + if (ret == 0) + fprintf(stdout, "%s - valid\n", argv[i]); + else + fprintf(stdout, "%s - invalid\n", argv[i]); + } + } + errorInfo.xmlReader = NULL; + xmlFreeTextReader(reader); + +#ifdef LIBXML_SCHEMAS_ENABLED + schemaCleanup: + xmlSchemaFreeValidCtxt(schemaCtxt); + xmlRelaxNGFree(relaxng); + xmlSchemaFree(schema); + xmlRelaxNGCleanupTypes(); + xmlSchemaCleanupTypes(); +#endif /* LIBXML_SCHEMAS_ENABLED */ + } + + xmlCleanupParser(); + return invalidFound; +} diff --git a/src/xmlstar.h b/src/xmlstar.h new file mode 100644 index 0000000..36133f2 --- /dev/null +++ b/src/xmlstar.h @@ -0,0 +1,43 @@ +#ifndef XMLSTAR_H +#define XMLSTAR_H + +#include <config.h> +#include <stdlib.h> + +#if HAVE_SETMODE && HAVE_DECL_O_BINARY +# include <io.h> +# include <fcntl.h> +# define set_stdout_binary() setmode(1, O_BINARY) +#else +# define set_stdout_binary() +#endif + +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <libxml/xmlreader.h> + +typedef enum { /* EXIT_SUCCESS = 0, EXIT_FAILURE = 1, */ + EXIT_BAD_ARGS = EXIT_FAILURE+1, EXIT_BAD_FILE, + EXIT_LIB_ERROR, EXIT_INTERNAL_ERROR } exit_status; + +#define COUNT_OF(array) (sizeof(array)/sizeof(*array)) + +typedef enum { QUIET, VERBOSE } Verbosity; + +typedef struct _errorInfo { + const char *filename; /* file error occured in, if any, else NULL */ + xmlTextReaderPtr xmlReader; + Verbosity verbose; +} ErrorInfo; + +void reportError(void *ptr, xmlErrorPtr error); + +void registerXstarVariable(xmlXPathContextPtr ctxt, + const char* name, xmlXPathObjectPtr value); +void registerXstarNs(xmlXPathContextPtr ctxt); + +int parseNSArr(xmlChar** ns_arr, int* plen, int argc, char **argv); +void cleanupNSArr(xmlChar **ns_arr); +extern xmlChar *ns_arr[]; + +#endif /* XMLSTAR_H */ |