summaryrefslogtreecommitdiff
path: root/src/xml_validate.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml_validate.c')
-rw-r--r--src/xml_validate.c471
1 files changed, 471 insertions, 0 deletions
diff --git a/src/xml_validate.c b/src/xml_validate.c
new file mode 100644
index 0000000..0882baa
--- /dev/null
+++ b/src/xml_validate.c
@@ -0,0 +1,471 @@
+/* $Id: xml_validate.c,v 1.36 2005/01/07 01:52:43 mgrouch Exp $ */
+
+/*
+
+XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
+
+Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#include <config.h>
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "xmlstar.h"
+#include "trans.h"
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+#include <libxml/xmlschemas.h>
+#include <libxml/xmlschemastypes.h>
+#endif
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+#include <libxml/relaxng.h>
+#endif
+
+#include <libxml/xmlreader.h>
+
+/*
+ * TODO: Use cases
+ * 1. find malformed XML documents in a given set of XML files
+ * 2. find XML documents which do not match DTD/XSD in a given set of XML files
+ * 3. precompile DTD once
+ */
+
+typedef struct _valOptions { /* options of the 'validate' command; defaults come from valInitOptions() */
+ char *dtd; /* External DTD URL or file name (--dtd/-d), NULL if not given */
+ char *schema; /* External Schema URL or file name (--xsd/-s), NULL if not given */
+ char *relaxng; /* External Relax-NG Schema URL or file name (--relaxng/-r), NULL if not given */
+ int err; /* Allow stderr messages (--err/-e) */
+ int embed; /* Validate using embedded DTD (--embed/-E) */
+ int wellFormed; /* Check if well formed only; initialized to 1 and never cleared in this file */
+ int listGood; /* >0 list good files, <0 list bad files, 0 list neither (--quiet/-q) */
+ int show_val_res; /* display file names and valid/invalid message; cleared by -g, -b and -q */
+ int nonet; /* disallow network access; initialized to 1, cleared by --net */
+} valOptions;
+
+typedef valOptions *valOptionsPtr; /* pointer form taken by the option helpers in this file */
+
+/**
+ * Print the usage text of the validate command, followed by the
+ * externally defined more_info string, and terminate the process.
+ *
+ * argc   - accepted for symmetry with the other helpers, not used
+ * argv   - argv[0] is handed to fprint_validate_usage()
+ * status - process exit code; EXIT_SUCCESS sends the text to stdout,
+ *          any other value sends it to stderr
+ *
+ * This function does not return: it always calls exit(status).
+ */
+void
+valUsage(int argc, char **argv, exit_status status)
+{
+    extern void fprint_validate_usage(FILE* o, const char* argv0);
+    extern const char more_info[];
+    FILE *out;
+
+    /* help requested on purpose goes to stdout, usage errors to stderr */
+    if (status == EXIT_SUCCESS)
+        out = stdout;
+    else
+        out = stderr;
+
+    fprint_validate_usage(out, argv[0]);
+    fputs(more_info, out);
+    exit(status);
+}
+
+/**
+ * Reset every field of ops to its default value.
+ *
+ * Defaults: no external DTD/XSD/Relax-NG file, well-formedness check
+ * only, error messages off, embedded DTD validation off, per-file
+ * "valid/invalid" report on, network access disallowed.
+ */
+void
+valInitOptions(valOptionsPtr ops)
+{
+    /* no external grammar selected */
+    ops->dtd = NULL;
+    ops->schema = NULL;
+    ops->relaxng = NULL;
+
+    /* kind of check to perform */
+    ops->embed = 0;
+    ops->wellFormed = 1;
+
+    /* reporting */
+    ops->err = 0;
+    ops->listGood = -1;
+    ops->show_val_res = 1;
+
+    /* network access stays off unless explicitly enabled */
+    ops->nonet = 1;
+}
+
+/**
+ * Parse the command line options of the validate command.
+ *
+ * Scanning starts at argv[2] and stops at the first argument that is
+ * not an option; a lone "-" also ends option scanning and counts as the
+ * first input argument.  Recognized options are stored into ops.
+ *
+ * The following situations end in valUsage(), which exits the process:
+ *   - --help/-h                                  (EXIT_SUCCESS)
+ *   - an unknown argument starting with '-'      (EXIT_BAD_ARGS)
+ *   - --dtd/--xsd/--relaxng without a value      (EXIT_BAD_ARGS)
+ *   - no input argument after the options        (EXIT_BAD_ARGS)
+ *
+ * Returns the index in argv of the first input argument.
+ */
+int
+valParseOptions(valOptionsPtr ops, int argc, char **argv)
+{
+    int i = 2;
+    int inputFound = 0;
+
+    while (i < argc && !inputFound)
+    {
+        const char *arg = argv[i];
+
+        if (!strcmp(arg, "--well-formed") || !strcmp(arg, "-w"))
+        {
+            ops->wellFormed = 1;
+            i++;
+        }
+        else if (!strcmp(arg, "--err") || !strcmp(arg, "-e"))
+        {
+            ops->err = 1;
+            i++;
+        }
+        else if (!strcmp(arg, "--embed") || !strcmp(arg, "-E"))
+        {
+            ops->embed = 1;
+            i++;
+        }
+        else if (!strcmp(arg, "--list-good") || !strcmp(arg, "-g"))
+        {
+            ops->listGood = 1;
+            ops->show_val_res = 0;
+            i++;
+        }
+        else if (!strcmp(arg, "--list-bad") || !strcmp(arg, "-b"))
+        {
+            ops->listGood = -1;
+            ops->show_val_res = 0;
+            i++;
+        }
+        else if (!strcmp(arg, "--quiet") || !strcmp(arg, "-q"))
+        {
+            ops->listGood = 0;
+            ops->show_val_res = 0;
+            i++;
+        }
+        else if (!strcmp(arg, "--dtd") || !strcmp(arg, "-d"))
+        {
+            i++;
+            if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
+            ops->dtd = argv[i];
+            i++;
+        }
+        else if (!strcmp(arg, "--xsd") || !strcmp(arg, "-s"))
+        {
+            i++;
+            if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
+            ops->schema = argv[i];
+            i++;
+        }
+        else if (!strcmp(arg, "--relaxng") || !strcmp(arg, "-r"))
+        {
+            i++;
+            if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
+            ops->relaxng = argv[i];
+            i++;
+        }
+        else if (!strcmp(arg, "--net"))
+        {
+            ops->nonet = 0;
+            i++;
+        }
+        else if (!strcmp(arg, "--help") || !strcmp(arg, "-h"))
+        {
+            valUsage(argc, argv, EXIT_SUCCESS);
+        }
+        else if (!strcmp(arg, "-"))
+        {
+            /* a lone "-" is not an option: it is the first input */
+            inputFound = 1;
+        }
+        else if (arg[0] == '-')
+        {
+            valUsage(argc, argv, EXIT_BAD_ARGS);
+        }
+        else
+        {
+            /* first non-option argument: i stays on it */
+            inputFound = 1;
+        }
+    }
+
+    /* If the options used up the whole command line there is nothing to
+     * validate; returning an index here would make the caller treat the
+     * last option (or option value) as an input file. */
+    if (!inputFound) valUsage(argc, argv, EXIT_BAD_ARGS);
+
+    return i;
+}
+
+/**
+ * Validate an already parsed XML document against an external DTD.
+ *
+ * ops      - options: err decides whether validity errors and warnings
+ *            are printed to stderr; listGood and show_val_res decide
+ *            whether the file name is listed on stdout
+ * dtdvalid - URL or file name of the DTD; when NULL nothing is done
+ * doc      - the parsed document
+ * filename - name of the document, used in listings and messages
+ *
+ * Returns 0 when the document matches the DTD (or dtdvalid is NULL),
+ * 2 when the DTD could not be parsed or libxml2 was built without
+ * validation support, 3 when the document does not match the DTD.
+ * The process exits with -1 if no validation context can be allocated.
+ */
+int
+valAgainstDtd(valOptionsPtr ops, char* dtdvalid, xmlDocPtr doc, char* filename)
+{
+    int result = 0;
+
+    if (dtdvalid != NULL)
+    {
+#if !defined(LIBXML_VALID_ENABLED)
+        /* The validation API is not available in this libxml2 build, so
+         * none of the code in the other branch may be compiled. */
+        (void) ops;
+        (void) doc;
+        (void) filename;
+        xmlGenericError(xmlGenericErrorContext,
+                        "libxml2 has no validation support\n");
+        result = 2;
+#else
+        xmlDtdPtr dtd;
+
+        dtd = xmlParseDTD(NULL, (const xmlChar *)dtdvalid);
+        if (dtd == NULL)
+        {
+            xmlGenericError(xmlGenericErrorContext,
+                            "Could not parse DTD %s\n", dtdvalid);
+            result = 2;
+        }
+        else
+        {
+            xmlValidCtxtPtr cvp;
+
+            if ((cvp = xmlNewValidCtxt()) == NULL)
+            {
+                xmlGenericError(xmlGenericErrorContext,
+                                "Couldn't allocate validation context\n");
+                exit(-1);
+            }
+
+            if (ops->err)
+            {
+                /* route validity errors and warnings to stderr */
+                cvp->userData = (void *) stderr;
+                cvp->error = (xmlValidityErrorFunc) fprintf;
+                cvp->warning = (xmlValidityWarningFunc) fprintf;
+            }
+            else
+            {
+                /* no callbacks: validity messages are dropped */
+                cvp->userData = (void *) NULL;
+                cvp->error = (xmlValidityErrorFunc) NULL;
+                cvp->warning = (xmlValidityWarningFunc) NULL;
+            }
+
+            if (!xmlValidateDtd(cvp, doc, dtd))
+            {
+                if ((ops->listGood < 0) && !ops->show_val_res)
+                {
+                    /* --list-bad: print the name of the failing file */
+                    fprintf(stdout, "%s\n", filename);
+                }
+                else if (ops->listGood == 0)
+                {
+                    xmlGenericError(xmlGenericErrorContext,
+                                    "%s: does not match %s\n",
+                                    filename, dtdvalid);
+                }
+                result = 3;
+            }
+            else
+            {
+                if ((ops->listGood > 0) && !ops->show_val_res)
+                {
+                    /* --list-good: print the name of the valid file */
+                    fprintf(stdout, "%s\n", filename);
+                }
+            }
+            xmlFreeDtd(dtd);
+            xmlFreeValidCtxt(cvp);
+        }
+#endif /* LIBXML_VALID_ENABLED */
+    }
+
+    return result;
+}
+
+/**
+ * This is the main function for 'validate' option
+ *
+ * Parses the command line, then checks every input argument from the
+ * index returned by valParseOptions() up to argc-1:
+ *   - with --dtd each document is parsed into a tree and handed to
+ *     valAgainstDtd();
+ *   - otherwise each document is streamed through an xmlTextReader,
+ *     optionally validating against an XSD schema, a Relax-NG schema
+ *     or the DTD embedded in the document.
+ * Depending on the options, one line per file is written to stdout:
+ * either "<file> - valid" / "<file> - invalid", or only the names of
+ * the good or of the bad files.
+ *
+ * Returns 0 when no input was found invalid, 1 when at least one input
+ * was invalid, malformed or unreadable, 2 when the XSD or Relax-NG
+ * schema could not be loaded (no input is checked in that case).
+ */
+int
+valMain(int argc, char **argv)
+{
+    int start;
+    static valOptions ops;
+    static ErrorInfo errorInfo;
+    int invalidFound = 0;
+    int options = XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR;
+
+    if (argc <= 2) valUsage(argc, argv, EXIT_BAD_ARGS);
+    valInitOptions(&ops);
+    start = valParseOptions(&ops, argc, argv);
+    if (ops.nonet) options |= XML_PARSE_NONET;
+
+    errorInfo.verbose = ops.err;
+    xmlSetStructuredErrorFunc(&errorInfo, reportError);
+    xmlLineNumbersDefault(1);
+
+    if (ops.dtd)
+    {
+        /* xmlReader doesn't work with external dtd, so every document
+         * is parsed into a tree and validated afterwards */
+        int i;
+
+        for (i=start; i<argc; i++)
+        {
+            xmlDocPtr doc;
+            int ret;
+
+            ret = 0;
+            doc = NULL;
+
+            errorInfo.filename = argv[i];
+            doc = xmlReadFile(argv[i], NULL, options);
+            if (doc)
+            {
+                /* TODO: precompile DTD once */
+                ret = valAgainstDtd(&ops, ops.dtd, doc, argv[i]);
+                xmlFreeDoc(doc);
+            }
+            else
+            {
+                ret = 1; /* Malformed XML or could not open file */
+                if ((ops.listGood < 0) && !ops.show_val_res)
+                {
+                    fprintf(stdout, "%s\n", argv[i]);
+                }
+            }
+            if (ret) invalidFound = 1;
+
+            if (ops.show_val_res)
+            {
+                if (ret == 0)
+                    fprintf(stdout, "%s - valid\n", argv[i]);
+                else
+                    fprintf(stdout, "%s - invalid\n", argv[i]);
+            }
+        }
+    }
+    else if (ops.schema || ops.relaxng || ops.embed || ops.wellFormed)
+    {
+        int i;
+        /* nonzero when the reader's validity verdict has to be consulted
+         * after a document was read to its end */
+        int checkValidity = ops.embed;
+        xmlTextReaderPtr reader = NULL;
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+        xmlSchemaPtr schema = NULL;
+        xmlSchemaParserCtxtPtr schemaParserCtxt = NULL;
+        xmlSchemaValidCtxtPtr schemaCtxt = NULL;
+
+        xmlRelaxNGPtr relaxng = NULL;
+        xmlRelaxNGParserCtxtPtr relaxngParserCtxt = NULL;
+        /* there is no xmlTextReaderRelaxNGValidateCtxt() !? */
+
+        /* TODO: Do not print debug stuff */
+        if (ops.schema)
+        {
+            schemaParserCtxt = xmlSchemaNewParserCtxt(ops.schema);
+            if (!schemaParserCtxt)
+            {
+                invalidFound = 2;
+                goto schemaCleanup;
+            }
+            errorInfo.filename = ops.schema;
+            schema = xmlSchemaParse(schemaParserCtxt);
+            if (!schema)
+            {
+                invalidFound = 2;
+                goto schemaCleanup;
+            }
+
+            /* the parser context is not needed once the schema exists;
+             * clear the pointer so the cleanup code does not free twice */
+            xmlSchemaFreeParserCtxt(schemaParserCtxt);
+            schemaParserCtxt = NULL;
+
+            schemaCtxt = xmlSchemaNewValidCtxt(schema);
+            if (!schemaCtxt)
+            {
+                invalidFound = 2;
+                goto schemaCleanup;
+            }
+        }
+        else if (ops.relaxng)
+        {
+            relaxngParserCtxt = xmlRelaxNGNewParserCtxt(ops.relaxng);
+            if (!relaxngParserCtxt)
+            {
+                invalidFound = 2;
+                goto schemaCleanup;
+            }
+
+            errorInfo.filename = ops.relaxng;
+            relaxng = xmlRelaxNGParse(relaxngParserCtxt);
+            if (!relaxng)
+            {
+                invalidFound = 2;
+                goto schemaCleanup;
+            }
+
+            /* same as for XSD: release the parser context right away */
+            xmlRelaxNGFreeParserCtxt(relaxngParserCtxt);
+            relaxngParserCtxt = NULL;
+        }
+
+        if (schema || relaxng) checkValidity = 1;
+#endif /* LIBXML_SCHEMAS_ENABLED */
+
+        /* ask the parser to validate against the embedded DTD */
+        if (ops.embed) options |= XML_PARSE_DTDVALID;
+
+        for (i=start; i<argc; i++)
+        {
+            int ret = 0;
+
+            /* the reader is created for the first file and reused for
+             * the following ones */
+            if (!reader)
+            {
+                reader = xmlReaderForFile(argv[i], NULL, options);
+            }
+            else
+            {
+                ret = xmlReaderNewFile(reader, argv[i], NULL, options);
+            }
+
+            errorInfo.xmlReader = reader;
+            errorInfo.filename = argv[i];
+
+            if (reader && ret == 0)
+            {
+#ifdef LIBXML_SCHEMAS_ENABLED
+                if (schemaCtxt)
+                {
+                    ret = xmlTextReaderSchemaValidateCtxt(reader,
+                        schemaCtxt, 0);
+                }
+                else if (relaxng)
+                {
+                    ret = xmlTextReaderRelaxNGSetSchema(reader,
+                        relaxng);
+                }
+#endif /* LIBXML_SCHEMAS_ENABLED */
+
+                if (ret == 0)
+                {
+                    /* pull the whole document through the reader */
+                    do
+                    {
+                        ret = xmlTextReaderRead(reader);
+                    } while (ret == 1);
+                    if (ret != -1 && checkValidity)
+                        ret = !xmlTextReaderIsValid(reader);
+                }
+            }
+            else
+            {
+                if (ops.err)
+                    fprintf(stderr, "couldn't read file '%s'\n", errorInfo.filename);
+                ret = 1; /* could not open file */
+            }
+            if (ret) invalidFound = 1;
+
+            if (!ops.show_val_res)
+            {
+                if ((ops.listGood > 0) && (ret == 0))
+                    fprintf(stdout, "%s\n", argv[i]);
+                if ((ops.listGood < 0) && (ret != 0))
+                    fprintf(stdout, "%s\n", argv[i]);
+            }
+            else
+            {
+                if (ret == 0)
+                    fprintf(stdout, "%s - valid\n", argv[i]);
+                else
+                    fprintf(stdout, "%s - invalid\n", argv[i]);
+            }
+        }
+        errorInfo.xmlReader = NULL;
+        xmlFreeTextReader(reader);
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+    schemaCleanup:
+        /* the parser contexts are non-NULL here only when loading the
+         * schema failed part-way */
+        xmlSchemaFreeParserCtxt(schemaParserCtxt);
+        xmlRelaxNGFreeParserCtxt(relaxngParserCtxt);
+        xmlSchemaFreeValidCtxt(schemaCtxt);
+        xmlRelaxNGFree(relaxng);
+        xmlSchemaFree(schema);
+        xmlRelaxNGCleanupTypes();
+        xmlSchemaCleanupTypes();
+#endif /* LIBXML_SCHEMAS_ENABLED */
+    }
+
+    xmlCleanupParser();
+    return invalidFound;
+}