summaryrefslogtreecommitdiff
path: root/src/xml_format.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml_format.c')
-rw-r--r--src/xml_format.c392
1 files changed, 392 insertions, 0 deletions
diff --git a/src/xml_format.c b/src/xml_format.c
new file mode 100644
index 0000000..4d6fe31
--- /dev/null
+++ b/src/xml_format.c
@@ -0,0 +1,392 @@
+/* $Id: xml_format.c,v 1.25 2005/01/07 02:33:40 mgrouch Exp $ */
+
+/*
+
+XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
+
+Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+#include <config.h>
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libxml/xmlmemory.h>
+#include <libxml/debugXML.h>
+#include <libxml/xmlIO.h>
+#include <libxml/HTMLtree.h>
+#include <libxml/xinclude.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#include <libxml/xpointer.h>
+#include <libxml/parserInternals.h>
+#include <libxml/uri.h>
+
+#include "xmlstar.h"
+
+/*
+ * TODO: 1. Attribute formatting options (as every attribute on a new line)
+ * 2. exit values on errors
+ */
+
+typedef struct _foOptions {
+ int indent; /* indent output */
+ int indent_tab; /* indent output with tab */
+ int indent_spaces; /* num spaces for indentation */
+ int omit_decl; /* omit xml declaration */
+ int recovery; /* try to recover what is parsable */
+ int dropdtd; /* remove the DOCTYPE of the input docs */
+ int options; /* global parsing flags */
+#ifdef LIBXML_HTML_ENABLED
+ int html; /* inputs are in HTML format */
+#endif
+ int quiet; /* quiet mode */
+} foOptions;
+
+typedef foOptions *foOptionsPtr;
+
+const char *encoding = NULL;
+static char *spaces = NULL;
+
+/**
+ * Print small help for command line options
+ */
+void
+foUsage(int argc, char **argv, exit_status status)
+{
+ extern void fprint_format_usage(FILE* o, const char* argv0);
+ extern const char more_info[];
+ FILE *o = (status == EXIT_SUCCESS)? stdout : stderr;
+ fprint_format_usage(o, argv[0]);
+ fprintf(o, "%s", more_info);
+ exit(status);
+}
+
+/**
+ * Initialize global command line options
+ */
+void
+foInitOptions(foOptionsPtr ops)
+{
+ ops->indent = 1;
+ ops->indent_tab = 0;
+ ops->indent_spaces = 2;
+ ops->omit_decl = 0;
+ ops->recovery = 0;
+ ops->dropdtd = 0;
+ ops->options = XML_PARSE_NONET;
+#ifdef LIBXML_HTML_ENABLED
+ ops->html = 0;
+#endif
+ ops->quiet = 0;
+}
+
+/**
+ * Initialize LibXML
+ */
+void
+foInitLibXml(foOptionsPtr ops)
+{
+ /*
+ * Initialize library memory
+ */
+ xmlInitMemory();
+
+ LIBXML_TEST_VERSION
+
+ /*
+ * Store line numbers in the document tree
+ */
+ xmlLineNumbersDefault(1);
+
+ xmlSubstituteEntitiesDefault(1);
+ xmlKeepBlanksDefault(0);
+ xmlPedanticParserDefault(0);
+
+ xmlGetWarningsDefaultValue = 1;
+ xmlDoValidityCheckingDefaultValue = 0;
+ xmlLoadExtDtdDefaultValue = 0;
+
+ xmlTreeIndentString = NULL;
+ if (ops->indent)
+ {
+ xmlIndentTreeOutput = 1;
+ if (ops->indent_tab)
+ {
+ xmlTreeIndentString = "\t";
+ }
+ else if (ops->indent_spaces > 0)
+ {
+ spaces = xmlMalloc(ops->indent_spaces + 1);
+ xmlTreeIndentString = spaces;
+ memset(spaces, ' ', ops->indent_spaces);
+ spaces[ops->indent_spaces] = '\0';
+ }
+ }
+ else
+ xmlIndentTreeOutput = 0;
+}
+
+/**
+ * Parse global command line options
+ */
+int
+foParseOptions(foOptionsPtr ops, int argc, char **argv)
+{
+ int i;
+
+ i = 2;
+ while(i < argc)
+ {
+ if (!strcmp(argv[i], "--noindent") || !strcmp(argv[i], "-n"))
+ {
+ ops->indent = 0;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--encode") || !strcmp(argv[i], "-e"))
+ {
+ i++;
+ encoding = argv[i];
+ i++;
+ }
+ else if (!strcmp(argv[i], "--indent-tab") || !strcmp(argv[i], "-t"))
+ {
+ ops->indent_tab = 1;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--omit-decl") || !strcmp(argv[i], "-o"))
+ {
+ ops->omit_decl = 1;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--dropdtd") || !strcmp(argv[i], "-D"))
+ {
+ ops->dropdtd = 1;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--recover") || !strcmp(argv[i], "-R"))
+ {
+ ops->recovery = 1;
+ ops->options |= XML_PARSE_RECOVER;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--nocdata") || !strcmp(argv[i], "-C"))
+ {
+ ops->options |= XML_PARSE_NOCDATA;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--nsclean") || !strcmp(argv[i], "-N"))
+ {
+ ops->options |= XML_PARSE_NSCLEAN;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--indent-spaces") || !strcmp(argv[i], "-s"))
+ {
+ int value;
+ i++;
+ if (i >= argc) foUsage(argc, argv, EXIT_BAD_ARGS);
+ if (sscanf(argv[i], "%d", &value) == 1)
+ {
+ if (value > 0) ops->indent_spaces = value;
+ }
+ else
+ {
+ foUsage(argc, argv, EXIT_BAD_ARGS);
+ }
+ ops->indent_tab = 0;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--quiet") || !strcmp(argv[i], "-Q"))
+ {
+ ops->quiet = 1;
+ i++;
+ }
+#ifdef LIBXML_HTML_ENABLED
+ else if (!strcmp(argv[i], "--html") || !strcmp(argv[i], "-H"))
+ {
+ ops->html = 1;
+ i++;
+ }
+#endif
+ else if (!strcmp(argv[i], "--net"))
+ {
+ ops->options &= ~XML_PARSE_NONET;
+ i++;
+ }
+ else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
+ {
+ foUsage(argc, argv, EXIT_SUCCESS);
+ }
+ else if (!strcmp(argv[i], "-"))
+ {
+ i++;
+ break;
+ }
+ else if (argv[i][0] == '-')
+ {
+ foUsage(argc, argv, EXIT_BAD_ARGS);
+ }
+ else
+ {
+ i++;
+ break;
+ }
+ }
+
+ return i-1;
+}
+
+void my_error_func(void* ctx, const char * msg, ...) {
+ /* do nothing */
+}
+
+void my_structured_error_func(void * userData, xmlErrorPtr error) {
+ /* do nothing */
+}
+
+/**
+ * 'process' xml document(s)
+ */
+int
+foProcess(foOptionsPtr ops, int start, int argc, char **argv)
+{
+ int ret = 0;
+ xmlDocPtr doc = NULL;
+ char *fileName = "-";
+
+ if ((start > 1) && (start < argc) && (argv[start][0] != '-') &&
+ strcmp(argv[start-1], "--indent-spaces") &&
+ strcmp(argv[start-1], "-s"))
+ {
+ fileName = argv[start];
+ }
+/*
+ if (ops->recovery)
+ {
+ doc = xmlRecoverFile(fileName);
+ }
+ else
+*/
+ if (ops->quiet) {
+ xmlSetGenericErrorFunc(NULL, my_error_func);
+ xmlSetStructuredErrorFunc(NULL, my_structured_error_func);
+ }
+
+#ifdef LIBXML_HTML_ENABLED
+ if (ops->html)
+ {
+ doc = htmlReadFile(fileName, NULL, ops->options);
+ }
+ else
+#endif
+ doc = xmlReadFile(fileName, NULL, ops->options);
+
+ if (doc == NULL)
+ {
+ /*fprintf(stderr, "%s:: error: XML parse error\n", fileName);*/
+ return 2;
+ }
+
+ /*
+ * Remove DOCTYPE nodes
+ */
+ if (ops->dropdtd) {
+ xmlDtdPtr dtd;
+
+ dtd = xmlGetIntSubset(doc);
+ if (dtd != NULL) {
+ xmlUnlinkNode((xmlNodePtr)dtd);
+ xmlFreeDtd(dtd);
+ }
+ }
+
+ if (!ops->omit_decl)
+ {
+ if (encoding != NULL)
+ {
+ xmlSaveFormatFileEnc("-", doc, encoding, 1);
+ }
+ else
+ {
+ xmlSaveFormatFile("-", doc, 1);
+ }
+ }
+ else
+ {
+ int format = 1;
+ xmlOutputBufferPtr buf = NULL;
+ xmlCharEncodingHandlerPtr handler = NULL;
+ buf = xmlOutputBufferCreateFile(stdout, handler);
+
+ if (doc->children != NULL)
+ {
+ xmlNodePtr child = doc->children;
+ while (child != NULL)
+ {
+ xmlNodeDumpOutput(buf, doc, child, 0, format, encoding);
+ xmlOutputBufferWriteString(buf, "\n");
+ child = child->next;
+ }
+ }
+ ret = xmlOutputBufferClose(buf);
+ }
+
+ xmlFreeDoc(doc);
+ return ret;
+}
+
+/**
+ * Cleanup memory
+ */
+void
+foCleanup()
+{
+ free(spaces);
+ spaces = NULL;
+ xmlCleanupParser();
+#if 0
+ xmlMemoryDump();
+#endif
+}
+
+/**
+ * This is the main function for 'format' option
+ */
+int
+foMain(int argc, char **argv)
+{
+ int ret = 0;
+ int start;
+ static foOptions ops;
+
+ if (argc <=1) foUsage(argc, argv, EXIT_BAD_ARGS);
+ foInitOptions(&ops);
+ start = foParseOptions(&ops, argc, argv);
+ if (argc-start > 1) foUsage(argc, argv, EXIT_BAD_ARGS);
+ foInitLibXml(&ops);
+ ret = foProcess(&ops, start, argc, argv);
+ foCleanup();
+
+ return ret;
+}