From 0602c535e9c34efca22dba1b1b7465e0618e7bfc Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 4 Sep 2012 14:38:06 +0800 Subject: Rewrite EXSLT string:replace to be conformant For https://bugzilla.gnome.org/show_bug.cgi?id=569703 The libexslt implementation of str:replace fails to conform to its specification on several counts: a) the current version returns a string; it's supposed to return a nodeset. b) the current version treats the replacements as strings; it's supposed to treat them as nodes. c) the current version can modify replacement text; it's supposed to only modify text from the original string. d) the current version ignores the requirement to perform substitutions in descending order of search string length. Steps to reproduce: a) the returning of a string rather than a nodeset can be seen by simply inspecting the code. b) the code explicity converts replacement nodes to strings; this can be seen by inspection. d) the failure to perform substitutions in descending order of search string length can be seen in the lack of any sorting in the source code. c) the problem of modifying text not belonging to the original string can be seen in the following stylesheet, which can be simply applied to itself to produce output. Price is $1.10 $ \ \$ $\backslash$ Actual results: The output is: Price is $\backslash$$1.10 Expected results: The output should be: Price is \$1.10 Does this happen every time? yes. Other information: str:replace specification is at: http://www.exslt.org/str/functions/replace/str.replace.html --- libexslt/strings.c | 322 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 227 insertions(+), 95 deletions(-) (limited to 'libexslt') diff --git a/libexslt/strings.c b/libexslt/strings.c index 1a11976c..5167a414 100644 --- a/libexslt/strings.c +++ b/libexslt/strings.c @@ -505,38 +505,48 @@ exsltStrConcatFunction (xmlXPathParserContextPtr ctxt, int nargs) { } /** - * exsltStrReplaceInternal: - * @str: string to modify - * @searchStr: string to find - * @replaceStr: string to replace occurrences of searchStr + * exsltStrReturnString: + * @ctxt: an XPath parser context + * @str: a string + * @len: length of string * - * Search and replace string function used by exsltStrReplaceFunction + * Returns a string as a node set. */ -static xmlChar* -exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr, - const xmlChar* replaceStr) +static int +exsltStrReturnString(xmlXPathParserContextPtr ctxt, const xmlChar *str, + int len) { - const xmlChar *curr, *next; - xmlChar *ret = NULL; - int searchStrSize; + xsltTransformContextPtr tctxt = xsltXPathGetTransformContext(ctxt); + xmlDocPtr container; + xmlNodePtr text_node; + xmlXPathObjectPtr ret; - curr = str; - searchStrSize = xmlStrlen(searchStr); + container = xsltCreateRVT(tctxt); + if (container == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } + xsltRegisterLocalRVT(tctxt, container); - do { - next = xmlStrstr(curr, searchStr); - if (next == NULL) { - ret = xmlStrcat (ret, curr); - break; - } + text_node = xmlNewTextLen(str, len); + if (text_node == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } + xmlAddChild((xmlNodePtr) container, text_node); + + ret = xmlXPathNewNodeSet(text_node); + if (ret == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } - ret = xmlStrncat (ret, curr, next - curr); - ret = xmlStrcat (ret, replaceStr); - curr = next + searchStrSize; - } while (*curr != 0); + xsltExtensionInstructionResultRegister(tctxt, ret); + valuePush(ctxt, ret); - return ret; + return(0); } + /** * exsltStrReplaceFunction: * @ctxt: an XPath parser context @@ -547,97 +557,219 @@ exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr, */ static void exsltStrReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs) { - xmlChar *str = NULL, *searchStr = NULL, *replaceStr = NULL; - xmlNodeSetPtr replaceSet = NULL, searchSet = NULL; - xmlChar *ret = NULL, *retSwap = NULL; - int i; + int i, i_empty, n, slen0, rlen0, *slen, *rlen; + void *mem = NULL; + const xmlChar *src, *start; + xmlChar *string, *search_str = NULL, *replace_str = NULL; + xmlChar **search, **replace; + xmlNodeSetPtr search_set = NULL, replace_set = NULL; + xmlBufferPtr buf; if (nargs != 3) { - xmlXPathSetArityError(ctxt); - return; + xmlXPathSetArityError(ctxt); + return; } - /* pull out replace argument */ + /* get replace argument */ + + if (!xmlXPathStackIsNodeSet(ctxt)) + replace_str = xmlXPathPopString(ctxt); + else + replace_set = xmlXPathPopNodeSet(ctxt); + + if (xmlXPathCheckError(ctxt)) + goto fail_replace; + + /* get search argument */ + if (!xmlXPathStackIsNodeSet(ctxt)) { - replaceStr = xmlXPathPopString(ctxt); + search_str = xmlXPathPopString(ctxt); + n = 1; } - else { - replaceSet = xmlXPathPopNodeSet(ctxt); - if (xmlXPathCheckError(ctxt)) { - xmlXPathSetTypeError(ctxt); - goto fail; - } + else { + search_set = xmlXPathPopNodeSet(ctxt); + n = search_set != NULL ? search_set->nodeNr : 0; } - /* behavior driven by search argument from here on */ - if (!xmlXPathStackIsNodeSet(ctxt)) { - searchStr = xmlXPathPopString(ctxt); - str = xmlXPathPopString(ctxt); - - if (replaceStr == NULL) { - xmlXPathSetTypeError(ctxt); - goto fail; - } - - ret = exsltStrReplaceInternal(str, searchStr, replaceStr); - } - else { - searchSet = xmlXPathPopNodeSet(ctxt); - if (searchSet == NULL || xmlXPathCheckError(ctxt)) { - xmlXPathSetTypeError(ctxt); - goto fail; - } - - str = xmlXPathPopString(ctxt); - ret = xmlStrdup(str); - - for (i = 0; i < searchSet->nodeNr; i++) { - searchStr = xmlXPathCastNodeToString(searchSet->nodeTab[i]); - - if (replaceSet != NULL) { - replaceStr = NULL; - if (i < replaceSet->nodeNr) { - replaceStr = xmlXPathCastNodeToString(replaceSet->nodeTab[i]); - } - - retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr); - - if (replaceStr != NULL) { - xmlFree(replaceStr); - replaceStr = NULL; - } + if (xmlXPathCheckError(ctxt)) + goto fail_search; + + /* get string argument */ + + string = xmlXPathPopString(ctxt); + if (xmlXPathCheckError(ctxt)) + goto fail_string; + + /* check for empty search node list */ + + if (n <= 0) { + exsltStrReturnString(ctxt, string, xmlStrlen(string)); + goto done_empty_search; + } + + /* allocate memory for string pointer and length arrays */ + + if (n == 1) { + search = &search_str; + replace = &replace_str; + slen = &slen0; + rlen = &rlen0; + } + else { + mem = xmlMalloc(2 * n * (sizeof(const xmlChar *) + sizeof(int))); + if (mem == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_malloc; + } + search = (xmlChar **) mem; + replace = search + n; + slen = (int *) (replace + n); + rlen = slen + n; + } + + /* process arguments */ + + i_empty = -1; + + for (i=0; inodeTab[i]); + if (search[i] == NULL) { + n = i; + goto fail_process_args; + } + } + + slen[i] = xmlStrlen(search[i]); + if (i_empty < 0 && slen[i] == 0) + i_empty = i; + + if (replace_set != NULL) { + if (i < replace_set->nodeNr) { + replace[i] = xmlXPathCastNodeToString(replace_set->nodeTab[i]); + if (replace[i] == NULL) { + n = i + 1; + goto fail_process_args; + } + } + else + replace[i] = NULL; } else { - retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr); + if (i == 0) + replace[i] = replace_str; + else + replace[i] = NULL; } - xmlFree(ret); - if (searchStr != NULL) { - xmlFree(searchStr); - searchStr = NULL; + if (replace[i] == NULL) + rlen[i] = 0; + else + rlen[i] = xmlStrlen(replace[i]); + } + + if (i_empty >= 0 && rlen[i_empty] == 0) + i_empty = -1; + + /* replace operation */ + + buf = xmlBufferCreate(); + if (buf == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer; + } + src = string; + start = string; + + while (*src != 0) { + int max_len = 0, i_match = 0; + + for (i=0; i max_len && + xmlStrncmp(src, search[i], slen[i]) == 0) + { + i_match = i; + max_len = slen[i]; + } } - ret = retSwap; - } + if (max_len == 0) { + if (i_empty >= 0 && start < src) { + if (xmlBufferAdd(buf, start, src - start) || + xmlBufferAdd(buf, replace[i_empty], rlen[i_empty])) + { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } + start = src; + } - if (replaceSet != NULL) - xmlXPathFreeNodeSet(replaceSet); + src += xmlUTF8Size(src); + } + else { + if ((start < src && + xmlBufferAdd(buf, start, src - start)) || + (rlen[i_match] && + xmlBufferAdd(buf, replace[i_match], rlen[i_match]))) + { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } - if (searchSet != NULL) - xmlXPathFreeNodeSet(searchSet); - } + src += slen[i_match]; + start = src; + } + } - xmlXPathReturnString(ctxt, ret); + if (start < src && xmlBufferAdd(buf, start, src - start)) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } - fail: - if (replaceStr != NULL) - xmlFree(replaceStr); + /* create result node set */ - if (searchStr != NULL) - xmlFree(searchStr); + exsltStrReturnString(ctxt, xmlBufferContent(buf), xmlBufferLength(buf)); - if (str != NULL) - xmlFree(str); + /* clean up */ + +fail_buffer_add: + xmlBufferFree(buf); + +fail_buffer: +fail_process_args: + if (search_set != NULL) { + for (i=0; i