diff options
author | Nick Wellnhofer <wellnhofer@aevum.de> | 2012-09-04 14:38:06 +0800 |
---|---|---|
committer | Daniel Veillard <veillard@redhat.com> | 2012-09-04 14:38:06 +0800 |
commit | 0602c535e9c34efca22dba1b1b7465e0618e7bfc (patch) | |
tree | 283ce2e4e5da6db29b4494fdfaf7241e52bccbf2 | |
parent | 39b0c27af8a18e38b95bf50c40767610d8a13906 (diff) | |
download | libxslt-0602c535e9c34efca22dba1b1b7465e0618e7bfc.tar.gz libxslt-0602c535e9c34efca22dba1b1b7465e0618e7bfc.tar.bz2 libxslt-0602c535e9c34efca22dba1b1b7465e0618e7bfc.zip |
Rewrite EXSLT string:replace to be conformant
For https://bugzilla.gnome.org/show_bug.cgi?id=569703
The libexslt implementation of str:replace fails to conform to its
specification on several counts:
a) the current version returns a string; it's supposed to
return a nodeset.
b) the current version treats the replacements as strings;
it's supposed to treat them as nodes.
c) the current version can modify replacement text; it's
supposed to only modify text from the original string.
d) the current version ignores the requirement to perform
substitutions in descending order of search string length.
Steps to reproduce:
a) the returning of a string rather than a nodeset can be seen
by simply inspecting the code.
b) the code explicity converts replacement nodes to strings;
this can be seen by inspection.
d) the failure to perform substitutions in descending order
of search string length can be seen in the lack of any
sorting in the source code.
c) the problem of modifying text not belonging to the original
string can be seen in the following stylesheet, which can
be simply applied to itself to produce output.
<xsl:stylesheet version="1.0"
extension-element-prefixes="str exsl"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:exsl="http://exslt.org/common"
xmlns:str="http://exslt.org/strings"
>
<xsl:variable name="Text">
Price is $1.10
</xsl:variable>
<xsl:template match="/">
<xsl:apply-templates select="exsl:node-set($Text)/text()"/>
</xsl:template>
<xsl:template match="text()">
<xsl:variable name="Replace">
<FromXml>
<from>$</from>
<from>\</from>
</FromXml>
<ToTex>
<to>\$</to>
<to>$\backslash$</to>
</ToTex>
</xsl:variable>
<xsl:value-of
select="str:replace(.,exsl:node-set($Replace)/FromXml/from,exsl:node-set($Replace)/ToTex/to)"/>
</xsl:template>
</xsl:stylesheet>
Actual results:
The output is:
<?xml version="1.0"?>
Price is $\backslash$$1.10
Expected results:
The output should be:
<?xml version="1.0"?>
Price is \$1.10
Does this happen every time?
yes.
Other information:
str:replace specification is at:
http://www.exslt.org/str/functions/replace/str.replace.html
-rw-r--r-- | libexslt/strings.c | 322 | ||||
-rw-r--r-- | tests/exslt/strings/replace.1.out | 12 | ||||
-rw-r--r-- | tests/exslt/strings/replace.1.xml | 4 | ||||
-rw-r--r-- | tests/exslt/strings/replace.1.xsl | 13 |
4 files changed, 255 insertions, 96 deletions
diff --git a/libexslt/strings.c b/libexslt/strings.c index 1a11976c..5167a414 100644 --- a/libexslt/strings.c +++ b/libexslt/strings.c @@ -505,38 +505,48 @@ exsltStrConcatFunction (xmlXPathParserContextPtr ctxt, int nargs) { } /** - * exsltStrReplaceInternal: - * @str: string to modify - * @searchStr: string to find - * @replaceStr: string to replace occurrences of searchStr + * exsltStrReturnString: + * @ctxt: an XPath parser context + * @str: a string + * @len: length of string * - * Search and replace string function used by exsltStrReplaceFunction + * Returns a string as a node set. */ -static xmlChar* -exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr, - const xmlChar* replaceStr) +static int +exsltStrReturnString(xmlXPathParserContextPtr ctxt, const xmlChar *str, + int len) { - const xmlChar *curr, *next; - xmlChar *ret = NULL; - int searchStrSize; + xsltTransformContextPtr tctxt = xsltXPathGetTransformContext(ctxt); + xmlDocPtr container; + xmlNodePtr text_node; + xmlXPathObjectPtr ret; - curr = str; - searchStrSize = xmlStrlen(searchStr); + container = xsltCreateRVT(tctxt); + if (container == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } + xsltRegisterLocalRVT(tctxt, container); - do { - next = xmlStrstr(curr, searchStr); - if (next == NULL) { - ret = xmlStrcat (ret, curr); - break; - } + text_node = xmlNewTextLen(str, len); + if (text_node == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } + xmlAddChild((xmlNodePtr) container, text_node); + + ret = xmlXPathNewNodeSet(text_node); + if (ret == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + return(-1); + } - ret = xmlStrncat (ret, curr, next - curr); - ret = xmlStrcat (ret, replaceStr); - curr = next + searchStrSize; - } while (*curr != 0); + xsltExtensionInstructionResultRegister(tctxt, ret); + valuePush(ctxt, ret); - return ret; + return(0); } + /** * exsltStrReplaceFunction: * @ctxt: an XPath parser context @@ -547,97 +557,219 @@ exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr, */ static void exsltStrReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs) { - xmlChar *str = NULL, *searchStr = NULL, *replaceStr = NULL; - xmlNodeSetPtr replaceSet = NULL, searchSet = NULL; - xmlChar *ret = NULL, *retSwap = NULL; - int i; + int i, i_empty, n, slen0, rlen0, *slen, *rlen; + void *mem = NULL; + const xmlChar *src, *start; + xmlChar *string, *search_str = NULL, *replace_str = NULL; + xmlChar **search, **replace; + xmlNodeSetPtr search_set = NULL, replace_set = NULL; + xmlBufferPtr buf; if (nargs != 3) { - xmlXPathSetArityError(ctxt); - return; + xmlXPathSetArityError(ctxt); + return; } - /* pull out replace argument */ + /* get replace argument */ + + if (!xmlXPathStackIsNodeSet(ctxt)) + replace_str = xmlXPathPopString(ctxt); + else + replace_set = xmlXPathPopNodeSet(ctxt); + + if (xmlXPathCheckError(ctxt)) + goto fail_replace; + + /* get search argument */ + if (!xmlXPathStackIsNodeSet(ctxt)) { - replaceStr = xmlXPathPopString(ctxt); + search_str = xmlXPathPopString(ctxt); + n = 1; } - else { - replaceSet = xmlXPathPopNodeSet(ctxt); - if (xmlXPathCheckError(ctxt)) { - xmlXPathSetTypeError(ctxt); - goto fail; - } + else { + search_set = xmlXPathPopNodeSet(ctxt); + n = search_set != NULL ? search_set->nodeNr : 0; } - /* behavior driven by search argument from here on */ - if (!xmlXPathStackIsNodeSet(ctxt)) { - searchStr = xmlXPathPopString(ctxt); - str = xmlXPathPopString(ctxt); - - if (replaceStr == NULL) { - xmlXPathSetTypeError(ctxt); - goto fail; - } - - ret = exsltStrReplaceInternal(str, searchStr, replaceStr); - } - else { - searchSet = xmlXPathPopNodeSet(ctxt); - if (searchSet == NULL || xmlXPathCheckError(ctxt)) { - xmlXPathSetTypeError(ctxt); - goto fail; - } - - str = xmlXPathPopString(ctxt); - ret = xmlStrdup(str); - - for (i = 0; i < searchSet->nodeNr; i++) { - searchStr = xmlXPathCastNodeToString(searchSet->nodeTab[i]); - - if (replaceSet != NULL) { - replaceStr = NULL; - if (i < replaceSet->nodeNr) { - replaceStr = xmlXPathCastNodeToString(replaceSet->nodeTab[i]); - } - - retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr); - - if (replaceStr != NULL) { - xmlFree(replaceStr); - replaceStr = NULL; - } + if (xmlXPathCheckError(ctxt)) + goto fail_search; + + /* get string argument */ + + string = xmlXPathPopString(ctxt); + if (xmlXPathCheckError(ctxt)) + goto fail_string; + + /* check for empty search node list */ + + if (n <= 0) { + exsltStrReturnString(ctxt, string, xmlStrlen(string)); + goto done_empty_search; + } + + /* allocate memory for string pointer and length arrays */ + + if (n == 1) { + search = &search_str; + replace = &replace_str; + slen = &slen0; + rlen = &rlen0; + } + else { + mem = xmlMalloc(2 * n * (sizeof(const xmlChar *) + sizeof(int))); + if (mem == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_malloc; + } + search = (xmlChar **) mem; + replace = search + n; + slen = (int *) (replace + n); + rlen = slen + n; + } + + /* process arguments */ + + i_empty = -1; + + for (i=0; i<n; ++i) { + if (search_set != NULL) { + search[i] = xmlXPathCastNodeToString(search_set->nodeTab[i]); + if (search[i] == NULL) { + n = i; + goto fail_process_args; + } + } + + slen[i] = xmlStrlen(search[i]); + if (i_empty < 0 && slen[i] == 0) + i_empty = i; + + if (replace_set != NULL) { + if (i < replace_set->nodeNr) { + replace[i] = xmlXPathCastNodeToString(replace_set->nodeTab[i]); + if (replace[i] == NULL) { + n = i + 1; + goto fail_process_args; + } + } + else + replace[i] = NULL; } else { - retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr); + if (i == 0) + replace[i] = replace_str; + else + replace[i] = NULL; } - xmlFree(ret); - if (searchStr != NULL) { - xmlFree(searchStr); - searchStr = NULL; + if (replace[i] == NULL) + rlen[i] = 0; + else + rlen[i] = xmlStrlen(replace[i]); + } + + if (i_empty >= 0 && rlen[i_empty] == 0) + i_empty = -1; + + /* replace operation */ + + buf = xmlBufferCreate(); + if (buf == NULL) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer; + } + src = string; + start = string; + + while (*src != 0) { + int max_len = 0, i_match = 0; + + for (i=0; i<n; ++i) { + if (*src == search[i][0] && + slen[i] > max_len && + xmlStrncmp(src, search[i], slen[i]) == 0) + { + i_match = i; + max_len = slen[i]; + } } - ret = retSwap; - } + if (max_len == 0) { + if (i_empty >= 0 && start < src) { + if (xmlBufferAdd(buf, start, src - start) || + xmlBufferAdd(buf, replace[i_empty], rlen[i_empty])) + { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } + start = src; + } - if (replaceSet != NULL) - xmlXPathFreeNodeSet(replaceSet); + src += xmlUTF8Size(src); + } + else { + if ((start < src && + xmlBufferAdd(buf, start, src - start)) || + (rlen[i_match] && + xmlBufferAdd(buf, replace[i_match], rlen[i_match]))) + { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } - if (searchSet != NULL) - xmlXPathFreeNodeSet(searchSet); - } + src += slen[i_match]; + start = src; + } + } - xmlXPathReturnString(ctxt, ret); + if (start < src && xmlBufferAdd(buf, start, src - start)) { + xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR); + goto fail_buffer_add; + } - fail: - if (replaceStr != NULL) - xmlFree(replaceStr); + /* create result node set */ - if (searchStr != NULL) - xmlFree(searchStr); + exsltStrReturnString(ctxt, xmlBufferContent(buf), xmlBufferLength(buf)); - if (str != NULL) - xmlFree(str); + /* clean up */ + +fail_buffer_add: + xmlBufferFree(buf); + +fail_buffer: +fail_process_args: + if (search_set != NULL) { + for (i=0; i<n; ++i) + xmlFree(search[i]); + } + if (replace_set != NULL) { + for (i=0; i<n; ++i) { + if (replace[i] != NULL) + xmlFree(replace[i]); + } + } + + if (mem != NULL) + xmlFree(mem); + +fail_malloc: +done_empty_search: + xmlFree(string); + +fail_string: + if (search_set != NULL) + xmlXPathFreeNodeSet(search_set); + else + xmlFree(search_str); + +fail_search: + if (replace_set != NULL) + xmlXPathFreeNodeSet(replace_set); + else + xmlFree(replace_str); + +fail_replace: + return; } /** diff --git a/tests/exslt/strings/replace.1.out b/tests/exslt/strings/replace.1.out index 076d1101..f41d67cc 100644 --- a/tests/exslt/strings/replace.1.out +++ b/tests/exslt/strings/replace.1.out @@ -1,5 +1,9 @@ <?xml version="1.0"?> <out>; + result nodes: 1 + result text nodes: 1 + result string: a + str:replace('a, simple, list', ', ', '-') a-simple-list @@ -19,4 +23,10 @@ tee, eye, billow, a longer string str:replace('fee, fi, fo, fum', $x, 'j') - j, j, j, j</out> + j, , , + + str:replace('foo', '', 'baz') + fbazobazo + + str:replace('Price is $1.10', $from, $to) + Price is \$1.10</out> diff --git a/tests/exslt/strings/replace.1.xml b/tests/exslt/strings/replace.1.xml index 6371161f..16aac14c 100644 --- a/tests/exslt/strings/replace.1.xml +++ b/tests/exslt/strings/replace.1.xml @@ -8,5 +8,9 @@ <y>eye</y> <y>billow</y> <y>a longer string</y> + <from>$</from> + <from>\</from> + <to>\$</to> + <to>$\backslash$</to> </strings> </doc> diff --git a/tests/exslt/strings/replace.1.xsl b/tests/exslt/strings/replace.1.xsl index 0c9e86cd..f9a74423 100644 --- a/tests/exslt/strings/replace.1.xsl +++ b/tests/exslt/strings/replace.1.xsl @@ -7,8 +7,15 @@ <xsl:template match="/"> <xsl:variable name="x" select="doc/strings/x"/> <xsl:variable name="y" select="doc/strings/y"/> + <xsl:variable name="from" select="doc/strings/from"/> + <xsl:variable name="to" select="doc/strings/to"/> + <xsl:variable name="result" select="str:replace('a', 'b', 'c')"/> <out>; + result nodes: <xsl:value-of select="count($result)"/> + result text nodes: <xsl:value-of select="count($result/self::text())"/> + result string: <xsl:value-of select="$result/self::text()"/> + str:replace('a, simple, list', ', ', '-') <xsl:copy-of select="str:replace('a, simple, list', ', ', '-')"/> @@ -30,6 +37,12 @@ str:replace('fee, fi, fo, fum', $x, 'j') <xsl:copy-of select="str:replace('fee, fi, fo, fum', $x, 'j')" /> + str:replace('foo', '', 'baz') + <xsl:copy-of select="str:replace('foo', '', 'baz')" /> + + str:replace('Price is $1.10', $from, $to) + <xsl:copy-of select="str:replace('Price is $1.10', $from, $to)" /> + </out> </xsl:template> |