diff options
Diffstat (limited to 'testsuite')
186 files changed, 15797 insertions, 0 deletions
diff --git a/testsuite/0range.good b/testsuite/0range.good new file mode 100644 index 0000000..7cfab5b --- /dev/null +++ b/testsuite/0range.good @@ -0,0 +1 @@ +yes diff --git a/testsuite/0range.inp b/testsuite/0range.inp new file mode 100644 index 0000000..c09c47b --- /dev/null +++ b/testsuite/0range.inp @@ -0,0 +1,6 @@ +1 +2 +3 +4 +aaa +yes diff --git a/testsuite/0range.sed b/testsuite/0range.sed new file mode 100644 index 0000000..33aa8b8 --- /dev/null +++ b/testsuite/0range.sed @@ -0,0 +1 @@ +0,/aaa/d diff --git a/testsuite/8bit.good b/testsuite/8bit.good new file mode 100644 index 0000000..1bd5178 --- /dev/null +++ b/testsuite/8bit.good @@ -0,0 +1,9 @@ +äƤâ¤è ¤ßäÆ»ý¤Á +·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á +¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù +²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í +¤½¤é¤ß¤Ä ÆüËܤιñ¤Ï +¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì +¤·¤¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤» +¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á +²È¤ò¤â̾¤ò¤â diff --git a/testsuite/8bit.inp b/testsuite/8bit.inp new file mode 100644 index 0000000..8c9c4bb --- /dev/null +++ b/testsuite/8bit.inp @@ -0,0 +1,9 @@ +äƤâ¤è ¤ßäÆ»ý¤Á +·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á +¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù +²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í +¤½¤é¤ß¤Ä ÂçϤιñ¤Ï +¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì +¤·¤¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤» +¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á +²È¤ò¤â̾¤ò¤â diff --git a/testsuite/8bit.sed b/testsuite/8bit.sed new file mode 100644 index 0000000..7b3ed8d --- /dev/null +++ b/testsuite/8bit.sed @@ -0,0 +1,21 @@ +# The first poem from the Man'yoshu. I like Hitomaro's poems better +# but I couldn't find a copy of any of them in Japanese. This version +# of this poem is from $BNc2r8E8l<-E5(B($BBh;0HG(B)$B;0>JF2(B. +# +# Speaking of Hitomaro, here is the english translation of one of my +# favorites. I just know that everyone reading these test cases wants +# to see this. +# +# In the autumn mountains +# The yellow leaves are so thick. +# Alas, how shall I seek my love +# Who has wandered away? +# +# I see the messenger come +# As the yellow leaves are falling. +# Oh, well I remember +# How on such a day we used to meet-- +# My lover and I! 
+# -- Kakinomoto Hitomaro +# +s/ÂçÏÂ/ÆüËÜ/ diff --git a/testsuite/8to7.good b/testsuite/8to7.good new file mode 100644 index 0000000..4485882 --- /dev/null +++ b/testsuite/8to7.good @@ -0,0 +1,14 @@ +\344\306\244\342\244\350 \244\337\344\306\273\375\244\301$ +\267\241\266\372\244\342\244\350 \244\337\267\241\266\372\273\375\244\ +\301$ +\244\263\244\316\265\326\244\313 \272\332\305\246\244\336\244\271\273\ +\371$ +\262\310\264\326\244\253\244\312 \271\360\244\351\244\265\244\315$ +\244\275\244\351\244\337\244\304 \302\347\317\302\244\316\271\361\244\ +\317$ +\244\252\244\267\244\343\244\312\244\331\244\306 \244\357\244\354\244\ +\263\244\275\265\357\244\354$ +\244\267\244\255\244\312\244\331\244\306 \244\357\244\354\244\263\244\ +\275 \272\302\244\273$ +\244\357\244\313\244\263\244\275\244\317 \271\360\244\351\244\341$ +\262\310\244\362\244\342\314\276\244\362\244\342$ diff --git a/testsuite/8to7.inp b/testsuite/8to7.inp new file mode 100644 index 0000000..8c9c4bb --- /dev/null +++ b/testsuite/8to7.inp @@ -0,0 +1,9 @@ +äƤâ¤è ¤ßäÆ»ý¤Á +·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á +¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù +²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í +¤½¤é¤ß¤Ä ÂçϤιñ¤Ï +¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì +¤·¤¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤» +¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á +²È¤ò¤â̾¤ò¤â diff --git a/testsuite/8to7.sed b/testsuite/8to7.sed new file mode 100644 index 0000000..f9d3f50 --- /dev/null +++ b/testsuite/8to7.sed @@ -0,0 +1 @@ +l;d diff --git a/testsuite/BOOST.tests b/testsuite/BOOST.tests new file mode 100644 index 0000000..98fd3b6 --- /dev/null +++ b/testsuite/BOOST.tests @@ -0,0 +1,829 @@ +; +; +; this file contains a script of tests to run through regress.exe +; +; comments start with a semicolon and proceed to the end of the line +; +; changes to regular expression compile flags start with a "-" as the first +; non-whitespace character and consist of a list of the printable names +; of the flags, for example "match_default" +; +; Other lines contain a test to perform using the current flag status +; the first token contains the expression to 
compile, the second the string +; to match it against. If the second string is "!" then the expression should +; not compile, that is the first string is an invalid regular expression. +; This is then followed by a list of integers that specify what should match, +; each pair represents the starting and ending positions of a subexpression +; starting with the zeroth subexpression (the whole match). +; A value of -1 indicates that the subexpression should not take part in the +; match at all, if the first value is -1 then no part of the expression should +; match the string. +; +; Tests taken from BOOST testsuite and adapted to glibc regex. +; +; Boost Software License - Version 1.0 - August 17th, 2003 +; +; Permission is hereby granted, free of charge, to any person or organization +; obtaining a copy of the software and accompanying documentation covered by +; this license (the "Software") to use, reproduce, display, distribute, +; execute, and transmit the Software, and to prepare derivative works of the +; Software, and to permit third-parties to whom the Software is furnished to +; do so, all subject to the following: +; +; The copyright notices in the Software and this entire statement, including +; the above license grant, this restriction and the following disclaimer, +; must be included in all copies of the Software, in whole or in part, and +; all derivative works of the Software, unless such copies or derivative +; works are solely in the form of machine-executable object code generated by +; a source language processor. +; +; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +; DEALINGS IN THE SOFTWARE. +; + +- match_default normal REG_EXTENDED + +; +; try some really simple literals: +a a 0 1 +Z Z 0 1 +Z aaa -1 -1 +Z xxxxZZxxx 4 5 + +; and some simple brackets: +(a) zzzaazz 3 4 3 4 +() zzz 0 0 0 0 +() "" 0 0 0 0 +( ! +) ) 0 1 +(aa ! +aa) baa)b 1 4 +a b -1 -1 +\(\) () 0 2 +\(a\) (a) 0 3 +\() () 0 2 +(\) ! +p(a)rameter ABCparameterXYZ 3 12 4 5 +[pq](a)rameter ABCparameterXYZ 3 12 4 5 + +; now try escaped brackets: +- match_default bk_parens REG_BASIC +\(a\) zzzaazz 3 4 3 4 +\(\) zzz 0 0 0 0 +\(\) "" 0 0 0 0 +\( ! +\) ! +\(aa ! +aa\) ! +() () 0 2 +(a) (a) 0 3 +(\) ! +\() ! + +; now move on to "." wildcards +- match_default normal REG_EXTENDED REG_STARTEND +. a 0 1 +. \n 0 1 +. \r 0 1 +. \0 0 1 + +; +; now move on to the repetion ops, +; starting with operator * +- match_default normal REG_EXTENDED +a* b 0 0 +ab* a 0 1 +ab* ab 0 2 +ab* sssabbbbbbsss 3 10 +ab*c* a 0 1 +ab*c* abbb 0 4 +ab*c* accc 0 4 +ab*c* abbcc 0 5 +*a ! +\<* ! +\>* ! +\n* \n\n 0 2 +\** ** 0 2 +\* * 0 1 + +; now try operator + +ab+ a -1 -1 +ab+ ab 0 2 +ab+ sssabbbbbbsss 3 10 +ab+c+ a -1 -1 +ab+c+ abbb -1 -1 +ab+c+ accc -1 -1 +ab+c+ abbcc 0 5 ++a ! +\<+ ! +\>+ ! +\n+ \n\n 0 2 +\+ + 0 1 +\+ ++ 0 1 +\++ ++ 0 2 + +; now try operator ? +- match_default normal REG_EXTENDED +a? b 0 0 +ab? a 0 1 +ab? ab 0 2 +ab? sssabbbbbbsss 3 5 +ab?c? a 0 1 +ab?c? abbb 0 2 +ab?c? accc 0 2 +ab?c? abcc 0 3 +?a ! +\<? ! +\>? ! +\n? \n\n 0 1 +\? ? 0 1 +\? ?? 0 1 +\?? ?? 0 1 + +; now try operator {} +- match_default normal REG_EXTENDED +a{2} a -1 -1 +a{2} aa 0 2 +a{2} aaa 0 2 +a{2,} a -1 -1 +a{2,} aa 0 2 +a{2,} aaaaa 0 5 +a{2,4} a -1 -1 +a{2,4} aa 0 2 +a{2,4} aaa 0 3 +a{2,4} aaaa 0 4 +a{2,4} aaaaa 0 4 +a{} ! +a{2 ! 
+a} a} 0 2 +\{\} {} 0 2 + +- match_default normal REG_BASIC +a\{2\} a -1 -1 +a\{2\} aa 0 2 +a\{2\} aaa 0 2 +a\{2,\} a -1 -1 +a\{2,\} aa 0 2 +a\{2,\} aaaaa 0 5 +a\{2,4\} a -1 -1 +a\{2,4\} aa 0 2 +a\{2,4\} aaa 0 3 +a\{2,4\} aaaa 0 4 +a\{2,4\} aaaaa 0 4 +{} {} 0 2 + +; now test the alternation operator | +- match_default normal REG_EXTENDED +a|b a 0 1 +a|b b 0 1 +a(b|c) ab 0 2 1 2 +a(b|c) ac 0 2 1 2 +a(b|c) ad -1 -1 -1 -1 +a\| a| 0 2 + +; now test the set operator [] +- match_default normal REG_EXTENDED +; try some literals first +[abc] a 0 1 +[abc] b 0 1 +[abc] c 0 1 +[abc] d -1 -1 +[^bcd] a 0 1 +[^bcd] b -1 -1 +[^bcd] d -1 -1 +[^bcd] e 0 1 +a[b]c abc 0 3 +a[ab]c abc 0 3 +a[^ab]c adc 0 3 +a[]b]c a]c 0 3 +a[[b]c a[c 0 3 +a[-b]c a-c 0 3 +a[^]b]c adc 0 3 +a[^-b]c adc 0 3 +a[b-]c a-c 0 3 +a[b ! +a[] ! + +; then some ranges +[b-e] a -1 -1 +[b-e] b 0 1 +[b-e] e 0 1 +[b-e] f -1 -1 +[^b-e] a 0 1 +[^b-e] b -1 -1 +[^b-e] e -1 -1 +[^b-e] f 0 1 +a[1-3]c a2c 0 3 +a[3-1]c ! +a[1-3-5]c ! +a[1- ! + +; and some classes +a[[:alpha:]]c abc 0 3 +a[[:unknown:]]c ! +a[[: ! +a[[:alpha ! +a[[:alpha:] ! +a[[:alpha,:] ! +a[[:]:]]b ! +a[[:-:]]b ! +a[[:alph:]] ! +a[[:alphabet:]] ! +[[:alnum:]]+ -%@a0X_- 3 6 +[[:alpha:]]+ -%@aX_0- 3 5 +[[:blank:]]+ "a \tb" 1 4 +[[:cntrl:]]+ a\n\tb 1 3 +[[:digit:]]+ a019b 1 4 +[[:graph:]]+ " a%b " 1 4 +[[:lower:]]+ AabC 1 3 +; This test fails with STLPort, disable for now as this is a corner case anyway... +;[[:print:]]+ "\na b\n" 1 4 +[[:punct:]]+ " %-&\t" 1 4 +[[:space:]]+ "a \n\t\rb" 1 5 +[[:upper:]]+ aBCd 1 3 +[[:xdigit:]]+ p0f3Cx 1 5 + +; now test flag settings: +- escape_in_lists REG_NO_POSIX_TEST +[\n] \n 0 1 +- REG_NO_POSIX_TEST + +; line anchors +- match_default normal REG_EXTENDED +^ab ab 0 2 +^ab xxabxx -1 -1 +ab$ ab 0 2 +ab$ abxx -1 -1 +- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL +^ab ab -1 -1 +^ab xxabxx -1 -1 +ab$ ab -1 -1 +ab$ abxx -1 -1 + +; back references +- match_default normal REG_PERL +a(b)\2c ! 
+a(b\1)c ! +a(b*)c\1d abbcbbd 0 7 1 3 +a(b*)c\1d abbcbd -1 -1 +a(b*)c\1d abbcbbbd -1 -1 +^(.)\1 abc -1 -1 +a([bc])\1d abcdabbd 4 8 5 6 +; strictly speaking this is at best ambiguous, at worst wrong, this is what most +; re implimentations will match though. +a(([bc])\2)*d abbccd 0 6 3 5 3 4 + +a(([bc])\2)*d abbcbd -1 -1 +a((b)*\2)*d abbbd 0 5 1 4 2 3 +; perl only: +(ab*)[ab]*\1 ababaaa 0 7 0 1 +(a)\1bcd aabcd 0 5 0 1 +(a)\1bc*d aabcd 0 5 0 1 +(a)\1bc*d aabd 0 4 0 1 +(a)\1bc*d aabcccd 0 7 0 1 +(a)\1bc*[ce]d aabcccd 0 7 0 1 +^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 + +; posix only: +- match_default extended REG_EXTENDED +(ab*)[ab]*\1 ababaaa 0 7 0 1 + +; +; word operators: +\w a 0 1 +\w z 0 1 +\w A 0 1 +\w Z 0 1 +\w _ 0 1 +\w } -1 -1 +\w ` -1 -1 +\w [ -1 -1 +\w @ -1 -1 +; non-word: +\W a -1 -1 +\W z -1 -1 +\W A -1 -1 +\W Z -1 -1 +\W _ -1 -1 +\W } 0 1 +\W ` 0 1 +\W [ 0 1 +\W @ 0 1 +; word start: +\<abcd " abcd" 2 6 +\<ab cab -1 -1 +\<ab "\nab" 1 3 +\<tag ::tag 2 5 +;word end: +abc\> abc 0 3 +abc\> abcd -1 -1 +abc\> abc\n 0 3 +abc\> abc:: 0 3 +; word boundary: +\babcd " abcd" 2 6 +\bab cab -1 -1 +\bab "\nab" 1 3 +\btag ::tag 2 5 +abc\b abc 0 3 +abc\b abcd -1 -1 +abc\b abc\n 0 3 +abc\b abc:: 0 3 +; within word: +\B ab 1 1 +a\Bb ab 0 2 +a\B ab 0 1 +a\B a -1 -1 +a\B "a " -1 -1 + +; +; buffer operators: +\`abc abc 0 3 +\`abc \nabc -1 -1 +\`abc " abc" -1 -1 +abc\' abc 0 3 +abc\' abc\n -1 -1 +abc\' "abc " -1 -1 + +; +; now follows various complex expressions designed to try and bust the matcher: +a(((b)))c abc 0 3 1 2 1 2 1 2 +a(b|(c))d abd 0 3 1 2 -1 -1 +a(b|(c))d acd 0 3 1 2 1 2 +a(b*|c)d abbd 0 4 1 3 +; just gotta have one DFA-buster, of course +a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 +; and an inline expansion in case somebody gets tricky +a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 +; and in case somebody just slips in an NFA... 
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 +; one really big one +1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 +; fish for problems as brackets go past 8 +[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 +[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 +[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 +[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 +; and as parenthesis go past 9: +(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 +(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 +(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 +(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 +(a)d|(b)c abc 1 3 -1 -1 1 2 +_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 + +; subtleties of matching +;a(b)?c\1d acd 0 3 -1 -1 +; POSIX is about the following test: +a(b)?c\1d acd -1 -1 -1 -1 +a(b?c)+d accd 0 4 2 3 +(wee|week)(knights|night) weeknights 0 10 0 3 3 10 +.* abc 0 3 +a(b|(c))d abd 0 3 1 2 -1 -1 +a(b|(c))d acd 0 3 1 2 1 2 +a(b*|c|e)d abbd 0 4 1 3 +a(b*|c|e)d acd 0 3 1 2 +a(b*|c|e)d ad 0 2 1 1 +a(b?)c abc 0 3 1 2 +a(b?)c ac 0 2 1 1 +a(b+)c abc 0 3 1 2 +a(b+)c abbbc 0 5 1 4 +a(b*)c ac 0 2 1 1 +(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 +a([bc]?)c abc 0 3 1 2 +a([bc]?)c ac 0 2 1 1 +a([bc]+)c abc 0 3 1 2 +a([bc]+)c abcc 0 4 1 3 +a([bc]+)bc abcbc 0 5 1 3 +a(bb+|b)b abb 0 3 1 2 +a(bbb+|bb+|b)b abb 0 3 1 2 +a(bbb+|bb+|b)b abbb 0 4 1 3 +a(bbb+|bb+|b)bb abbb 0 4 1 2 +(.*).* abcdef 0 6 0 6 +(a*)* bc 0 0 0 0 +xyx*xz xyxxxxyxxxz 5 11 + +; do we get the right subexpression when it is used more than once? 
+a(b|c)*d ad 0 2 -1 -1 +a(b|c)*d abcd 0 4 2 3 +a(b|c)+d abd 0 3 1 2 +a(b|c)+d abcd 0 4 2 3 +a(b|c?)+d ad 0 2 1 1 +a(b|c){0,0}d ad 0 2 -1 -1 +a(b|c){0,1}d ad 0 2 -1 -1 +a(b|c){0,1}d abd 0 3 1 2 +a(b|c){0,2}d ad 0 2 -1 -1 +a(b|c){0,2}d abcd 0 4 2 3 +a(b|c){0,}d ad 0 2 -1 -1 +a(b|c){0,}d abcd 0 4 2 3 +a(b|c){1,1}d abd 0 3 1 2 +a(b|c){1,2}d abd 0 3 1 2 +a(b|c){1,2}d abcd 0 4 2 3 +a(b|c){1,}d abd 0 3 1 2 +a(b|c){1,}d abcd 0 4 2 3 +a(b|c){2,2}d acbd 0 4 2 3 +a(b|c){2,2}d abcd 0 4 2 3 +a(b|c){2,4}d abcd 0 4 2 3 +a(b|c){2,4}d abcbd 0 5 3 4 +a(b|c){2,4}d abcbcd 0 6 4 5 +a(b|c){2,}d abcd 0 4 2 3 +a(b|c){2,}d abcbd 0 5 3 4 +; perl only: these conflict with the POSIX test below +;a(b|c?)+d abcd 0 4 3 3 +;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 +;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 + +; posix only: +- match_default extended REG_EXTENDED REG_STARTEND + +a(b|c?)+d abcd 0 4 2 3 +a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 +a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 +a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 +a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1 +a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3 +a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1 +a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3 +a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1 + +- match_default normal REG_PERL +; try to match C++ syntax elements: +; line comment: +//[^\n]* "++i //here is a line comment\n" 4 28 +; block comment: +/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 +/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 +/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 +/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 +/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 +/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 +; preprossor directives: +^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 +^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1 +; perl only: +^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 +; literals: 
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 +((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 +((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 +((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 +((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 +; strings: +'([^\\']|\\.)*' '\\x3A' 0 6 4 5 +'([^\\']|\\.)*' '\\'' 0 4 1 3 +'([^\\']|\\.)*' '\\n' 0 4 1 3 + +; finally try some case insensitive matches: +- match_default normal REG_EXTENDED REG_ICASE +; upper and lower have no meaning here so they fail, however these +; may compile with other libraries... +;[[:lower:]] ! +;[[:upper:]] ! +0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 + +; known and suspected bugs: +- match_default normal REG_EXTENDED +\( ( 0 1 +\) ) 0 1 +\$ $ 0 1 +\^ ^ 0 1 +\. . 0 1 +\* * 0 1 +\+ + 0 1 +\? ? 
0 1 +\[ [ 0 1 +\] ] 0 1 +\| | 0 1 +\\ \\ 0 1 +# # 0 1 +\# # 0 1 +a- a- 0 2 +\- - 0 1 +\{ { 0 1 +\} } 0 1 +0 0 0 1 +1 1 0 1 +9 9 0 1 +b b 0 1 +B B 0 1 +< < 0 1 +> > 0 1 +w w 0 1 +W W 0 1 +` ` 0 1 +' ' 0 1 +\n \n 0 1 +, , 0 1 +a a 0 1 +f f 0 1 +n n 0 1 +r r 0 1 +t t 0 1 +v v 0 1 +c c 0 1 +x x 0 1 +: : 0 1 +(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 + +- match_default normal REG_EXTENDED REG_ICASE +a A 0 1 +A a 0 1 +[abc]+ abcABC 0 6 +[ABC]+ abcABC 0 6 +[a-z]+ abcABC 0 6 +[A-Z]+ abzANZ 0 6 +[a-Z]+ abzABZ 0 6 +[A-z]+ abzABZ 0 6 +[[:lower:]]+ abyzABYZ 0 8 +[[:upper:]]+ abzABZ 0 6 +[[:alpha:]]+ abyzABYZ 0 8 +[[:alnum:]]+ 09abyzABYZ 0 10 + +; word start: +\<abcd " abcd" 2 6 +\<ab cab -1 -1 +\<ab "\nab" 1 3 +\<tag ::tag 2 5 +;word end: +abc\> abc 0 3 +abc\> abcd -1 -1 +abc\> abc\n 0 3 +abc\> abc:: 0 3 + +; collating elements and rewritten set code: +- match_default normal REG_EXTENDED REG_STARTEND +;[[.zero.]] 0 0 1 +;[[.one.]] 1 0 1 +;[[.two.]] 2 0 1 +;[[.three.]] 3 0 1 +[[.a.]] baa 1 2 +;[[.right-curly-bracket.]] } 0 1 +;[[.NUL.]] \0 0 1 +[[:<:]z] ! +[a[:>:]] ! +[[=a=]] a 0 1 +;[[=right-curly-bracket=]] } 0 1 +- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE +[[.A.]] A 0 1 +[[.A.]] a 0 1 +[[.A.]-b]+ AaBb 0 4 +[A-[.b.]]+ AaBb 0 4 +[[.a.]-B]+ AaBb 0 4 +[a-[.B.]]+ AaBb 0 4 +- match_default normal REG_EXTENDED REG_STARTEND +[[.a.]-c]+ abcd 0 3 +[a-[.c.]]+ abcd 0 3 +[[:alpha:]-a] ! +[a-[:alpha:]] ! 
+ +; try mutli-character ligatures: +;[[.ae.]] ae 0 2 +;[[.ae.]] aE -1 -1 +;[[.AE.]] AE 0 2 +;[[.Ae.]] Ae 0 2 +;[[.ae.]-b] a -1 -1 +;[[.ae.]-b] b 0 1 +;[[.ae.]-b] ae 0 2 +;[a-[.ae.]] a 0 1 +;[a-[.ae.]] b -1 -1 +;[a-[.ae.]] ae 0 2 +- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE +;[[.ae.]] AE 0 2 +;[[.ae.]] Ae 0 2 +;[[.AE.]] Ae 0 2 +;[[.Ae.]] aE 0 2 +;[[.AE.]-B] a -1 -1 +;[[.Ae.]-b] b 0 1 +;[[.Ae.]-b] B 0 1 +;[[.ae.]-b] AE 0 2 + +- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST +\s+ "ab ab" 2 5 +\S+ " abc " 2 5 + +- match_default normal REG_EXTENDED REG_STARTEND +\`abc abc 0 3 +\`abc aabc -1 -1 +abc\' abc 0 3 +abc\' abcd -1 -1 +abc\' abc\n\n -1 -1 +abc\' abc 0 3 + +; extended repeat checking to exercise new algorithms: +ab.*xy abxy_ 0 4 +ab.*xy ab_xy_ 0 5 +ab.*xy abxy 0 4 +ab.*xy ab_xy 0 5 +ab.* ab 0 2 +ab.* ab__ 0 4 + +ab.{2,5}xy ab__xy_ 0 6 +ab.{2,5}xy ab____xy_ 0 8 +ab.{2,5}xy ab_____xy_ 0 9 +ab.{2,5}xy ab__xy 0 6 +ab.{2,5}xy ab_____xy 0 9 +ab.{2,5} ab__ 0 4 +ab.{2,5} ab_______ 0 7 +ab.{2,5}xy ab______xy -1 -1 +ab.{2,5}xy ab_xy -1 -1 + +ab.*?xy abxy_ 0 4 +ab.*?xy ab_xy_ 0 5 +ab.*?xy abxy 0 4 +ab.*?xy ab_xy 0 5 +ab.*? ab 0 2 +ab.*? ab__ 0 4 + +ab.{2,5}?xy ab__xy_ 0 6 +ab.{2,5}?xy ab____xy_ 0 8 +ab.{2,5}?xy ab_____xy_ 0 9 +ab.{2,5}?xy ab__xy 0 6 +ab.{2,5}?xy ab_____xy 0 9 +ab.{2,5}? ab__ 0 4 +ab.{2,5}? ab_______ 0 7 +ab.{2,5}?xy ab______xy -1 -1 +ab.{2,5}xy ab_xy -1 -1 + +; again but with slower algorithm variant: +- match_default REG_EXTENDED +; now again for single character repeats: + +ab_*xy abxy_ 0 4 +ab_*xy ab_xy_ 0 5 +ab_*xy abxy 0 4 +ab_*xy ab_xy 0 5 +ab_* ab 0 2 +ab_* ab__ 0 4 + +ab_{2,5}xy ab__xy_ 0 6 +ab_{2,5}xy ab____xy_ 0 8 +ab_{2,5}xy ab_____xy_ 0 9 +ab_{2,5}xy ab__xy 0 6 +ab_{2,5}xy ab_____xy 0 9 +ab_{2,5} ab__ 0 4 +ab_{2,5} ab_______ 0 7 +ab_{2,5}xy ab______xy -1 -1 +ab_{2,5}xy ab_xy -1 -1 + +ab_*?xy abxy_ 0 4 +ab_*?xy ab_xy_ 0 5 +ab_*?xy abxy 0 4 +ab_*?xy ab_xy 0 5 +ab_*? ab 0 2 +ab_*? 
ab__ 0 4 + +ab_{2,5}?xy ab__xy_ 0 6 +ab_{2,5}?xy ab____xy_ 0 8 +ab_{2,5}?xy ab_____xy_ 0 9 +ab_{2,5}?xy ab__xy 0 6 +ab_{2,5}?xy ab_____xy 0 9 +ab_{2,5}? ab__ 0 4 +ab_{2,5}? ab_______ 0 7 +ab_{2,5}?xy ab______xy -1 -1 +ab_{2,5}xy ab_xy -1 -1 + +; and again for sets: +ab[_,;]*xy abxy_ 0 4 +ab[_,;]*xy ab_xy_ 0 5 +ab[_,;]*xy abxy 0 4 +ab[_,;]*xy ab_xy 0 5 +ab[_,;]* ab 0 2 +ab[_,;]* ab__ 0 4 + +ab[_,;]{2,5}xy ab__xy_ 0 6 +ab[_,;]{2,5}xy ab____xy_ 0 8 +ab[_,;]{2,5}xy ab_____xy_ 0 9 +ab[_,;]{2,5}xy ab__xy 0 6 +ab[_,;]{2,5}xy ab_____xy 0 9 +ab[_,;]{2,5} ab__ 0 4 +ab[_,;]{2,5} ab_______ 0 7 +ab[_,;]{2,5}xy ab______xy -1 -1 +ab[_,;]{2,5}xy ab_xy -1 -1 + +ab[_,;]*?xy abxy_ 0 4 +ab[_,;]*?xy ab_xy_ 0 5 +ab[_,;]*?xy abxy 0 4 +ab[_,;]*?xy ab_xy 0 5 +ab[_,;]*? ab 0 2 +ab[_,;]*? ab__ 0 4 + +ab[_,;]{2,5}?xy ab__xy_ 0 6 +ab[_,;]{2,5}?xy ab____xy_ 0 8 +ab[_,;]{2,5}?xy ab_____xy_ 0 9 +ab[_,;]{2,5}?xy ab__xy 0 6 +ab[_,;]{2,5}?xy ab_____xy 0 9 +ab[_,;]{2,5}? ab__ 0 4 +ab[_,;]{2,5}? ab_______ 0 7 +ab[_,;]{2,5}?xy ab______xy -1 -1 +ab[_,;]{2,5}xy ab_xy -1 -1 + +; and again for tricky sets with digraphs: +;ab[_[.ae.]]*xy abxy_ 0 4 +;ab[_[.ae.]]*xy ab_xy_ 0 5 +;ab[_[.ae.]]*xy abxy 0 4 +;ab[_[.ae.]]*xy ab_xy 0 5 +;ab[_[.ae.]]* ab 0 2 +;ab[_[.ae.]]* ab__ 0 4 + +;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 +;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 +;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 +;ab[_[.ae.]]{2,5}xy ab__xy 0 6 +;ab[_[.ae.]]{2,5}xy ab_____xy 0 9 +;ab[_[.ae.]]{2,5} ab__ 0 4 +;ab[_[.ae.]]{2,5} ab_______ 0 7 +;ab[_[.ae.]]{2,5}xy ab______xy -1 -1 +;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 + +;ab[_[.ae.]]*?xy abxy_ 0 4 +;ab[_[.ae.]]*?xy ab_xy_ 0 5 +;ab[_[.ae.]]*?xy abxy 0 4 +;ab[_[.ae.]]*?xy ab_xy 0 5 +;ab[_[.ae.]]*? ab 0 2 +;ab[_[.ae.]]*? ab__ 0 2 + +;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 +;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 +;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 +;ab[_[.ae.]]{2,5}?xy ab__xy 0 6 +;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 +;ab[_[.ae.]]{2,5}? ab__ 0 4 +;ab[_[.ae.]]{2,5}? 
ab_______ 0 4 +;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 +;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 + +; new bugs detected in spring 2003: +- normal match_continuous REG_NO_POSIX_TEST +b abc 1 2 + +() abc 0 0 0 0 +^() abc 0 0 0 0 +^()+ abc 0 0 0 0 +^(){1} abc 0 0 0 0 +^(){2} abc 0 0 0 0 +^((){2}) abc 0 0 0 0 0 0 +() "" 0 0 0 0 +()\1 "" 0 0 0 0 +()\1 a 0 0 0 0 +a()\1b ab 0 2 1 1 +a()b\1 ab 0 2 1 1 + +; subtleties of matching with no sub-expressions marked +- normal match_nosubs REG_NO_POSIX_TEST +a(b?c)+d accd 0 4 +(wee|week)(knights|night) weeknights 0 10 +.* abc 0 3 +a(b|(c))d abd 0 3 +a(b|(c))d acd 0 3 +a(b*|c|e)d abbd 0 4 +a(b*|c|e)d acd 0 3 +a(b*|c|e)d ad 0 2 +a(b?)c abc 0 3 +a(b?)c ac 0 2 +a(b+)c abc 0 3 +a(b+)c abbbc 0 5 +a(b*)c ac 0 2 +(a|ab)(bc([de]+)f|cde) abcdef 0 6 +a([bc]?)c abc 0 3 +a([bc]?)c ac 0 2 +a([bc]+)c abc 0 3 +a([bc]+)c abcc 0 4 +a([bc]+)bc abcbc 0 5 +a(bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abbb 0 4 +a(bbb+|bb+|b)bb abbb 0 4 +(.*).* abcdef 0 6 +(a*)* bc 0 0 + +- normal nosubs REG_NO_POSIX_TEST +a(b?c)+d accd 0 4 +(wee|week)(knights|night) weeknights 0 10 +.* abc 0 3 +a(b|(c))d abd 0 3 +a(b|(c))d acd 0 3 +a(b*|c|e)d abbd 0 4 +a(b*|c|e)d acd 0 3 +a(b*|c|e)d ad 0 2 +a(b?)c abc 0 3 +a(b?)c ac 0 2 +a(b+)c abc 0 3 +a(b+)c abbbc 0 5 +a(b*)c ac 0 2 +(a|ab)(bc([de]+)f|cde) abcdef 0 6 +a([bc]?)c abc 0 3 +a([bc]?)c ac 0 2 +a([bc]+)c abc 0 3 +a([bc]+)c abcc 0 4 +a([bc]+)bc abcbc 0 5 +a(bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abb 0 3 +a(bbb+|bb+|b)b abbb 0 4 +a(bbb+|bb+|b)bb abbb 0 4 +(.*).* abcdef 0 6 +(a*)* bc 0 0 + diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am new file mode 100644 index 0000000..d578733 --- /dev/null +++ b/testsuite/Makefile.am @@ -0,0 +1,94 @@ +CLEANFILES = tmp* core *.core $(EXTRA_PROGRAMS) *.*out *.log + +TESTS = $(check_PROGRAMS) $(SEDTESTS) +SEDTESTS = + +LDADD = ../lib/libsed.a @INTLLIBS@ +noinst_HEADERS = testcases.h ptestcases.h +AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_builddir)/lib + +if TEST_REGEX +check_PROGRAMS = 
bug-regex7 \ + bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \ + bug-regex13 bug-regex14 bug-regex15 bug-regex16 bug-regex21 \ + tst-pcre tst-boost runtests runptests tst-rxspencer tst-regex2 + +SEDTESTS += space +endif + +SEDTESTS += \ + appquit enable sep inclib 8bit newjis xabcx dollar noeol noeolw \ + modulo numsub numsub2 numsub3 numsub4 numsub5 0range bkslashes \ + head madding mac-mf empty xbxcx xbxcx3 recall recall2 xemacs \ + fasts uniq manis khadafy linecnt eval distrib 8to7 y-bracket \ + y-newline allsub cv-vars classes middle bsd stdin flipcase \ + insens subwrite writeout readin \ + help version file quiet \ + factor binary3 binary2 binary dc + +TESTS_ENVIRONMENT = MAKE="$(MAKE)" VERSION="$(VERSION)" $(srcdir)/runtest + +EXTRA_DIST = \ + PCRE.tests BOOST.tests SPENCER.tests \ + runtest Makefile.tests \ + 0range.good 0range.inp 0range.sed \ + 8bit.good 8bit.inp 8bit.sed \ + 8to7.good 8to7.inp 8to7.sed \ + allsub.good allsub.inp allsub.sed \ + appquit.good appquit.inp appquit.sed \ + binary.good binary.inp binary.sed binary2.sed binary3.sed \ + bkslashes.good bkslashes.inp bkslashes.sed \ + bsd.good bsd.sh \ + cv-vars.good cv-vars.inp cv-vars.sed \ + classes.good classes.inp classes.sed \ + dc.good dc.inp dc.sed \ + distrib.good distrib.inp distrib.sed distrib.sh \ + dollar.good dollar.inp dollar.sed \ + empty.good empty.inp empty.sed \ + enable.good enable.inp enable.sed \ + eval.good eval.inp eval.sed \ + factor.good factor.inp factor.sed \ + fasts.good fasts.inp fasts.sed \ + flipcase.good flipcase.inp flipcase.sed \ + head.good head.inp head.sed \ + inclib.good inclib.inp inclib.sed \ + insens.good insens.inp insens.sed \ + khadafy.good khadafy.inp khadafy.sed \ + linecnt.good linecnt.inp linecnt.sed \ + space.good space.inp space.sed \ + mac-mf.good mac-mf.inp mac-mf.sed \ + madding.good madding.inp madding.sed \ + manis.good manis.inp manis.sed \ + middle.good middle.sed middle.inp \ + modulo.good modulo.sed modulo.inp \ + newjis.good 
newjis.inp newjis.sed \ + noeol.good noeol.inp noeol.sed \ + noeolw.good noeolw.1good noeolw.2good noeolw.sed \ + numsub.good numsub.inp numsub.sed \ + numsub2.good numsub2.inp numsub2.sed \ + numsub3.good numsub3.inp numsub3.sed \ + numsub4.good numsub4.inp numsub4.sed \ + numsub5.good numsub5.inp numsub5.sed \ + readin.good readin.in2 readin.inp readin.sed \ + recall.good recall.inp recall.sed \ + recall2.good recall2.inp recall2.sed \ + sep.good sep.inp sep.sed \ + subwrite.inp subwrite.sed subwrt1.good subwrt2.good \ + uniq.good uniq.inp uniq.sed \ + version.gin \ + writeout.inp writeout.sed wrtout1.good wrtout2.good \ + xabcx.good xabcx.inp xabcx.sed \ + xbxcx.good xbxcx.inp xbxcx.sed \ + xbxcx3.good xbxcx3.inp xbxcx3.sed \ + xemacs.good xemacs.inp xemacs.sed \ + y-bracket.good y-bracket.sed y-bracket.inp \ + y-newline.good y-newline.sed y-newline.inp + +clean-local: + test x$(srcdir) = x. || rm -f readin.in2 eval.in2 + +# automake makes `check' depend on $(TESTS). Declare +# dummy targets for $(TESTS) so that make does not complain. + +.PHONY: $(SEDTESTS) +$(SEDTESTS): diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in new file mode 100644 index 0000000..3312d25 --- /dev/null +++ b/testsuite/Makefile.in @@ -0,0 +1,829 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@TEST_REGEX_TRUE@check_PROGRAMS = bug-regex7$(EXEEXT) \ +@TEST_REGEX_TRUE@ bug-regex8$(EXEEXT) bug-regex9$(EXEEXT) \ +@TEST_REGEX_TRUE@ bug-regex10$(EXEEXT) bug-regex11$(EXEEXT) \ +@TEST_REGEX_TRUE@ bug-regex12$(EXEEXT) bug-regex13$(EXEEXT) \ +@TEST_REGEX_TRUE@ bug-regex14$(EXEEXT) bug-regex15$(EXEEXT) \ +@TEST_REGEX_TRUE@ bug-regex16$(EXEEXT) bug-regex21$(EXEEXT) \ +@TEST_REGEX_TRUE@ tst-pcre$(EXEEXT) tst-boost$(EXEEXT) \ +@TEST_REGEX_TRUE@ runtests$(EXEEXT) runptests$(EXEEXT) \ +@TEST_REGEX_TRUE@ tst-rxspencer$(EXEEXT) tst-regex2$(EXEEXT) +@TEST_REGEX_TRUE@am__append_1 = space +subdir = testsuite +DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/version.gin +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/build-aux/absolute-header.m4 \ + $(top_srcdir)/build-aux/acl.m4 \ + $(top_srcdir)/build-aux/alloca.m4 \ + $(top_srcdir)/build-aux/codeset.m4 \ + $(top_srcdir)/build-aux/error.m4 \ + $(top_srcdir)/build-aux/exitfail.m4 \ + $(top_srcdir)/build-aux/extensions.m4 \ + $(top_srcdir)/build-aux/getdelim.m4 \ + $(top_srcdir)/build-aux/getline.m4 \ + $(top_srcdir)/build-aux/getopt.m4 \ + $(top_srcdir)/build-aux/gettext.m4 \ + $(top_srcdir)/build-aux/glibc2.m4 \ + $(top_srcdir)/build-aux/glibc21.m4 \ + $(top_srcdir)/build-aux/gnulib-comp.m4 \ + 
$(top_srcdir)/build-aux/iconv.m4 \ + $(top_srcdir)/build-aux/intdiv0.m4 \ + $(top_srcdir)/build-aux/intmax.m4 \ + $(top_srcdir)/build-aux/inttypes-pri.m4 \ + $(top_srcdir)/build-aux/inttypes_h.m4 \ + $(top_srcdir)/build-aux/lcmessage.m4 \ + $(top_srcdir)/build-aux/lib-ld.m4 \ + $(top_srcdir)/build-aux/lib-link.m4 \ + $(top_srcdir)/build-aux/lib-prefix.m4 \ + $(top_srcdir)/build-aux/localcharset.m4 \ + $(top_srcdir)/build-aux/lock.m4 \ + $(top_srcdir)/build-aux/longdouble.m4 \ + $(top_srcdir)/build-aux/longlong.m4 \ + $(top_srcdir)/build-aux/mbchar.m4 \ + $(top_srcdir)/build-aux/mbiter.m4 \ + $(top_srcdir)/build-aux/mbrtowc.m4 \ + $(top_srcdir)/build-aux/mbstate_t.m4 \ + $(top_srcdir)/build-aux/memchr.m4 \ + $(top_srcdir)/build-aux/memcmp.m4 \ + $(top_srcdir)/build-aux/memmove.m4 \ + $(top_srcdir)/build-aux/mkstemp.m4 \ + $(top_srcdir)/build-aux/nls.m4 \ + $(top_srcdir)/build-aux/pathmax.m4 \ + $(top_srcdir)/build-aux/po.m4 \ + $(top_srcdir)/build-aux/printf-posix.m4 \ + $(top_srcdir)/build-aux/progtest.m4 \ + $(top_srcdir)/build-aux/quote.m4 \ + $(top_srcdir)/build-aux/quotearg.m4 \ + $(top_srcdir)/build-aux/regex.m4 \ + $(top_srcdir)/build-aux/signed.m4 \ + $(top_srcdir)/build-aux/size_max.m4 \ + $(top_srcdir)/build-aux/ssize_t.m4 \ + $(top_srcdir)/build-aux/stat-macros.m4 \ + $(top_srcdir)/build-aux/stdbool.m4 \ + $(top_srcdir)/build-aux/stdint.m4 \ + $(top_srcdir)/build-aux/stdint_h.m4 \ + $(top_srcdir)/build-aux/strcase.m4 \ + $(top_srcdir)/build-aux/strerror.m4 \ + $(top_srcdir)/build-aux/strverscmp.m4 \ + $(top_srcdir)/build-aux/sys_stat_h.m4 \ + $(top_srcdir)/build-aux/uintmax_t.m4 \ + $(top_srcdir)/build-aux/ulonglong.m4 \ + $(top_srcdir)/build-aux/unistd_h.m4 \ + $(top_srcdir)/build-aux/unlocked-io.m4 \ + $(top_srcdir)/build-aux/visibility.m4 \ + $(top_srcdir)/build-aux/wchar_t.m4 \ + $(top_srcdir)/build-aux/wcwidth.m4 \ + $(top_srcdir)/build-aux/wint_t.m4 \ + $(top_srcdir)/build-aux/xalloc.m4 \ + $(top_srcdir)/build-aux/xsize.m4 $(top_srcdir)/configure.ac 
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = version.good +bug_regex10_SOURCES = bug-regex10.c +bug_regex10_OBJECTS = bug-regex10.$(OBJEXT) +bug_regex10_LDADD = $(LDADD) +bug_regex10_DEPENDENCIES = ../lib/libsed.a +bug_regex11_SOURCES = bug-regex11.c +bug_regex11_OBJECTS = bug-regex11.$(OBJEXT) +bug_regex11_LDADD = $(LDADD) +bug_regex11_DEPENDENCIES = ../lib/libsed.a +bug_regex12_SOURCES = bug-regex12.c +bug_regex12_OBJECTS = bug-regex12.$(OBJEXT) +bug_regex12_LDADD = $(LDADD) +bug_regex12_DEPENDENCIES = ../lib/libsed.a +bug_regex13_SOURCES = bug-regex13.c +bug_regex13_OBJECTS = bug-regex13.$(OBJEXT) +bug_regex13_LDADD = $(LDADD) +bug_regex13_DEPENDENCIES = ../lib/libsed.a +bug_regex14_SOURCES = bug-regex14.c +bug_regex14_OBJECTS = bug-regex14.$(OBJEXT) +bug_regex14_LDADD = $(LDADD) +bug_regex14_DEPENDENCIES = ../lib/libsed.a +bug_regex15_SOURCES = bug-regex15.c +bug_regex15_OBJECTS = bug-regex15.$(OBJEXT) +bug_regex15_LDADD = $(LDADD) +bug_regex15_DEPENDENCIES = ../lib/libsed.a +bug_regex16_SOURCES = bug-regex16.c +bug_regex16_OBJECTS = bug-regex16.$(OBJEXT) +bug_regex16_LDADD = $(LDADD) +bug_regex16_DEPENDENCIES = ../lib/libsed.a +bug_regex21_SOURCES = bug-regex21.c +bug_regex21_OBJECTS = bug-regex21.$(OBJEXT) +bug_regex21_LDADD = $(LDADD) +bug_regex21_DEPENDENCIES = ../lib/libsed.a +bug_regex7_SOURCES = bug-regex7.c +bug_regex7_OBJECTS = bug-regex7.$(OBJEXT) +bug_regex7_LDADD = $(LDADD) +bug_regex7_DEPENDENCIES = ../lib/libsed.a +bug_regex8_SOURCES = bug-regex8.c +bug_regex8_OBJECTS = bug-regex8.$(OBJEXT) +bug_regex8_LDADD = $(LDADD) +bug_regex8_DEPENDENCIES = ../lib/libsed.a +bug_regex9_SOURCES = bug-regex9.c +bug_regex9_OBJECTS = bug-regex9.$(OBJEXT) +bug_regex9_LDADD = $(LDADD) +bug_regex9_DEPENDENCIES = ../lib/libsed.a +runptests_SOURCES = runptests.c +runptests_OBJECTS = runptests.$(OBJEXT) +runptests_LDADD = $(LDADD) 
+runptests_DEPENDENCIES = ../lib/libsed.a +runtests_SOURCES = runtests.c +runtests_OBJECTS = runtests.$(OBJEXT) +runtests_LDADD = $(LDADD) +runtests_DEPENDENCIES = ../lib/libsed.a +tst_boost_SOURCES = tst-boost.c +tst_boost_OBJECTS = tst-boost.$(OBJEXT) +tst_boost_LDADD = $(LDADD) +tst_boost_DEPENDENCIES = ../lib/libsed.a +tst_pcre_SOURCES = tst-pcre.c +tst_pcre_OBJECTS = tst-pcre.$(OBJEXT) +tst_pcre_LDADD = $(LDADD) +tst_pcre_DEPENDENCIES = ../lib/libsed.a +tst_regex2_SOURCES = tst-regex2.c +tst_regex2_OBJECTS = tst-regex2.$(OBJEXT) +tst_regex2_LDADD = $(LDADD) +tst_regex2_DEPENDENCIES = ../lib/libsed.a +tst_rxspencer_SOURCES = tst-rxspencer.c +tst_rxspencer_OBJECTS = tst-rxspencer.$(OBJEXT) +tst_rxspencer_LDADD = $(LDADD) +tst_rxspencer_DEPENDENCIES = ../lib/libsed.a +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = bug-regex10.c bug-regex11.c bug-regex12.c bug-regex13.c \ + bug-regex14.c bug-regex15.c bug-regex16.c bug-regex21.c \ + bug-regex7.c bug-regex8.c bug-regex9.c runptests.c runtests.c \ + tst-boost.c tst-pcre.c tst-regex2.c tst-rxspencer.c +DIST_SOURCES = bug-regex10.c bug-regex11.c bug-regex12.c bug-regex13.c \ + bug-regex14.c bug-regex15.c bug-regex16.c bug-regex21.c \ + bug-regex7.c bug-regex8.c bug-regex9.c runptests.c runtests.c \ + tst-boost.c tst-pcre.c tst-regex2.c tst-rxspencer.c +HEADERS = $(noinst_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ABSOLUTE_STDINT_H = @ABSOLUTE_STDINT_H@ +ABSOLUTE_SYS_STAT_H = @ABSOLUTE_SYS_STAT_H@ +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALLOCA_H = @ALLOCA_H@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ 
+AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@ +BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@ +BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@ +BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@ +BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@ +BUILD_HTML_FALSE = @BUILD_HTML_FALSE@ +BUILD_HTML_TRUE = @BUILD_HTML_TRUE@ +BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@ +CATOBJEXT = @CATOBJEXT@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATADIRNAME = @DATADIRNAME@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GENCAT = @GENCAT@ +GETOPT_H = @GETOPT_H@ +GLIBC2 = @GLIBC2@ +GLIBC21 = @GLIBC21@ +GL_COND_LIBTOOL_FALSE = @GL_COND_LIBTOOL_FALSE@ +GL_COND_LIBTOOL_TRUE = @GL_COND_LIBTOOL_TRUE@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_ASPRINTF = @HAVE_ASPRINTF@ +HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ +HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@ +HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@ +HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@ +HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@ +HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@ +HAVE_SNPRINTF = @HAVE_SNPRINTF@ +HAVE_STDINT_H = @HAVE_STDINT_H@ +HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@ +HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@ +HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +HAVE_WCHAR_H = @HAVE_WCHAR_H@ +HAVE_WPRINTF = @HAVE_WPRINTF@ +HAVE__BOOL = @HAVE__BOOL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INSTOBJEXT = @INSTOBJEXT@ +INTLBISON = @INTLBISON@ +INTLLIBS = @INTLLIBS@ +INTLOBJS = @INTLOBJS@ +INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ 
+LIBINTL = @LIBINTL@ +LIBMULTITHREAD = @LIBMULTITHREAD@ +LIBOBJS = @LIBOBJS@ +LIBPTH = @LIBPTH@ +LIBS = @LIBS@ +LIBSED_LIBDEPS = @LIBSED_LIBDEPS@ +LIBSED_LTLIBDEPS = @LIBSED_LTLIBDEPS@ +LIBTHREAD = @LIBTHREAD@ +LIB_ACL = @LIB_ACL@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBMULTITHREAD = @LTLIBMULTITHREAD@ +LTLIBOBJS = @LTLIBOBJS@ +LTLIBPTH = @LTLIBPTH@ +LTLIBTHREAD = @LTLIBTHREAD@ +MAKEINFO = @MAKEINFO@ +MAKEINFO_HTML_FALSE = @MAKEINFO_HTML_FALSE@ +MAKEINFO_HTML_TRUE = @MAKEINFO_HTML_TRUE@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@ +PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@ +RANLIB = @RANLIB@ +SED_FEATURE_VERSION = @SED_FEATURE_VERSION@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@ +SIZE_T_SUFFIX = @SIZE_T_SUFFIX@ +STDBOOL_H = @STDBOOL_H@ +STDINT_H = @STDINT_H@ +STRIP = @STRIP@ +SYS_STAT_H = @SYS_STAT_H@ +TEST_REGEX_FALSE = @TEST_REGEX_FALSE@ +TEST_REGEX_TRUE = @TEST_REGEX_TRUE@ +TEXI2HTML = @TEXI2HTML@ +TEXI2HTML_HTML_FALSE = @TEXI2HTML_HTML_FALSE@ +TEXI2HTML_HTML_TRUE = @TEXI2HTML_HTML_TRUE@ +UNISTD_H = @UNISTD_H@ +USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@ +WINT_T_SUFFIX = @WINT_T_SUFFIX@ +WOE32DLL = @WOE32DLL@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +ac_ct_CC = @ac_ct_CC@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ 
+build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +CLEANFILES = tmp* core *.core $(EXTRA_PROGRAMS) *.*out *.log +TESTS = $(check_PROGRAMS) $(SEDTESTS) +SEDTESTS = $(am__append_1) appquit enable sep inclib 8bit newjis xabcx \ + dollar noeol noeolw modulo numsub numsub2 numsub3 numsub4 \ + numsub5 0range bkslashes head madding mac-mf empty xbxcx \ + xbxcx3 recall recall2 xemacs fasts uniq manis khadafy linecnt \ + eval distrib 8to7 y-bracket y-newline allsub cv-vars classes \ + middle bsd stdin flipcase insens subwrite writeout readin help \ + version file quiet factor binary3 binary2 binary dc +LDADD = ../lib/libsed.a @INTLLIBS@ +noinst_HEADERS = testcases.h ptestcases.h +AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_builddir)/lib +TESTS_ENVIRONMENT = MAKE="$(MAKE)" VERSION="$(VERSION)" $(srcdir)/runtest +EXTRA_DIST = \ + PCRE.tests BOOST.tests SPENCER.tests \ + runtest Makefile.tests \ + 0range.good 0range.inp 0range.sed \ + 8bit.good 8bit.inp 8bit.sed \ + 8to7.good 8to7.inp 8to7.sed \ + allsub.good allsub.inp allsub.sed \ + appquit.good appquit.inp appquit.sed \ + binary.good binary.inp binary.sed binary2.sed binary3.sed \ + bkslashes.good bkslashes.inp bkslashes.sed \ + bsd.good bsd.sh \ + cv-vars.good cv-vars.inp cv-vars.sed \ + classes.good 
classes.inp classes.sed \ + dc.good dc.inp dc.sed \ + distrib.good distrib.inp distrib.sed distrib.sh \ + dollar.good dollar.inp dollar.sed \ + empty.good empty.inp empty.sed \ + enable.good enable.inp enable.sed \ + eval.good eval.inp eval.sed \ + factor.good factor.inp factor.sed \ + fasts.good fasts.inp fasts.sed \ + flipcase.good flipcase.inp flipcase.sed \ + head.good head.inp head.sed \ + inclib.good inclib.inp inclib.sed \ + insens.good insens.inp insens.sed \ + khadafy.good khadafy.inp khadafy.sed \ + linecnt.good linecnt.inp linecnt.sed \ + space.good space.inp space.sed \ + mac-mf.good mac-mf.inp mac-mf.sed \ + madding.good madding.inp madding.sed \ + manis.good manis.inp manis.sed \ + middle.good middle.sed middle.inp \ + modulo.good modulo.sed modulo.inp \ + newjis.good newjis.inp newjis.sed \ + noeol.good noeol.inp noeol.sed \ + noeolw.good noeolw.1good noeolw.2good noeolw.sed \ + numsub.good numsub.inp numsub.sed \ + numsub2.good numsub2.inp numsub2.sed \ + numsub3.good numsub3.inp numsub3.sed \ + numsub4.good numsub4.inp numsub4.sed \ + numsub5.good numsub5.inp numsub5.sed \ + readin.good readin.in2 readin.inp readin.sed \ + recall.good recall.inp recall.sed \ + recall2.good recall2.inp recall2.sed \ + sep.good sep.inp sep.sed \ + subwrite.inp subwrite.sed subwrt1.good subwrt2.good \ + uniq.good uniq.inp uniq.sed \ + version.gin \ + writeout.inp writeout.sed wrtout1.good wrtout2.good \ + xabcx.good xabcx.inp xabcx.sed \ + xbxcx.good xbxcx.inp xbxcx.sed \ + xbxcx3.good xbxcx3.inp xbxcx3.sed \ + xemacs.good xemacs.inp xemacs.sed \ + y-bracket.good y-bracket.sed y-bracket.inp \ + y-newline.good y-newline.sed y-newline.inp + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && 
$(AUTOMAKE) --gnits testsuite/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnits testsuite/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +version.good: $(top_builddir)/config.status $(srcdir)/version.gin + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +clean-checkPROGRAMS: + -test -z "$(check_PROGRAMS)" || rm -f $(check_PROGRAMS) +bug-regex10$(EXEEXT): $(bug_regex10_OBJECTS) $(bug_regex10_DEPENDENCIES) + @rm -f bug-regex10$(EXEEXT) + $(LINK) $(bug_regex10_LDFLAGS) $(bug_regex10_OBJECTS) $(bug_regex10_LDADD) $(LIBS) +bug-regex11$(EXEEXT): $(bug_regex11_OBJECTS) $(bug_regex11_DEPENDENCIES) + @rm -f bug-regex11$(EXEEXT) + $(LINK) $(bug_regex11_LDFLAGS) $(bug_regex11_OBJECTS) $(bug_regex11_LDADD) $(LIBS) +bug-regex12$(EXEEXT): $(bug_regex12_OBJECTS) $(bug_regex12_DEPENDENCIES) + @rm -f bug-regex12$(EXEEXT) + $(LINK) $(bug_regex12_LDFLAGS) $(bug_regex12_OBJECTS) $(bug_regex12_LDADD) $(LIBS) +bug-regex13$(EXEEXT): $(bug_regex13_OBJECTS) $(bug_regex13_DEPENDENCIES) + @rm -f bug-regex13$(EXEEXT) + $(LINK) $(bug_regex13_LDFLAGS) $(bug_regex13_OBJECTS) $(bug_regex13_LDADD) $(LIBS) +bug-regex14$(EXEEXT): $(bug_regex14_OBJECTS) $(bug_regex14_DEPENDENCIES) + @rm -f bug-regex14$(EXEEXT) + $(LINK) $(bug_regex14_LDFLAGS) $(bug_regex14_OBJECTS) $(bug_regex14_LDADD) 
$(LIBS) +bug-regex15$(EXEEXT): $(bug_regex15_OBJECTS) $(bug_regex15_DEPENDENCIES) + @rm -f bug-regex15$(EXEEXT) + $(LINK) $(bug_regex15_LDFLAGS) $(bug_regex15_OBJECTS) $(bug_regex15_LDADD) $(LIBS) +bug-regex16$(EXEEXT): $(bug_regex16_OBJECTS) $(bug_regex16_DEPENDENCIES) + @rm -f bug-regex16$(EXEEXT) + $(LINK) $(bug_regex16_LDFLAGS) $(bug_regex16_OBJECTS) $(bug_regex16_LDADD) $(LIBS) +bug-regex21$(EXEEXT): $(bug_regex21_OBJECTS) $(bug_regex21_DEPENDENCIES) + @rm -f bug-regex21$(EXEEXT) + $(LINK) $(bug_regex21_LDFLAGS) $(bug_regex21_OBJECTS) $(bug_regex21_LDADD) $(LIBS) +bug-regex7$(EXEEXT): $(bug_regex7_OBJECTS) $(bug_regex7_DEPENDENCIES) + @rm -f bug-regex7$(EXEEXT) + $(LINK) $(bug_regex7_LDFLAGS) $(bug_regex7_OBJECTS) $(bug_regex7_LDADD) $(LIBS) +bug-regex8$(EXEEXT): $(bug_regex8_OBJECTS) $(bug_regex8_DEPENDENCIES) + @rm -f bug-regex8$(EXEEXT) + $(LINK) $(bug_regex8_LDFLAGS) $(bug_regex8_OBJECTS) $(bug_regex8_LDADD) $(LIBS) +bug-regex9$(EXEEXT): $(bug_regex9_OBJECTS) $(bug_regex9_DEPENDENCIES) + @rm -f bug-regex9$(EXEEXT) + $(LINK) $(bug_regex9_LDFLAGS) $(bug_regex9_OBJECTS) $(bug_regex9_LDADD) $(LIBS) +runptests$(EXEEXT): $(runptests_OBJECTS) $(runptests_DEPENDENCIES) + @rm -f runptests$(EXEEXT) + $(LINK) $(runptests_LDFLAGS) $(runptests_OBJECTS) $(runptests_LDADD) $(LIBS) +runtests$(EXEEXT): $(runtests_OBJECTS) $(runtests_DEPENDENCIES) + @rm -f runtests$(EXEEXT) + $(LINK) $(runtests_LDFLAGS) $(runtests_OBJECTS) $(runtests_LDADD) $(LIBS) +tst-boost$(EXEEXT): $(tst_boost_OBJECTS) $(tst_boost_DEPENDENCIES) + @rm -f tst-boost$(EXEEXT) + $(LINK) $(tst_boost_LDFLAGS) $(tst_boost_OBJECTS) $(tst_boost_LDADD) $(LIBS) +tst-pcre$(EXEEXT): $(tst_pcre_OBJECTS) $(tst_pcre_DEPENDENCIES) + @rm -f tst-pcre$(EXEEXT) + $(LINK) $(tst_pcre_LDFLAGS) $(tst_pcre_OBJECTS) $(tst_pcre_LDADD) $(LIBS) +tst-regex2$(EXEEXT): $(tst_regex2_OBJECTS) $(tst_regex2_DEPENDENCIES) + @rm -f tst-regex2$(EXEEXT) + $(LINK) $(tst_regex2_LDFLAGS) $(tst_regex2_OBJECTS) $(tst_regex2_LDADD) $(LIBS) 
+tst-rxspencer$(EXEEXT): $(tst_rxspencer_OBJECTS) $(tst_rxspencer_DEPENDENCIES) + @rm -f tst-rxspencer$(EXEEXT) + $(LINK) $(tst_rxspencer_LDFLAGS) $(tst_rxspencer_OBJECTS) $(tst_rxspencer_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex10.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex11.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex12.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex13.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex14.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex15.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex16.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex21.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex7.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex8.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug-regex9.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/runptests.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/runtests.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tst-boost.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tst-pcre.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tst-regex2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tst-rxspencer.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< 
+ +.c.obj: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` +uninstall-info-am: + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags 
+ +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; skip=0; \ + srcdir=$(srcdir); export srcdir; \ + list='$(TESTS)'; \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + echo "XPASS: $$tst"; \ + ;; \ + *) \ + echo "PASS: $$tst"; \ + ;; \ + esac; \ + elif test $$? -ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xfail=`expr $$xfail + 1`; \ + echo "XFAIL: $$tst"; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + echo "FAIL: $$tst"; \ + ;; \ + esac; \ + else \ + skip=`expr $$skip + 1`; \ + echo "SKIP: $$tst"; \ + fi; \ + done; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="All $$all tests passed"; \ + else \ + banner="All $$all tests behaved as expected ($$xfail expected failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all tests failed"; \ + else \ + banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \ + fi; \ + fi; \ + dashes="$$banner"; \ + skipped=""; \ + if test "$$skip" -ne 0; then \ + skipped="($$skip tests were not run)"; \ + test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$skipped"; \ + fi; \ + report=""; \ + if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \ + report="Please report to $(PACKAGE_BUGREPORT)"; \ + test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$report"; \ + fi; \ + dashes=`echo "$$dashes" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + test -z "$$skipped" || echo "$$skipped"; \ + test -z "$$report" || echo "$$report"; \ + echo "$$dashes"; \ + test "$$failed" -eq 0; \ + else :; fi + +distdir: $(DISTFILES) + 
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(HEADERS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-generic clean-local mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-generic clean-local ctags distclean \ + distclean-compile distclean-generic distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-exec install-exec-am \ + install-info install-info-am install-man install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-info-am + + +clean-local: + test x$(srcdir) = x. || rm -f readin.in2 eval.in2 + +# automake makes `check' depend on $(TESTS). Declare +# dummy targets for $(TESTS) so that make does not complain. + +.PHONY: $(SEDTESTS) +$(SEDTESTS): +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/testsuite/Makefile.tests b/testsuite/Makefile.tests new file mode 100644 index 0000000..a895cc4 --- /dev/null +++ b/testsuite/Makefile.tests @@ -0,0 +1,157 @@ +# Testsuite makefile for GNU sed + +SHELL = /bin/sh + +# These are only fallback values. 
They are usually overridden by runtest. +srcdir = . +SED = ../sed/sed +SEDENV = LC_ALL=C $(TIME) + +#TIME=time +CMP=cmp +RM=rm -f + +enable sep inclib 8bit 8to7 newjis xabcx dollar noeol bkslashes \ +numsub head madding mac-mf empty xbxcx xbxcx3 recall recall2 xemacs \ +appquit fasts uniq manis linecnt khadafy allsub flipcase space modulo \ +y-bracket y-newline:: + $(SEDENV) $(SED) -f $(srcdir)/$@.sed \ + < $(srcdir)/$@.inp > $@.out + $(CMP) $(srcdir)/$@.good $@.out + @$(RM) $@.out + +0range:: + $(SEDENV) $(SED) -s -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out + $(CMP) $(srcdir)/$@.good $@.out + @$(RM) $@.out + +# This checks for a bug in 3.02 and 3.02.80 +stdin:: + ($(SEDENV) $(SED) d; $(SEDENV) $(SED) G) < $(srcdir)/numsub.inp > $@.1out + $(SEDENV) cat $(srcdir)/numsub.inp | ($(SEDENV) $(SED) d; $(SEDENV) $(SED) G) > $@.2out + $(CMP) $@.1out $@.2out + @$(RM) $@.1out $@.2out + +cv-vars classes middle dc distrib factor numsub2 numsub3 numsub4 numsub5 \ +insens:: + $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out + $(CMP) $(srcdir)/$@.good $@.out + @$(RM) $@.out + +noeolw:: + $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed \ + $(srcdir)/noeol.inp $(srcdir)/noeol.inp > $@.out + $(CMP) $(srcdir)/$@.good $@.out + $(CMP) $(srcdir)/$@.1good $@.1out + $(CMP) $(srcdir)/$@.2good $@.2out + @$(RM) $@.1out $@.2out $@.out + +subwrite:: + $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.1out + $(CMP) $(srcdir)/subwrt1.good $@.1out + $(CMP) $(srcdir)/subwrt2.good $@.wout + @$(RM) $@.1out $@.wout + +bsd:: + $(SEDENV) sh $(srcdir)/$@.sh '$(SED)' bsd.out + $(CMP) $(srcdir)/$@.good $@.out + @$(RM) $@.out + +writeout:: + $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp >$@.1out + $(CMP) $(srcdir)/wrtout1.good $@.1out + $(CMP) $(srcdir)/wrtout2.good $@.wout + @$(RM) $@.1out $@.wout + +readin.in2: $(srcdir)/readin.in2 + cat $(srcdir)/readin.in2 > $@ + +readin:: readin.in2 + $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp >$@.out + $(CMP) 
$(srcdir)/$@.good $@.out + @$(RM) $@.out + +eval.in2: $(srcdir)/eval.inp + cat $(srcdir)/eval.inp > $@ + +eval:: eval.in2 + $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out + $(CMP) $(srcdir)/$@.good $@.out + @$(RM) $@.out + +binary binary2 binary3:: + $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed < $(srcdir)/binary.inp >$@.out + $(CMP) $(srcdir)/binary.good $@.out + @$(RM) $@.out + +# +# cmdlines targets +# + +help:: + $(SED) --help | $(SED) '1s/ [^ ]* / sed /' > $@.1out + $(SED) 2>&1 | $(SED) '1s/ [^ ]* / sed /' > $@.2out || : + $(CMP) $@.1out $@.2out + @$(RM) $@.1out $@.2out + +version:: + $(SEDENV) $(SED) --version > $@.out 2>&1 + $(CMP) $@.good $@.out + @$(RM) $@.out + +file:: + $(SEDENV) $(SED) --file=$(srcdir)/newjis.sed \ + < $(srcdir)/newjis.inp > $@.out + $(CMP) $(srcdir)/newjis.good $@.out + @$(RM) $@.out + +quiet:: + $(SEDENV) $(SED) --quiet -f $(srcdir)/cv-vars.sed \ + < $(srcdir)/cv-vars.inp > $@.out + $(CMP) $(srcdir)/cv-vars.good $@.out + @$(RM) $@.out + +# The following target is not used in super sed builds (only GNU sed) + +bug-regex7$(EXEEXT) bug-regex8$(EXEEXT) bug-regex9$(EXEEXT) \ +bug-regex10$(EXEEXT) bug-regex11$(EXEEXT) bug-regex12$(EXEEXT) \ +bug-regex13$(EXEEXT) bug-regex14$(EXEEXT) bug-regex15$(EXEEXT) bug-regex16$(EXEEXT) \ +bug-regex21$(EXEEXT) runtests$(EXEEXT) runptests$(EXEEXT):: + echo "$(SEDENV) ./$@ > `echo $@ | $(SED) s/$(EXEEXT)$$/.log/`" + @$(SEDENV) ./$@ > `echo $@ | $(SED) s/$(EXEEXT)$$/.log/` + +tst-pcre$(EXEEXT):: + $(SEDENV) ./tst-pcre $(srcdir)/PCRE.tests > tst-pcre.log + +tst-boost$(EXEEXT):: + $(SEDENV) ./tst-boost $(srcdir)/BOOST.tests > tst-boost.log + +tst-rxspencer$(EXEEXT):: + $(SEDENV) ./tst-rxspencer $(srcdir)/SPENCER.tests > tst-spencer.log + +tst-regex2$(EXEEXT):: + $(SEDENV) ./tst-regex2 $(srcdir)/tst-regex2.c > tst-regex2.log + +# The following target is not used in GNU sed builds (only super-sed) + +pcretest$(EXEEXT):: + $(SEDENV) ./pcretest $(srcdir)/pcre1.inp pcre1.out + $(CMP) 
$(srcdir)/pcre1.good pcre1.out + #$(SEDENV) ./pcretest -p $(srcdir)/pcre1.inp pcre1p.out + #$(CMP) $(srcdir)/pcre1p.good pcre1p.out + $(SEDENV) ./pcretest -P $(srcdir)/pcre2.inp pcre2.out + $(CMP) $(srcdir)/pcre2.good pcre2.out + $(SEDENV) ./pcretest -P -p $(srcdir)/pcre2.inp pcre2p.out + $(CMP) $(srcdir)/pcre2p.good pcre2p.out + $(SEDENV) ./pcretest $(srcdir)/pcre3.inp pcre3.out + $(CMP) $(srcdir)/pcre3.good pcre3.out + $(SEDENV) ./pcretest -p $(srcdir)/pcre3.inp pcre3p.out + $(CMP) $(srcdir)/pcre3p.good pcre3p.out + @$(RM) pcre*.out + +.PHONY: \ +bug-regex7$(EXEEXT) bug-regex8$(EXEEXT) bug-regex9$(EXEEXT) \ +bug-regex10$(EXEEXT) bug-regex11$(EXEEXT) bug-regex12$(EXEEXT) \ +bug-regex13$(EXEEXT) bug-regex14$(EXEEXT) bug-regex15$(EXEEXT) bug-regex16$(EXEEXT) \ +bug-regex21$(EXEEXT) runtests$(EXEEXT) runptests$(EXEEXT) tst-regex2$(EXEEXT) \ +tst-pcre$(EXEEXT) tst-boost$(EXEEXT) tst-rxspencer$(EXEEXT) pcretest$(EXEEXT) diff --git a/testsuite/PCRE.tests b/testsuite/PCRE.tests new file mode 100644 index 0000000..0fb9cad --- /dev/null +++ b/testsuite/PCRE.tests @@ -0,0 +1,2386 @@ +# PCRE version 4.4 21-August-2003 + +# Tests taken from PCRE and modified to suit glibc regex. +# +# PCRE LICENCE +# ------------ +# +# PCRE is a library of functions to support regular expressions whose syntax +# and semantics are as close as possible to those of the Perl 5 language. +# +# Written by: Philip Hazel <ph10@cam.ac.uk> +# +# University of Cambridge Computing Service, +# Cambridge, England. Phone: +44 1223 334714. +# +# Copyright (c) 1997-2003 University of Cambridge +# +# Permission is granted to anyone to use this software for any purpose on any +# computer system, and to redistribute it freely, subject to the following +# restrictions: +# +# 1. This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# 2. 
The origin of this software must not be misrepresented, either by +# explicit claim or by omission. In practice, this means that if you use +# PCRE in software that you distribute to others, commercially or +# otherwise, you must put a sentence like this +# +# Regular expression support is provided by the PCRE library package, +# which is open source software, written by Philip Hazel, and copyright +# by the University of Cambridge, England. +# +# somewhere reasonably visible in your documentation and in any relevant +# files or online help data or similar. A reference to the ftp site for +# the source, that is, to +# +# ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ +# +# should also be given in the documentation. However, this condition is not +# intended to apply to whole chains of software. If package A includes PCRE, +# it must acknowledge it, but if package B is software that includes package +# A, the condition is not imposed on package B (unless it uses PCRE +# independently). +# +# 3. Altered versions must be plainly marked as such, and must not be +# misrepresented as being the original software. +# +# 4. If PCRE is embedded in any software that is released under the GNU +# General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL), +# then the terms of that licence shall supersede any condition above with +# which it is incompatible. +# +# The documentation for PCRE, supplied in the "doc" directory, is distributed +# under the same terms as the software itself. +# +# End +# + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox + The quick brown FOX +No match + What do you know about the quick brown fox? + 0: the quick brown fox + What do you know about THE QUICK BROWN FOX? +No match + +/The quick brown fox/i + the quick brown fox + 0: the quick brown fox + The quick brown FOX + 0: The quick brown FOX + What do you know about the quick brown fox? + 0: the quick brown fox + What do you know about THE QUICK BROWN FOX? 
+ 0: THE QUICK BROWN FOX + +/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/ + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + aabxyzpqrrrabbxyyyypqAzz + 0: aabxyzpqrrrabbxyyyypqAzz + aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + aaaabxyzpqrrrabbxyyyypqAzz + 0: aaaabxyzpqrrrabbxyyyypqAzz + abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz + aabcxyzpqrrrabbxyyyypqAzz + 0: aabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypAzz + 0: aaabcxyzpqrrrabbxyyyypAzz + aaabcxyzpqrrrabbxyyyypqAzz + 0: aaabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqAzz + aaabcxyzpqrrrabbxyyyypqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz + aaaabcxyzpqrrrabbxyyyypqAzz + 0: aaaabcxyzpqrrrabbxyyyypqAzz + abxyzzpqrrrabbxyyyypqAzz + 0: abxyzzpqrrrabbxyyyypqAzz + aabxyzzzpqrrrabbxyyyypqAzz + 0: aabxyzzzpqrrrabbxyyyypqAzz + aaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaabxyzzzzpqrrrabbxyyyypqAzz + aaaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabxyzzzzpqrrrabbxyyyypqAzz + abcxyzzpqrrrabbxyyyypqAzz + 0: abcxyzzpqrrrabbxyyyypqAzz + aabcxyzzzpqrrrabbxyyyypqAzz + 0: aabcxyzzzpqrrrabbxyyyypqAzz + aaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + aaabcxyzpqrrrabbxyyyypABzz + 0: aaabcxyzpqrrrabbxyyyypABzz + aaabcxyzpqrrrabbxyyyypABBzz + 0: aaabcxyzpqrrrabbxyyyypABBzz + >>>aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + >aaaabxyzpqrrrabbxyyyypqAzz + 0: aaaabxyzpqrrrabbxyyyypqAzz + >>>>abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz + *** Failers +No match + 
abxyzpqrrabbxyyyypqAzz +No match + abxyzpqrrrrabbxyyyypqAzz +No match + abxyzpqrrrabxyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyypqAzz +No match + aaabcxyzpqrrrabbxyyyypqqqqqqqAzz +No match + +/^(abc){1,2}zz/ + abczz + 0: abczz + 1: abc + abcabczz + 0: abcabczz + 1: abc + *** Failers +No match + zz +No match + abcabcabczz +No match + >>abczz +No match + +/^(b+|a){1,2}c/ + bc + 0: bc + 1: b + bbc + 0: bbc + 1: bb + bbbc + 0: bbbc + 1: bbb + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + aac + 0: aac + 1: a + abbbbbbbbbbbc + 0: abbbbbbbbbbbc + 1: bbbbbbbbbbb + bbbbbbbbbbbac + 0: bbbbbbbbbbbac + 1: a + *** Failers +No match + aaac +No match + abbbbbbbbbbbac +No match + +/^[]cde]/ + ]thing + 0: ] + cthing + 0: c + dthing + 0: d + ething + 0: e + *** Failers +No match + athing +No match + fthing +No match + +/^[^]cde]/ + athing + 0: a + fthing + 0: f + *** Failers + 0: * + ]thing +No match + cthing +No match + dthing +No match + ething +No match + +/^[0-9]+$/ + 0 + 0: 0 + 1 + 0: 1 + 2 + 0: 2 + 3 + 0: 3 + 4 + 0: 4 + 5 + 0: 5 + 6 + 0: 6 + 7 + 0: 7 + 8 + 0: 8 + 9 + 0: 9 + 10 + 0: 10 + 100 + 0: 100 + *** Failers +No match + abc +No match + +/^.*nter/ + enter + 0: enter + inter + 0: inter + uponter + 0: uponter + +/^xxx[0-9]+$/ + xxx0 + 0: xxx0 + xxx1234 + 0: xxx1234 + *** Failers +No match + xxx +No match + +/^.+[0-9][0-9][0-9]$/ + x123 + 0: x123 + xx123 + 0: xx123 + 123456 + 0: 123456 + *** Failers +No match + 123 +No match + x1234 + 0: x1234 + +/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ + abc!pqr=apquxz.ixr.zzz.ac.uk + 0: abc!pqr=apquxz.ixr.zzz.ac.uk + 1: abc + 2: pqr + *** Failers +No match + !pqr=apquxz.ixr.zzz.ac.uk +No match + abc!=apquxz.ixr.zzz.ac.uk +No match + abc!pqr=apquxz:ixr.zzz.ac.uk +No match + abc!pqr=apquxz.ixr.zzz.ac.ukk +No match + +/:/ + Well, we need a colon: somewhere + 0: : + *** Fail if we don't +No match + +/([0-9a-f:]+)$/i + 0abc + 0: 0abc + 1: 0abc + abc + 0: abc + 1: abc + fed + 0: fed + 1: fed + E 
+ 0: E + 1: E + :: + 0: :: + 1: :: + 5f03:12C0::932e + 0: 5f03:12C0::932e + 1: 5f03:12C0::932e + fed def + 0: def + 1: def + Any old stuff + 0: ff + 1: ff + *** Failers +No match + 0zzz +No match + gzzz +No match + Any old rubbish +No match + +/^.*\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$/ + .1.2.3 + 0: .1.2.3 + 1: 1 + 2: 2 + 3: 3 + A.12.123.0 + 0: A.12.123.0 + 1: 12 + 2: 123 + 3: 0 + *** Failers +No match + .1.2.3333 +No match + 1.2.3 +No match + 1234.2.3 +No match + +/^([0-9]+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 0: 1 IN SOA non-sp1 non-sp2( + 1: 1 + 2: non-sp1 + 3: non-sp2 + 1 IN SOA non-sp1 non-sp2 ( + 0: 1 IN SOA non-sp1 non-sp2 ( + 1: 1 + 2: non-sp1 + 3: non-sp2 + *** Failers +No match + 1IN SOA non-sp1 non-sp2( +No match + +/^[a-zA-Z0-9][a-zA-Z0-9-]*(\.[a-zA-Z0-9][a-zA-z0-9-]*)*\.$/ + a. + 0: a. + Z. + 0: Z. + 2. + 0: 2. + ab-c.pq-r. + 0: ab-c.pq-r. + 1: .pq-r + sxk.zzz.ac.uk. + 0: sxk.zzz.ac.uk. + 1: .uk + x-.y-. + 0: x-.y-. + 1: .y- + *** Failers +No match + -abc.peq. 
+No match + +/^\*\.[a-z]([a-z0-9-]*[a-z0-9]+)?(\.[a-z]([a-z0-9-]*[a-z0-9]+)?)*$/ + *.a + 0: *.a + *.b0-a + 0: *.b0-a + 1: 0-a + *.c3-b.c + 0: *.c3-b.c + 1: 3-b + 2: .c + *.c-a.b-c + 0: *.c-a.b-c + 1: -a + 2: .b-c + 3: -c + *** Failers +No match + *.0 +No match + *.a- +No match + *.a-b.c- +No match + *.c-a.0-c +No match + +/^[0-9a-f](\.[0-9a-f])*$/i + a.b.c.d + 0: a.b.c.d + 1: .d + A.B.C.D + 0: A.B.C.D + 1: .D + a.b.c.1.2.3.C + 0: a.b.c.1.2.3.C + 1: .C + +/^".*"\s*(;.*)?$/ + "1234" + 0: "1234" + "abcd" ; + 0: "abcd" ; + 1: ; + "" ; rhubarb + 0: "" ; rhubarb + 1: ; rhubarb + *** Failers +No match + "1234" : things +No match + +/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/ + abcdefhijklm + 0: abcdefhijklm + 1: abc + 2: bc + 3: c + 4: def + 5: ef + 6: f + 7: hij + 8: ij + 9: j +10: klm +11: lm +12: m + +/^a*\w/ + z + 0: z + az + 0: az + aaaz + 0: aaaz + a + 0: a + aa + 0: aa + aaaa + 0: aaaa + a+ + 0: a + aa+ + 0: aa + +/^a+\w/ + az + 0: az + aaaz + 0: aaaz + aa + 0: aa + aaaa + 0: aaaa + aa+ + 0: aa + +/^[0-9]{8}\w{2,}/ + 1234567890 + 0: 1234567890 + 12345678ab + 0: 12345678ab + 12345678__ + 0: 12345678__ + *** Failers +No match + 1234567 +No match + +/^[aeiou0-9]{4,5}$/ + uoie + 0: uoie + 1234 + 0: 1234 + 12345 + 0: 12345 + aaaaa + 0: aaaaa + *** Failers +No match + 123456 +No match + +/\`(abc|def)=(\1){2,3}\'/ + abc=abcabc + 0: abc=abcabc + 1: abc + 2: abc + def=defdefdef + 0: def=defdefdef + 1: def + 2: def + *** Failers +No match + abc=defdef +No match + +/(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + cataract cataract23 + 0: cataract cataract23 + 1: cataract + 2: aract + 3: ract + 4: + 5: 3 + catatonic catatonic23 + 0: catatonic catatonic23 + 1: catatonic + 2: atonic + 3: tonic + 4: + 5: 3 + caterpillar caterpillar23 + 0: caterpillar caterpillar23 + 1: caterpillar + 2: erpillar + 3: <unset> + 4: + 5: 3 + + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 
12:33 + 1: abcd + +/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}[0-9]{1,2}\s+[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 12:33 + 1: Sep + From abcd Mon Sep 1 12:33:02 1997 + 0: From abcd Mon Sep 1 12:33 + 1: Sep + *** Failers +No match + From abcd Sep 01 12:33:02 1997 +No match + +/^(a)\1{2,3}(.)/ + aaab + 0: aaab + 1: a + 2: b + aaaab + 0: aaaab + 1: a + 2: b + aaaaab + 0: aaaaa + 1: a + 2: a + aaaaaab + 0: aaaaa + 1: a + 2: a + +/^[ab]{1,3}(ab*|b)/ + aabbbbb + 0: aabbbbb + 1: abbbbb + +/^(cow|)\1(bell)/ + cowcowbell + 0: cowcowbell + 1: cow + 2: bell + bell + 0: bell + 1: + 2: bell + *** Failers +No match + cowbell +No match + +/^(a|)\1+b/ + aab + 0: aab + 1: a + aaaab + 0: aaaab + 1: a + b + 0: b + 1: + *** Failers +No match + ab +No match + +/^(a|)\1{2}b/ + aaab + 0: aaab + 1: a + b + 0: b + 1: + *** Failers +No match + ab +No match + aab +No match + aaaab +No match + +/^(a|)\1{2,3}b/ + aaab + 0: aaab + 1: a + aaaab + 0: aaaab + 1: a + b + 0: b + 1: + *** Failers +No match + ab +No match + aab +No match + aaaaab +No match + +/ab{1,3}bc/ + abbbbc + 0: abbbbc + abbbc + 0: abbbc + abbc + 0: abbc + *** Failers +No match + abc +No match + abbbbbc +No match + +/([^.]*)\.([^:]*):[T ]+(.*)/ + track1.title:TBlah blah blah + 0: track1.title:TBlah blah blah + 1: track1 + 2: title + 3: Blah blah blah + +/([^.]*)\.([^:]*):[T ]+(.*)/i + track1.title:TBlah blah blah + 0: track1.title:TBlah blah blah + 1: track1 + 2: title + 3: Blah blah blah + +/([^.]*)\.([^:]*):[t ]+(.*)/i + track1.title:TBlah blah blah + 0: track1.title:TBlah blah blah + 1: track1 + 2: title + 3: Blah blah blah + +/^abc$/ + abc + 0: abc + *** Failers +No match + +/[-az]+/ + az- + 0: az- + *** Failers + 0: a + b +No match + +/[az-]+/ + za- + 0: za- + *** Failers + 0: a + b +No match + +/[a-z]+/ + abcdxyz + 0: abcdxyz + +/[0-9-]+/ + 12-34 + 0: 12-34 + *** Failers +No match + aaa +No match + +/(abc)\1/i + abcabc + 0: abcabc + 1: abc + ABCabc + 0: ABCabc + 1: ABC + abcABC + 0: 
abcABC + 1: abc + +/a{0}bc/ + bc + 0: bc + +/^([^a])([^b])([^c]*)([^d]{3,4})/ + baNOTccccd + 0: baNOTcccc + 1: b + 2: a + 3: NOT + 4: cccc + baNOTcccd + 0: baNOTccc + 1: b + 2: a + 3: NOT + 4: ccc + baNOTccd + 0: baNOTcc + 1: b + 2: a + 3: NO + 4: Tcc + bacccd + 0: baccc + 1: b + 2: a + 3: + 4: ccc + *** Failers + 0: *** Failers + 1: * + 2: * + 3: * Fail + 4: ers + anything +No match + baccd +No match + +/[^a]/ + Abc + 0: A + +/[^a]/i + Abc + 0: b + +/[^a]+/ + AAAaAbc + 0: AAA + +/[^a]+/i + AAAaAbc + 0: bc + +/[^k]$/ + abc + 0: c + *** Failers + 0: s + abk +No match + +/[^k]{2,3}$/ + abc + 0: abc + kbc + 0: bc + kabc + 0: abc + *** Failers + 0: ers + abk +No match + akb +No match + akk +No match + +/^[0-9]{8,}@.+[^k]$/ + 12345678@a.b.c.d + 0: 12345678@a.b.c.d + 123456789@x.y.z + 0: 123456789@x.y.z + *** Failers +No match + 12345678@x.y.uk +No match + 1234567@a.b.c.d +No match + +/(a)\1{8,}/ + aaaaaaaaa + 0: aaaaaaaaa + 1: a + aaaaaaaaaa + 0: aaaaaaaaaa + 1: a + *** Failers +No match + aaaaaaa +No match + +/[^a]/ + aaaabcd + 0: b + aaAabcd + 0: A + +/[^a]/i + aaaabcd + 0: b + aaAabcd + 0: b + +/[^az]/ + aaaabcd + 0: b + aaAabcd + 0: A + +/[^az]/i + aaaabcd + 0: b + aaAabcd + 0: b + +/P[^*]TAIRE[^*]{1,6}LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/P[^*]TAIRE[^*]{1,}LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/(\.[0-9][0-9][1-9]?)[0-9]+/ + 1.230003938 + 0: .230003938 + 1: .23 + 1.875000282 + 0: .875000282 + 1: .875 + 1.235 + 0: .235 + 1: .23 + +/\b(foo)\s+(\w+)/i + Food is on the foo table + 0: foo table + 1: foo + 2: table + +/foo(.*)bar/ + The food is under the bar in the barn. 
+ 0: food is under the bar in the bar + 1: d is under the bar in the + +/(.*)([0-9]*)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 53147 + 2: + +/(.*)([0-9]+)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: 7 + +/(.*)([0-9]+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: 7 + +/(.*)\b([0-9]+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: + 2: 53147 + +/(.*[^0-9])([0-9]+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: + 2: 53147 + +/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]][[:digit:]][[:digit:]]/ + 01/01/2000 + 0: 01/01/2000 + +/^(a){0,0}/ + bcd + 0: + abc + 0: + aab + 0: + +/^(a){0,1}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: a + 1: a + +/^(a){0,2}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + +/^(a){0,3}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + +/^(a){0,}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + aaaaaaaa + 0: aaaaaaaa + 1: a + +/^(a){1,1}/ + bcd +No match + abc + 0: a + 1: a + aab + 0: a + 1: a + +/^(a){1,2}/ + bcd +No match + abc + 0: a + 1: a + aab + 0: aa + 1: a + +/^(a){1,3}/ + bcd +No match + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + +/^(a){1,}/ + bcd +No match + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + aaaaaaaa + 0: aaaaaaaa + 1: a + +/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/ + 123456654321 + 0: 123456654321 + +/^[[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]]/ + 123456654321 + 0: 123456654321 + +/^[abc]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^[a-c]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^(a|b|c){12}/ + abcabcabcabc + 0: abcabcabcabc + 1: c + +/^[abcdefghijklmnopqrstuvwxy0123456789]/ + n + 0: n + *** Failers +No 
match + z +No match + +/abcde{0,0}/ + abcd + 0: abcd + *** Failers +No match + abce +No match + +/ab[cd]{0,0}e/ + abe + 0: abe + *** Failers +No match + abcde +No match + +/ab(c){0,0}d/ + abd + 0: abd + *** Failers +No match + abcd +No match + +/a(b*)/ + a + 0: a + 1: + ab + 0: ab + 1: b + abbbb + 0: abbbb + 1: bbbb + *** Failers + 0: a + 1: + bbbbb +No match + +/ab[0-9]{0}e/ + abe + 0: abe + *** Failers +No match + ab1e +No match + +/(A|B)*CD/ + CD + 0: CD + +/(AB)*\1/ + ABABAB + 0: ABABAB + 1: AB + +/([0-9]+)(\w)/ + 12345a + 0: 12345a + 1: 12345 + 2: a + 12345+ + 0: 12345 + 1: 1234 + 2: 5 + +/(abc|)+/ + abc + 0: abc + 1: abc + abcabc + 0: abcabc + 1: abc + abcabcabc + 0: abcabcabc + 1: abc + xyz + 0: + 1: + +/([a]*)*/ + a + 0: a + 1: a + aaaaa + 0: aaaaa + 1: aaaaa + +/([ab]*)*/ + a + 0: a + 1: a + b + 0: b + 1: b + ababab + 0: ababab + 1: ababab + aaaabcde + 0: aaaab + 1: aaaab + bbbb + 0: bbbb + 1: bbbb + +/([^a]*)*/ + b + 0: b + 1: b + bbbb + 0: bbbb + 1: bbbb + aaa + 0: + +/([^ab]*)*/ + cccc + 0: cccc + 1: cccc + abab + 0: + +/abc/ + abc + 0: abc + xabcy + 0: abc + ababc + 0: abc + *** Failers +No match + xbc +No match + axc +No match + abx +No match + +/ab*c/ + abc + 0: abc + +/ab*bc/ + abc + 0: abc + abbc + 0: abbc + abbbbc + 0: abbbbc + +/.{1}/ + abbbbc + 0: a + +/.{3,4}/ + abbbbc + 0: abbb + +/ab{0,}bc/ + abbbbc + 0: abbbbc + +/ab+bc/ + abbc + 0: abbc + *** Failers +No match + abc +No match + abq +No match + +/ab+bc/ + abbbbc + 0: abbbbc + +/ab{1,}bc/ + abbbbc + 0: abbbbc + +/ab{1,3}bc/ + abbbbc + 0: abbbbc + +/ab{3,4}bc/ + abbbbc + 0: abbbbc + +/ab{4,5}bc/ + *** Failers +No match + abq +No match + abbbbc +No match + +/ab?bc/ + abbc + 0: abbc + abc + 0: abc + +/ab{0,1}bc/ + abc + 0: abc + +/ab?c/ + abc + 0: abc + +/ab{0,1}c/ + abc + 0: abc + +/^abc$/ + abc + 0: abc + *** Failers +No match + abbbbc +No match + abcc +No match + +/^abc/ + abcc + 0: abc + +/abc$/ + aabc + 0: abc + *** Failers +No match + aabc + 0: abc + aabcd +No match + +/^/ + abc + 0: + 
+/$/ + abc + 0: + +/a.c/ + abc + 0: abc + axc + 0: axc + +/a.*c/ + axyzc + 0: axyzc + +/a[bc]d/ + abd + 0: abd + *** Failers +No match + axyzd +No match + abc +No match + +/a[b-d]e/ + ace + 0: ace + +/a[b-d]/ + aac + 0: ac + +/a[-b]/ + a- + 0: a- + +/a[b-]/ + a- + 0: a- + +/a[]]b/ + a]b + 0: a]b + +/a[^bc]d/ + aed + 0: aed + *** Failers +No match + abd +No match + abd +No match + +/a[^-b]c/ + adc + 0: adc + +/a[^]b]c/ + adc + 0: adc + *** Failers +No match + a-c + 0: a-c + a]c +No match + +/\ba\b/ + a- + 0: a + -a + 0: a + -a- + 0: a + +/\by\b/ + *** Failers +No match + xy +No match + yz +No match + xyz +No match + +/\Ba\B/ + *** Failers + 0: a + a- +No match + -a +No match + -a- +No match + +/\By\b/ + xy + 0: y + +/\by\B/ + yz + 0: y + +/\By\B/ + xyz + 0: y + +/\w/ + a + 0: a + +/\W/ + - + 0: - + *** Failers + 0: * + - + 0: - + a +No match + +/a\sb/ + a b + 0: a b + +/a\Sb/ + a-b + 0: a-b + *** Failers +No match + a-b + 0: a-b + a b +No match + +/[0-9]/ + 1 + 0: 1 + +/[^0-9]/ + - + 0: - + *** Failers + 0: * + - + 0: - + 1 +No match + +/ab|cd/ + abc + 0: ab + abcd + 0: ab + +/()ef/ + def + 0: ef + 1: + +/a\(b/ + a(b + 0: a(b + +/a\(*b/ + ab + 0: ab + a((b + 0: a((b + +/((a))/ + abc + 0: a + 1: a + 2: a + +/(a)b(c)/ + abc + 0: abc + 1: a + 2: c + +/a+b+c/ + aabbabc + 0: abc + +/a{1,}b{1,}c/ + aabbabc + 0: abc + +/(a+|b)*/ + ab + 0: ab + 1: b + +/(a+|b){0,}/ + ab + 0: ab + 1: b + +/(a+|b)+/ + ab + 0: ab + 1: b + +/(a+|b){1,}/ + ab + 0: ab + 1: b + +/(a+|b)?/ + ab + 0: a + 1: a + +/(a+|b){0,1}/ + ab + 0: a + 1: a + +/[^ab]*/ + cde + 0: cde + +/abc/ + *** Failers +No match + b +No match + + +/a*/ + + +/([abc])*d/ + abbbcd + 0: abbbcd + 1: c + +/([abc])*bcd/ + abcd + 0: abcd + 1: a + +/a|b|c|d|e/ + e + 0: e + +/(a|b|c|d|e)f/ + ef + 0: ef + 1: e + +/abcd*efg/ + abcdefg + 0: abcdefg + +/ab*/ + xabyabbbz + 0: ab + xayabbbz + 0: a + +/(ab|cd)e/ + abcde + 0: cde + 1: cd + +/[abhgefdc]ij/ + hij + 0: hij + +/(abc|)ef/ + abcdef + 0: ef + 1: + +/(a|b)c*d/ + abcd + 0: bcd + 1: b 
+ +/(ab|ab*)bc/ + abc + 0: abc + 1: a + +/a([bc]*)c*/ + abc + 0: abc + 1: bc + +/a([bc]*)(c*d)/ + abcd + 0: abcd + 1: bc + 2: d + +/a([bc]+)(c*d)/ + abcd + 0: abcd + 1: bc + 2: d + +/a([bc]*)(c+d)/ + abcd + 0: abcd + 1: b + 2: cd + +/a[bcd]*dcdcde/ + adcdcde + 0: adcdcde + +/a[bcd]+dcdcde/ + *** Failers +No match + abcde +No match + adcdcde +No match + +/(ab|a)b*c/ + abc + 0: abc + 1: ab + +/((a)(b)c)(d)/ + abcd + 0: abcd + 1: abc + 2: a + 3: b + 4: d + +/[a-zA-Z_][a-zA-Z0-9_]*/ + alpha + 0: alpha + +/^a(bc+|b[eh])g|.h$/ + abh + 0: bh + +/(bc+d$|ef*g.|h?i(j|k))/ + effgz + 0: effgz + 1: effgz + ij + 0: ij + 1: ij + 2: j + reffgz + 0: effgz + 1: effgz + *** Failers +No match + effg +No match + bcdd +No match + +/((((((((((a))))))))))/ + a + 0: a + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a + +/((((((((((a))))))))))\9/ + aa + 0: aa + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a + +/(((((((((a)))))))))/ + a + 0: a + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a + +/multiple words of text/ + *** Failers +No match + aa +No match + uh-uh +No match + +/multiple words/ + multiple words, yeah + 0: multiple words + +/(.*)c(.*)/ + abcde + 0: abcde + 1: ab + 2: de + +/\((.*), (.*)\)/ + (a, b) + 0: (a, b) + 1: a + 2: b + +/abcd/ + abcd + 0: abcd + +/a(bc)d/ + abcd + 0: abcd + 1: bc + +/a[-]?c/ + ac + 0: ac + +/(abc)\1/ + abcabc + 0: abcabc + 1: abc + +/([a-c]*)\1/ + abcabc + 0: abcabc + 1: abc + +/(a)|\1/ + a + 0: a + 1: a + *** Failers + 0: a + 1: a + ab + 0: a + 1: a + x +No match + +/abc/i + ABC + 0: ABC + XABCY + 0: ABC + ABABC + 0: ABC + *** Failers +No match + aaxabxbaxbbx +No match + XBC +No match + AXC +No match + ABX +No match + +/ab*c/i + ABC + 0: ABC + +/ab*bc/i + ABC + 0: ABC + ABBC + 0: ABBC + +/ab+bc/i + *** Failers +No match + ABC +No match + ABQ +No match + +/ab+bc/i + ABBBBC + 0: ABBBBC + +/^abc$/i + ABC + 0: ABC + *** Failers +No match + ABBBBC +No match + ABCC +No match + +/^abc/i + ABCC + 0: ABC + 
+/abc$/i + AABC + 0: ABC + +/^/i + ABC + 0: + +/$/i + ABC + 0: + +/a.c/i + ABC + 0: ABC + AXC + 0: AXC + +/a.*c/i + *** Failers +No match + AABC + 0: AABC + AXYZD +No match + +/a[bc]d/i + ABD + 0: ABD + +/a[b-d]e/i + ACE + 0: ACE + *** Failers +No match + ABC +No match + ABD +No match + +/a[b-d]/i + AAC + 0: AC + +/a[-b]/i + A- + 0: A- + +/a[b-]/i + A- + 0: A- + +/a[]]b/i + A]B + 0: A]B + +/a[^bc]d/i + AED + 0: AED + +/a[^-b]c/i + ADC + 0: ADC + *** Failers +No match + ABD +No match + A-C +No match + +/a[^]b]c/i + ADC + 0: ADC + +/ab|cd/i + ABC + 0: AB + ABCD + 0: AB + +/()ef/i + DEF + 0: EF + 1: + +/$b/i + *** Failers +No match + A]C +No match + B +No match + +/a\(b/i + A(B + 0: A(B + +/a\(*b/i + AB + 0: AB + A((B + 0: A((B + +/((a))/i + ABC + 0: A + 1: A + 2: A + +/(a)b(c)/i + ABC + 0: ABC + 1: A + 2: C + +/a+b+c/i + AABBABC + 0: ABC + +/a{1,}b{1,}c/i + AABBABC + 0: ABC + +/(a+|b)*/i + AB + 0: AB + 1: B + +/(a+|b){0,}/i + AB + 0: AB + 1: B + +/(a+|b)+/i + AB + 0: AB + 1: B + +/(a+|b){1,}/i + AB + 0: AB + 1: B + +/(a+|b)?/i + AB + 0: A + 1: A + +/(a+|b){0,1}/i + AB + 0: A + 1: A + +/[^ab]*/i + CDE + 0: CDE + +/([abc])*d/i + ABBBCD + 0: ABBBCD + 1: C + +/([abc])*bcd/i + ABCD + 0: ABCD + 1: A + +/a|b|c|d|e/i + E + 0: E + +/(a|b|c|d|e)f/i + EF + 0: EF + 1: E + +/abcd*efg/i + ABCDEFG + 0: ABCDEFG + +/ab*/i + XABYABBBZ + 0: AB + XAYABBBZ + 0: A + +/(ab|cd)e/i + ABCDE + 0: CDE + 1: CD + +/[abhgefdc]ij/i + HIJ + 0: HIJ + +/^(ab|cd)e/i + ABCDE +No match + +/(abc|)ef/i + ABCDEF + 0: EF + 1: + +/(a|b)c*d/i + ABCD + 0: BCD + 1: B + +/(ab|ab*)bc/i + ABC + 0: ABC + 1: A + +/a([bc]*)c*/i + ABC + 0: ABC + 1: BC + +/a([bc]*)(c*d)/i + ABCD + 0: ABCD + 1: BC + 2: D + +/a([bc]+)(c*d)/i + ABCD + 0: ABCD + 1: BC + 2: D + +/a([bc]*)(c+d)/i + ABCD + 0: ABCD + 1: B + 2: CD + +/a[bcd]*dcdcde/i + ADCDCDE + 0: ADCDCDE + +/a[bcd]+dcdcde/i + +/(ab|a)b*c/i + ABC + 0: ABC + 1: AB + +/((a)(b)c)(d)/i + ABCD + 0: ABCD + 1: ABC + 2: A + 3: B + 4: D + +/[a-zA-Z_][a-zA-Z0-9_]*/i + ALPHA + 0: ALPHA + 
+/^a(bc+|b[eh])g|.h$/i + ABH + 0: BH + +/(bc+d$|ef*g.|h?i(j|k))/i + EFFGZ + 0: EFFGZ + 1: EFFGZ + IJ + 0: IJ + 1: IJ + 2: J + REFFGZ + 0: EFFGZ + 1: EFFGZ + *** Failers +No match + ADCDCDE +No match + EFFG +No match + BCDD +No match + +/((((((((((a))))))))))/i + A + 0: A + 1: A + 2: A + 3: A + 4: A + 5: A + 6: A + 7: A + 8: A + 9: A +10: A + +/((((((((((a))))))))))\9/i + AA + 0: AA + 1: A + 2: A + 3: A + 4: A + 5: A + 6: A + 7: A + 8: A + 9: A +10: A + +/(((((((((a)))))))))/i + A + 0: A + 1: A + 2: A + 3: A + 4: A + 5: A + 6: A + 7: A + 8: A + 9: A + +/multiple words of text/i + *** Failers +No match + AA +No match + UH-UH +No match + +/multiple words/i + MULTIPLE WORDS, YEAH + 0: MULTIPLE WORDS + +/(.*)c(.*)/i + ABCDE + 0: ABCDE + 1: AB + 2: DE + +/\((.*), (.*)\)/i + (A, B) + 0: (A, B) + 1: A + 2: B + +/abcd/i + ABCD + 0: ABCD + +/a(bc)d/i + ABCD + 0: ABCD + 1: BC + +/a[-]?c/i + AC + 0: AC + +/(abc)\1/i + ABCABC + 0: ABCABC + 1: ABC + +/([a-c]*)\1/i + ABCABC + 0: ABCABC + 1: ABC + +/((foo)|(bar))*/ + foobar + 0: foobar + 1: bar + 2: foo + 3: bar + +/^(.+)?B/ + AB + 0: AB + 1: A + +/^([^a-z])|(\^)$/ + . + 0: . + 1: . 
+ +/^[<>]&/ + <&OUT + 0: <& + +/^(){3,5}/ + abc + 0: + 1: + +/^(a+)*ax/ + aax + 0: aax + 1: a + +/^((a|b)+)*ax/ + aax + 0: aax + 1: a + 2: a + +/^((a|bc)+)*ax/ + aax + 0: aax + 1: a + 2: a + +/(a|x)*ab/ + cab + 0: ab + +/(a)*ab/ + cab + 0: ab + +/(ab)[0-9]\1/i + Ab4ab + 0: Ab4ab + 1: Ab + ab4Ab + 0: ab4Ab + 1: ab + +/foo\w*[0-9]{4}baz/ + foobar1234baz + 0: foobar1234baz + +/(\w+:)+/ + one: + 0: one: + 1: one: + +/((\w|:)+::)?(\w+)$/ + abcd + 0: abcd + 1: <unset> + 2: <unset> + 3: abcd + xy:z:::abcd + 0: xy:z:::abcd + 1: xy:z::: + 2: : + 3: abcd + +/^[^bcd]*(c+)/ + aexycd + 0: aexyc + 1: c + +/(a*)b+/ + caab + 0: aab + 1: aa + +/((\w|:)+::)?(\w+)$/ + abcd + 0: abcd + 1: <unset> + 2: <unset> + 3: abcd + xy:z:::abcd + 0: xy:z:::abcd + 1: xy:z::: + 2: : + 3: abcd + *** Failers + 0: Failers + 1: <unset> + 2: <unset> + 3: Failers + abcd: +No match + abcd: +No match + +/^[^bcd]*(c+)/ + aexycd + 0: aexyc + 1: c + +/((Z)+|A)*/ + ZABCDEFG + 0: ZA + 1: A + 2: Z + +/(Z()|A)*/ + ZABCDEFG + 0: ZA + 1: A + 2: + +/(Z(())|A)*/ + ZABCDEFG + 0: ZA + 1: A + 2: + 3: + +/(.*)[0-9]+\1/ + abc123abc + 0: abc123abc + 1: abc + abc123bc + 0: bc123bc + 1: bc + +/((.*))[0-9]+\1/ + abc123abc + 0: abc123abc + 1: abc + 2: abc + abc123bc + 0: bc123bc + 1: bc + 2: bc + +/^a{2,5}$/ + aa + 0: aa + aaa + 0: aaa + aaaa + 0: aaaa + aaaaa + 0: aaaaa + *** Failers +No match + a +No match + b +No match + aaaaab +No match + aaaaaa diff --git a/testsuite/SPENCER.tests b/testsuite/SPENCER.tests new file mode 100644 index 0000000..b84a270 --- /dev/null +++ b/testsuite/SPENCER.tests @@ -0,0 +1,538 @@ +# regular expression test set +# Lines are at least three fields, separated by one or more tabs. "" stands +# for an empty field. First field is an RE. Second field is flags. If +# C flag given, regcomp() is expected to fail, and the third field is the +# error name (minus the leading REG_). +# +# Otherwise it is expected to succeed, and the third field is the string to +# try matching it against. 
If there is no fourth field, the match is +# expected to fail. If there is a fourth field, it is the substring that +# the RE is expected to match. If there is a fifth field, it is a comma- +# separated list of what the subexpressions should match, with - indicating +# no match for that one. In both the fourth and fifth fields, a (sub)field +# starting with @ indicates that the (sub)expression is expected to match +# a null string followed by the stuff after the @; this provides a way to +# test where null strings match. The character `N' in REs and strings +# is newline, `S' is space, `T' is tab, `Z' is NUL. +# +# The full list of flags: +# - placeholder, does nothing +# b RE is a BRE, not an ERE +# & try it as both an ERE and a BRE +# C regcomp() error expected, third field is error name +# i REG_ICASE +# m ("mundane") REG_NOSPEC +# s REG_NOSUB (not really testable) +# n REG_NEWLINE +# ^ REG_NOTBOL +# $ REG_NOTEOL +# # REG_STARTEND (see below) +# p REG_PEND +# +# For REG_STARTEND, the start/end offsets are those of the substring +# enclosed in (). 
+ +# basics +a & a a +abc & abc abc +abc|de - abc abc +a|b|c - abc a + +# parentheses and perversions thereof +a(b)c - abc abc +a\(b\)c b abc abc +a( C EPAREN +a( b a( a( +a\( - a( a( +a\( bC EPAREN +a\(b bC EPAREN +a(b C EPAREN +a(b b a(b a(b +# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) +a) - a) a) +) - ) ) +# end gagging (in a just world, those *should* give EPAREN) +a) b a) a) +a\) bC EPAREN +\) bC EPAREN +a()b - ab ab +a\(\)b b ab ab + +# anchoring and REG_NEWLINE +^abc$ & abc abc +a^b - a^b +a^b b a^b a^b +a$b - a$b +a$b b a$b a$b +^ & abc @abc +$ & abc @ +^$ & "" @ +$^ - "" @ +\($\)\(^\) b "" @ +# stop retching, those are legitimate (although disgusting) +^^ - "" @ +$$ - "" @ +b$ & abNc +b$ &n abNc b +^b$ & aNbNc +^b$ &n aNbNc b +^$ &n aNNb @Nb +^$ n abc +^$ n abcN @ +$^ n aNNb @Nb +\($\)\(^\) bn aNNb @Nb +^^ n^ aNNb @Nb +$$ n aNNb @NN +^a ^ a +a$ $ a +^a ^n aNb +^b ^n aNb b +a$ $n bNa +b$ $n bNa b +a*(^b$)c* - b b +a*\(^b$\)c* b b b + +# certain syntax errors and non-errors +| C EMPTY +| b | | +* C BADRPT +* b * * ++ C BADRPT +? C BADRPT +"" &C EMPTY +() - abc @abc +\(\) b abc @abc +a||b C EMPTY +|ab C EMPTY +ab| C EMPTY +(|a)b C EMPTY +(a|)b C EMPTY +(*a) C BADRPT +(+a) C BADRPT +(?a) C BADRPT +({1}a) C BADRPT +\(\{1\}a\) bC BADRPT +(a|*b) C BADRPT +(a|+b) C BADRPT +(a|?b) C BADRPT +(a|{1}b) C BADRPT +^* C BADRPT +^* b * * +^+ C BADRPT +^? C BADRPT +^{1} C BADRPT +^\{1\} bC BADRPT + +# metacharacters, backslashes +a.c & abc abc +a[bc]d & abd abd +a\*c & a*c a*c +a\\b & a\b a\b +a\\\*b & a\*b a\*b +# The following test is wrong. Using \b in an BRE or ERE is undefined. 
+# a\bc & abc abc +a\ &C EESCAPE +a\\bc & a\bc a\bc +\{ bC BADRPT +a\[b & a[b a[b +a[b &C EBRACK +# trailing $ is a peculiar special case for the BRE code +a$ & a a +a$ & a$ +a\$ & a +a\$ & a$ a$ +a\\$ & a +a\\$ & a$ +a\\$ & a\$ +a\\$ & a\ a\ + +# back references, ugh +a\(b\)\2c bC ESUBREG +a\(b\1\)c bC ESUBREG +a\(b*\)c\1d b abbcbbd abbcbbd bb +a\(b*\)c\1d b abbcbd +a\(b*\)c\1d b abbcbbbd +^\(.\)\1 b abc +a\([bc]\)\1d b abcdabbd abbd b +a\(\([bc]\)\2\)*d b abbccd abbccd +a\(\([bc]\)\2\)*d b abbcbd +# actually, this next one probably ought to fail, but the spec is unclear +a\(\(b\)*\2\)*d b abbbd abbbd +# here is a case that no NFA implementation does right +\(ab*\)[ab]*\1 b ababaaa ababaaa a +# check out normal matching in the presence of back refs +\(a\)\1bcd b aabcd aabcd +\(a\)\1bc*d b aabcd aabcd +\(a\)\1bc*d b aabd aabd +\(a\)\1bc*d b aabcccd aabcccd +\(a\)\1bc*[ce]d b aabcccd aabcccd +^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd + +# ordinary repetitions +ab*c & abc abc +ab+c - abc abc +ab?c - abc abc +a\(*\)b b a*b a*b +a\(**\)b b ab ab +a\(***\)b bC BADRPT +*a b *a *a +**a b a a +***a bC BADRPT + +# the dreaded bounded repetitions +# The following two tests are not correct: +#{ & { { +#{abc & {abc {abc +# '{' is always a special char outside bracket expressions. So test ony BRE: +{ b { { +{abc b {abc {abc +{1 C BADRPT +{1} C BADRPT +# Same reason as for the two tests above: +#a{b & a{b a{b +a{b b a{b a{b +a{1}b - ab ab +a\{1\}b b ab ab +a{1,}b - ab ab +a\{1,\}b b ab ab +a{1,2}b - aab aab +a\{1,2\}b b aab aab +a{1 C EBRACE +a\{1 bC EBRACE +a{1a C EBRACE +a\{1a bC EBRACE +a{1a} C BADBR +a\{1a\} bC BADBR +# These four tests checks for undefined behavior. Our implementation does +# something different. +#a{,2} - a{,2} a{,2} +#a\{,2\} bC BADBR +#a{,} - a{,} a{,} +#a\{,\} bC BADBR +a{1,x} C BADBR +a\{1,x\} bC BADBR +a{1,x C EBRACE +a\{1,x bC EBRACE +# These two tests probably fails due to an arbitrary limit on the number of +# repetitions in the other implementation. 
+#a{300} C BADBR +#a\{300\} bC BADBR +a{1,0} C BADBR +a\{1,0\} bC BADBR +ab{0,0}c - abcac ac +ab\{0,0\}c b abcac ac +ab{0,1}c - abcac abc +ab\{0,1\}c b abcac abc +ab{0,3}c - abbcac abbc +ab\{0,3\}c b abbcac abbc +ab{1,1}c - acabc abc +ab\{1,1\}c b acabc abc +ab{1,3}c - acabc abc +ab\{1,3\}c b acabc abc +ab{2,2}c - abcabbc abbc +ab\{2,2\}c b abcabbc abbc +ab{2,4}c - abcabbc abbc +ab\{2,4\}c b abcabbc abbc +((a{1,10}){1,10}){1,10} - a a a,a + +# multiple repetitions +# Wow, there is serious disconnect here. The ERE grammar is like this: +# ERE_expression : one_char_or_coll_elem_ERE +# | '^' +# | '$' +# | '(' extended_reg_exp ')' +# | ERE_expression ERE_dupl_symbol +# ; +# where ERE_dupl_symbol is any of the repetition methods. It is clear from +# this that consecutive repetition is OK. On top of this, the one test not +# marked as failing must fail. For BREs the situation is different, so we +# use the four tests. +#a** &C BADRPT +a** bC BADRPT +#a++ C BADRPT +#a?? C BADRPT +#a*+ C BADRPT +#a*? C BADRPT +#a+* C BADRPT +#a+? C BADRPT +#a?* C BADRPT +#a?+ C BADRPT +#a{1}{1} C BADRPT +#a*{1} C BADRPT +#a+{1} C BADRPT +#a?{1} C BADRPT +#a{1}* C BADRPT +#a{1}+ C BADRPT +#a{1}? C BADRPT +#a*{b} - a{b} a{b} +a\{1\}\{1\} bC BADRPT +a*\{1\} bC BADRPT +a\{1\}* bC BADRPT + +# brackets, and numerous perversions thereof +a[b]c & abc abc +a[ab]c & abc abc +a[^ab]c & adc adc +a[]b]c & a]c a]c +a[[b]c & a[c a[c +a[-b]c & a-c a-c +a[^]b]c & adc adc +a[^-b]c & adc adc +a[b-]c & a-c a-c +a[b &C EBRACK +a[] &C EBRACK +a[1-3]c & a2c a2c +a[3-1]c &C ERANGE +a[1-3-5]c &C ERANGE +a[[.-.]--]c & a-c a-c +# I don't thing the error value should be ERANGE since a[1-] would be +# valid, too. Expect EBRACK. +#a[1- &C ERANGE +a[1- &C EBRACK +a[[. &C EBRACK +a[[.x &C EBRACK +a[[.x. &C EBRACK +a[[.x.] &C EBRACK +a[[.x.]] & ax ax +a[[.x,.]] &C ECOLLATE +# This test is invalid. "one" is no collating symbol in any standardized +# locale. 
+# a[[.one.]]b & a1b a1b +a[[.notdef.]]b &C ECOLLATE +a[[.].]]b & a]b a]b +a[[:alpha:]]c & abc abc +a[[:notdef:]]c &C ECTYPE +a[[: &C EBRACK +a[[:alpha &C EBRACK +a[[:alpha:] &C EBRACK +a[[:alpha,:] &C ECTYPE +a[[:]:]]b &C ECTYPE +a[[:-:]]b &C ECTYPE +a[[:alph:]] &C ECTYPE +a[[:alphabet:]] &C ECTYPE +[[:alnum:]]+ - -%@a0X- a0X +[[:alpha:]]+ - -%@aX0- aX +[[:blank:]]+ - aSSTb SST +[[:cntrl:]]+ - aNTb NT +[[:digit:]]+ - a019b 019 +[[:graph:]]+ - Sa%bS a%b +[[:lower:]]+ - AabC ab +[[:print:]]+ - NaSbN aSb +[[:punct:]]+ - S%-&T %-& +[[:space:]]+ - aSNTb SNT +[[:upper:]]+ - aBCd BC +[[:xdigit:]]+ - p0f3Cq 0f3C +a[[=b=]]c & abc abc +a[[= &C EBRACK +a[[=b &C EBRACK +a[[=b= &C EBRACK +a[[=b=] &C EBRACK +a[[=b,=]] &C ECOLLATE +# This test is invalid. "one" is no collating symbol in any standardized +# locale. +#a[[=one=]]b & a1b a1b + +# complexities +a(((b)))c - abc abc +a(b|(c))d - abd abd +a(b*|c)d - abbd abbd +# just gotta have one DFA-buster, of course +a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and an inline expansion in case somebody gets tricky +a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab +# and in case somebody just slips in an NFA... 
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights +# fish for anomalies as the number of states passes 32 +12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 +123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 +1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 +12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 +123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 +# and one really big one, beyond any plausible word width +1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 +# fish for problems as brackets go past 8 +[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm +[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo +[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq +[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq + +# subtleties of matching +abc & xabcy abc +a\(b\)?c\1d b acd +aBc i Abc Abc +a[Bc]*d i abBCcd abBCcd +0[[:upper:]]1 &i 0a1 0a1 +0[[:lower:]]1 &i 0A1 0A1 +a[^b]c &i abc +a[^b]c &i aBc +a[^b]c &i adc adc +[a]b[c] - abc abc +[a]b[a] - aba aba +[abc]b[abc] - abc abc +[abc]b[abd] - abd abd +a(b?c)+d - accd accd +(wee|week)(knights|night) - weeknights weeknights +(we|wee|week|frob)(knights|night|day) - weeknights weeknights +a[bc]d - xyzaaabcaababdacd abd +a[ab]c - aaabc abc +abc s abc abc +() s abc @abc +a* & b @b + +# Let's have some fun -- try to match a C comment. +# first the obvious, which looks okay at first glance... +/\*.*\*/ - /*x*/ /*x*/ +# but... 
+/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ +# okay, we must not match */ inside; try to do that... +/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ +/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ +# but... +/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ +# and a still fancier version, which does it right (I think)... +/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ +/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ +/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ +/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ + +# subexpressions +.* - abc abc - +a(b)(c)d - abcd abcd b,c +a(((b)))c - abc abc b,b,b +a(b|(c))d - abd abd b,- +a(b*|c|e)d - abbd abbd bb +a(b*|c|e)d - acd acd c +a(b*|c|e)d - ad ad @d +a(b?)c - abc abc b +a(b?)c - ac ac @c +a(b+)c - abc abc b +a(b+)c - abbbc abbbc bbb +a(b*)c - ac ac @c +(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de +# the regression tester only asks for 9 subexpressions +a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j +a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k +a([bc]?)c - abc abc b +a([bc]?)c - ac ac @c +a([bc]+)c - abc abc b +a([bc]+)c - abcc abcc bc +a([bc]+)bc - abcbc abcbc bc +a(bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abb abb b +a(bbb+|bb+|b)b - abbb abbb bb +a(bbb+|bb+|b)bb - abbb abbb b +(.*).* - abcdef abcdef abcdef +(a*)* - bc @b @b + +# do we get the right subexpression when it is used more than once? 
+a(b|c)*d - ad ad - +a(b|c)*d - abcd abcd c +a(b|c)+d - abd abd b +a(b|c)+d - abcd abcd c +a(b|c?)+d - ad ad @d +a(b|c?)+d - abcd abcd c +a(b|c){0,0}d - ad ad - +a(b|c){0,1}d - ad ad - +a(b|c){0,1}d - abd abd b +a(b|c){0,2}d - ad ad - +a(b|c){0,2}d - abcd abcd c +a(b|c){0,}d - ad ad - +a(b|c){0,}d - abcd abcd c +a(b|c){1,1}d - abd abd b +a(b|c){1,1}d - acd acd c +a(b|c){1,2}d - abd abd b +a(b|c){1,2}d - abcd abcd c +a(b|c){1,}d - abd abd b +a(b|c){1,}d - abcd abcd c +a(b|c){2,2}d - acbd acbd b +a(b|c){2,2}d - abcd abcd c +a(b|c){2,4}d - abcd abcd c +a(b|c){2,4}d - abcbd abcbd b +a(b|c){2,4}d - abcbcd abcbcd c +a(b|c){2,}d - abcd abcd c +a(b|c){2,}d - abcbd abcbd b +a(b+|((c)*))+d - abd abd b,-,- +a(b+|((c)*))+d - abcd abcd c,c,c + +# check out the STARTEND option +[abc] &# a(b)c b +[abc] &# a(d)c +[abc] &# a(bc)d b +[abc] &# a(dc)d c +. &# a()c +b.*c &# b(bc)c bc +b.* &# b(bc)c bc +.*c &# b(bc)c bc + +# plain strings, with the NOSPEC flag +abc m abc abc +abc m xabcy abc +abc m xyz +a*b m aba*b a*b +a*b m ab +"" mC EMPTY + +# cases involving NULs +aZb & a a +aZb &p a +aZb &p# (aZb) aZb +aZ*b &p# (ab) ab +a.b &# (aZb) aZb +a.* &# (aZb)c aZb + +# word boundaries (ick) +[[:<:]]a & a a +[[:<:]]a & ba +[[:<:]]a & -a a +a[[:>:]] & a a +a[[:>:]] & ab +a[[:>:]] & a- a +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc +[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc +[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc +[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc +[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ +[[:<:]]a_b[[:>:]] & x_a_b + +# past problems, and suspected problems +(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 +abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop +abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv +(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 +CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - 
CC11 CC11 +Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz +a?b - ab ab +-\{0,1\}[0-9]*$ b -5 -5 +a*a*a*a*a*a*a* & aaaaaa aaaaaa +(\b){0} - x @x - +\(\b\)\{0,0\} b abc @abc - +a(\b){0}c - ac ac - +a(.*)b(\1){0}c - abc abc @bc,- +a(.*)b(\1){0}c - axbc axbc x,- + +a\(\(b*\)\)c\1d b abbcbbd abbcbbd bb,bb +a\(\([bc]\)\)\2d b abcdabbd abbd b,b +a\(\(\(\([bc]\)\)\3\)\)*d b abbccd abbccd cc,cc,c,c +a(b)(c)d - abcd abcd b,c +a(((b)))c - abc abc b,b,b +a(((b|(((c))))))d - abd abd b,b,b,-,-,- +a(((b*|c|e)))d - abbd abbd bb,bb,bb +a((b|c)){0,0}d - ad ad -,- +a((b|c)){0,1}d - abd abd b,b +a((b|c)){0,2}d - abcd abcd c,c +a((b+|((c)*)))+d - abd abd b,b,-,- +a((b+|((c)*)))+d - abcd abcd c,c,c,c +(((\b))){0} - x @x -,-,- +a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,- +a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,- + +\b & SaT @aT +\b & aT @aT +a.*\b & abT ab +\b & STSS +\B & abc @bc +\B & aSbTc +\B & SaT @SaT +\B & aSTSb @TSb diff --git a/testsuite/allsub.good b/testsuite/allsub.good new file mode 100644 index 0000000..234e159 --- /dev/null +++ b/testsuite/allsub.good @@ -0,0 +1 @@ +bar bar fo oo f oo bar bar bar bar bar bar bar bar bar bar bar bar bar diff --git a/testsuite/allsub.inp b/testsuite/allsub.inp new file mode 100644 index 0000000..f75655f --- /dev/null +++ b/testsuite/allsub.inp @@ -0,0 +1 @@ +foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo diff --git a/testsuite/allsub.sed b/testsuite/allsub.sed new file mode 100644 index 0000000..8aa29c1 --- /dev/null +++ b/testsuite/allsub.sed @@ -0,0 +1 @@ +s/foo/bar/g diff --git a/testsuite/appquit.good b/testsuite/appquit.good new file mode 100644 index 0000000..0742c8e --- /dev/null +++ b/testsuite/appquit.good @@ -0,0 +1,2 @@ +doh +ok diff --git a/testsuite/appquit.inp b/testsuite/appquit.inp new file mode 100644 index 0000000..a2300c9 --- /dev/null +++ b/testsuite/appquit.inp @@ -0,0 +1 @@ +doh diff --git a/testsuite/appquit.sed b/testsuite/appquit.sed new file mode 100644 index 0000000..fc11774 --- 
/dev/null +++ b/testsuite/appquit.sed @@ -0,0 +1,4 @@ +# Test appending quit +a\ +ok +q diff --git a/testsuite/binary.good b/testsuite/binary.good new file mode 100644 index 0000000..788024d --- /dev/null +++ b/testsuite/binary.good @@ -0,0 +1,8 @@ +192 +168 +1 +0 +192 +168 +1 +255 diff --git a/testsuite/binary.inp b/testsuite/binary.inp new file mode 100644 index 0000000..06bf77c --- /dev/null +++ b/testsuite/binary.inp @@ -0,0 +1,4 @@ +192.168.1.2 br b8<r b16<r b24< R|R|R| D +255.255.255.0 br b8<r b16<r b24< R|R|R| D~r +& DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +| DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP diff --git a/testsuite/binary.sed b/testsuite/binary.sed new file mode 100644 index 0000000..92a6a06 --- /dev/null +++ b/testsuite/binary.sed @@ -0,0 +1,189 @@ +# A kind of clone of dc geared towards binary operations. +# by Paolo Bonzini +# +# commands available: +# conversion commands +# b convert decimal to binary +# d convert binary to decimal +# +# arithmetic commands +# < shift left binary by decimal number of bits (11 3< gives 11000) +# > shift right binary by decimal number of bits (1011 2> gives 10) +# & binary AND (between two binary operands) +# | binary OR (between two binary operands) +# ^ binary XOR (between two binary operands) +# ~ binary NOT (between one binary operand) +# +# stack manipulation commands +# c clear stack +# P pop stack top +# D duplicate stack top +# x exchange top two elements +# r rotate stack counter-clockwise (second element becomes first) +# R rotate stack clockwise (last element becomes first) +# +# other commands +# l print stack (stack top is first) +# p print stack top +# q quit, print stack top if any (cq is quiet quit) +# +# The only shortcoming is that you'd better not attempt conversions of +# values above 1000 or so. +# +# This version does everything in pattern space (a la dc.sed). 
+# -------------------------------------------------------------------------- +# This was actually used in a one-disk distribution of Linux to compute +# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes +# 255.255.255.0; 2 parameters => given host address and netmask compute +# network and broadcast addresses): +# +# if [ $# = 1 ]; then +# OUTPUT='$1.$2.$3.$4' +# set 255.255.255.255 $1 +# else +# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8' +# fi +# +# if [ `expr $2 : ".*\\."` -gt 0 ]; then +# MASK="$2 br b8<r b16<r b24< R|R|R|" +# else +# MASK="$2b 31b ^d D +# 11111111111111111111111111111111 x>1> x<1<" +# fi +# +# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address +# $MASK D ~r # Load mask +# +# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# " | sed -f binary.sed` +# +# eval echo $OUTPUT +# -------------------------------------------------------------------------- + + +1s/^/%%/ + +:cmd +s/\(.*%%\) *\([0-9][0-9]*\)/\2\ +\1/ +tcmd +s/%% *#.*/%%/ +/%%$/ { + $b quit + N +} + +/^.*%%D/ s/^[^\n]*\n/&&/ +/^.*%%P/ s/^[^\n]*\n// +/^.*%%x/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/ +/^.*%%r/ s/^\([^\n]*\n\)\([^%]*\)/\2\1/ +/^.*%%R/ s/^\([^%]*\n\)\([^\n]*\n\)/\2\1/ +/^.*%%c/ s/^.*%%/%%/ +/^.*%%p/ P + +/^.*%%l/ { + h + s/.%%.*// + p + g +} + +/^.*%%q/ { + :quit + /^%%/!P + d +} + +/^.*%%b/ { + # Decimal to binary via analog form + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :d2bloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t d2bloop1 + s/-;9876543210aaaaaaaaa/;a01!/ + :d2bloop2 + s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/ + /^a/b d2bloop2 + s/[^!]*!// +} + +/^.*%%d/ { + # Binary to decimal via analog form + s/^\([^\n]*\)/-&;10a/ + :b2dloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/ + t b2dloop1 + s/-;10a/;aaaaaaaaa0123456789!/ + :b2dloop2 + s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/ + /^a/b b2dloop2 + s/[^!]*!// +} + 
+/^.*%%&/ { + # Binary AND + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/ + :andloop + s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t andloop + s/^0*\([^-]*\)-[^\n]*/\1/ + s/^\n/0&/ +} + +/^.*%%^/ { + # Binary XOR + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/ + b orloop +} + +/^.*%%|/ { + # Binary OR + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/ + :orloop + s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t orloop + s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/ +} + +/^.*%%~/ { + # Binary NOT + s/^\(.\)\([^\n]*\n\)/\1-010-\2/ + :notloop + s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/ + t notloop + + # If result is 00001..., \3 does not match (it looks for -10) and we just + # remove the table and leading zeros. If result is 0000...0, \3 matches + # (it looks for -0), \4 is a zero and we leave a lone zero as top of the + # stack. + + s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/ +} + +/^.*%%</ { + # Left shift, convert to analog and add a binary digit for each analog digit + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :lshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t lshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + s/a/0/g +} + +/^.*%%>/ { + # Right shift, convert to analog and remove a binary digit for each analog digit + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :rshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t rshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + :rshloop2 + s/.a// + s/^aa*/0/ + /a\n/b rshloop2 +} + + +s/%%./%%/ +tcmd diff --git a/testsuite/binary2.sed b/testsuite/binary2.sed new file mode 100644 index 0000000..daf7706 --- /dev/null +++ b/testsuite/binary2.sed @@ -0,0 +1,226 @@ +# A kind of clone of dc geared towards binary operations. 
+# by Paolo Bonzini +# +# commands available: +# conversion commands +# b convert decimal to binary +# d convert binary to decimal +# +# arithmetic commands +# < shift left binary by decimal number of bits (11 3< gives 11000) +# > shift right binary by decimal number of bits (1011 2> gives 10) +# & binary AND (between two binary operands) +# | binary OR (between two binary operands) +# ^ binary XOR (between two binary operands) +# ~ binary NOT (between one binary operand) +# +# stack manipulation commands +# c clear stack +# P pop stack top +# D duplicate stack top +# x exchange top two elements +# r rotate stack counter-clockwise (second element becomes first) +# R rotate stack clockwise (last element becomes first) +# +# other commands +# l print stack (stack top is first) +# p print stack top +# q quit, print stack top if any (cq is quiet quit) +# +# The only shortcoming is that you'd better not attempt conversions of +# values above 1000 or so. +# +# This version keeps the stack in hold space and the command in pattern +# space; it is the fastest one (though the gap with binary3.sed is small). +# -------------------------------------------------------------------------- +# This was actually used in a one-disk distribution of Linux to compute +# netmasks as follows (1 parameter => compute netmask e.g. 
24 becomes +# 255.255.255.0; 2 parameters => given host address and netmask compute +# network and broadcast addresses): +# +# if [ $# = 1 ]; then +# OUTPUT='$1.$2.$3.$4' +# set 255.255.255.255 $1 +# else +# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8' +# fi +# +# if [ `expr $2 : ".*\\."` -gt 0 ]; then +# MASK="$2 br b8<r b16<r b24< R|R|R|" +# else +# MASK="$2b 31b ^d D +# 11111111111111111111111111111111 x>1> x<1<" +# fi +# +# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address +# $MASK D ~r # Load mask +# +# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# " | sed -f binary.sed` +# +# eval echo $OUTPUT +# -------------------------------------------------------------------------- + +:cmd +s/^[\n\t ]*// +s/^#.*// +/^$/ { + $b quit + N + t cmd +} +/^[0-9][0-9]*/ { + G + h + s/^[0-9][0-9]* *\([^\n]*\).*/\1/ + x + s/^\([0-9][0-9]*\)[^\n]*/\1/ + x + t cmd +} + +/^[^DPxrRcplqbd&|^~<>]/b bad + +/^D/ { + x + s/^[^\n]*\n/&&/ +} +/^P/ { + x + s/^[^\n]*\n// +} +/^x/ { + x + s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/ +} +/^r/ { + x + s/^\([^\n]*\n\)\(.*\)/\2\1/ +} +/^R/ { + x + s/^\(.*\n\)\([^\n]*\n\)/\2\1/ +} +/^c/ { + x + s/.*// +} +/^p/ { + x + P +} + +/^l/ { + x + p +} + +/^q/ { + :quit + x + /./P + d +} + +/^b/ { + # Decimal to binary via analog form + x + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :d2bloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t d2bloop1 + s/-;9876543210aaaaaaaaa/;a01!/ + :d2bloop2 + s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/ + /^a/b d2bloop2 + s/[^!]*!// +} + +/^d/ { + # Binary to decimal via analog form + x + s/^\([^\n]*\)/-&;10a/ + :b2dloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/ + t b2dloop1 + s/-;10a/;aaaaaaaaa0123456789!/ + :b2dloop2 + s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/ + /^a/b b2dloop2 + s/[^!]*!// +} + +/^&/ { + # Binary AND + x + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/ + :andloop + 
s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t andloop + s/^0*\([^-]*\)-[^\n]*/\1/ + s/^\n/0&/ +} + +/^\^/ { + # Binary XOR + x + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/ + b orloop +} + +/^|/ { + # Binary OR + x + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/ + :orloop + s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t orloop + s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/ +} + +/^~/ { + # Binary NOT + x + s/^\(.\)\([^\n]*\n\)/\1-010-\2/ + :notloop + s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/ + t notloop + + # If result is 00001..., \3 does not match (it looks for -10) and we just + # remove the table and leading zeros. If result is 0000...0, \3 matches + # (it looks for -0), \4 is a zero and we leave a lone zero as top of the + # stack. + + s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/ +} + +/^</ { + # Left shift, convert to analog and add a binary digit for each analog digit + x + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :lshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t lshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + s/a/0/g +} + +/^>/ { + # Right shift, convert to analog and remove a binary digit for each analog digit + x + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :rshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t rshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + :rshloop2 + s/.a// + s/^aa*/0/ + /a\n/b rshloop2 +} + +x +:bad +s/^.// +tcmd diff --git a/testsuite/binary3.sed b/testsuite/binary3.sed new file mode 100644 index 0000000..b877f14 --- /dev/null +++ b/testsuite/binary3.sed @@ -0,0 +1,204 @@ +# A kind of clone of dc geared towards binary operations. 
+# by Paolo Bonzini +# +# commands available: +# conversion commands +# b convert decimal to binary +# d convert binary to decimal +# +# arithmetic commands +# < shift left binary by decimal number of bits (11 3< gives 11000) +# > shift right binary by decimal number of bits (1011 2> gives 10) +# & binary AND (between two binary operands) +# | binary OR (between two binary operands) +# ^ binary XOR (between two binary operands) +# ~ binary NOT (between one binary operand) +# +# stack manipulation commands +# c clear stack +# P pop stack top +# D duplicate stack top +# x exchange top two elements +# r rotate stack counter-clockwise (second element becomes first) +# R rotate stack clockwise (last element becomes first) +# +# other commands +# l print stack (stack top is first) +# p print stack top +# q quit, print stack top if any (cq is quiet quit) +# +# The only shortcoming is that you'd better not attempt conversions of +# values above 1000 or so. +# +# This version keeps the stack and the current command in hold space and +# the commands in pattern space; it is just a bit slower than binary2.sed +# but more size optimized for broken seds which have a 199-command limit +# (though binary2.sed does not have this much). +# +# -------------------------------------------------------------------------- +# This was actually used in a one-disk distribution of Linux to compute +# netmasks as follows (1 parameter => compute netmask e.g. 
24 becomes +# 255.255.255.0; 2 parameters => given host address and netmask compute +# network and broadcast addresses): +# +# if [ $# = 1 ]; then +# OUTPUT='$1.$2.$3.$4' +# set 255.255.255.255 $1 +# else +# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8' +# fi +# +# if [ `expr $2 : ".*\\."` -gt 0 ]; then +# MASK="$2 br b8<r b16<r b24< R|R|R|" +# else +# MASK="$2b 31b ^d D +# 11111111111111111111111111111111 x>1> x<1<" +# fi +# +# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address +# $MASK D ~r # Load mask +# +# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP +# " | sed -f binary.sed` +# +# eval echo $OUTPUT +# -------------------------------------------------------------------------- + +:cmd +s/^[\n\t ]*// +s/^#.*// +/^$/ { + $b quit + N + t cmd +} +/^[0-9][0-9]*/ { + G + h + s/^[0-9][0-9]* *\([^\n]*\).*/\1/ + x + s/^\([0-9][0-9]*\)[^\n]*/\1/ + x + t cmd +} + +/^[^DPxrRcplqbd&|^~<>]/bbad + +H +x +s/\(\n[^\n]\)[^\n]*$/\1/ + +/D$/ s/^[^\n]*\n/&&/ +/P$/ s/^[^\n]*\n// +/x$/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/ +/r$/ s/^\([^\n]*\n\)\(.*\)\(..\)/\2\1\3/ +/R$/ s/^\(.*\n\)\([^\n]*\n\)\(..\)/\2\1\3/ +/c$/ s/.*// +/p$/ P +/l$/ { + s/...$// + p + t cmd +} + +/q$/ { + :quit + /.../P + d +} + +/b$/ { + # Decimal to binary via analog form + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :d2bloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t d2bloop1 + s/-;9876543210aaaaaaaaa/;a01!/ + :d2bloop2 + s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/ + /^a/b d2bloop2 + s/[^!]*!// +} + +/d$/ { + # Binary to decimal via analog form + s/^\([^\n]*\)/-&;10a/ + :b2dloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/ + t b2dloop1 + s/-;10a/;aaaaaaaaa0123456789!/ + :b2dloop2 + s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/ + /^a/b b2dloop2 + s/[^!]*!// +} + +/&$/ { + # Binary AND + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/ + :andloop + 
s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t andloop + s/^0*\([^-]*\)-[^\n]*/\1/ + s/^\n/0&/ +} + +/\^$/ { + # Binary XOR + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/ + b orloop +} + +/|$/ { + # Binary OR + s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/ + :orloop + s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/ + t orloop + s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/ +} + +/~$/ { + # Binary NOT + s/^\(.\)\([^\n]*\n\)/\1-010-\2/ + :notloop + s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/ + t notloop + + # If result is 00001..., \3 does not match (it looks for -10) and we just + # remove the table and leading zeros. If result is 0000...0, \3 matches + # (it looks for -0), \4 is a zero and we leave a lone zero as top of the + # stack. + + s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/ +} + +/<$/ { + # Left shift, convert to analog and add a binary digit for each analog digit + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :lshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t lshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + s/a/0/g +} + +/>$/ { + # Right shift, convert to analog and remove a binary digit for each analog digit + s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/ + :rshloop1 + s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/ + t rshloop1 + s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/ + :rshloop2 + s/.a// + s/^aa*/0/ + /a\n/b rshloop2 +} + +s/..$// +x +:bad +s/^.// +tcmd diff --git a/testsuite/bkslashes.good b/testsuite/bkslashes.good new file mode 100644 index 0000000..770d1e6 --- /dev/null +++ b/testsuite/bkslashes.good @@ -0,0 +1,2 @@ +a\ + diff --git a/testsuite/bkslashes.inp b/testsuite/bkslashes.inp new file mode 100644 index 0000000..7898192 --- /dev/null +++ b/testsuite/bkslashes.inp @@ -0,0 +1 @@ +a diff --git a/testsuite/bkslashes.sed b/testsuite/bkslashes.sed new file mode 100644 index 
0000000..aa8c66c --- /dev/null +++ b/testsuite/bkslashes.sed @@ -0,0 +1,3 @@ +# bug in sed 4.0b +s/$/\\\ +/ diff --git a/testsuite/bsd.good b/testsuite/bsd.good new file mode 100644 index 0000000..0e21b0f --- /dev/null +++ b/testsuite/bsd.good @@ -0,0 +1,1737 @@ +============ +Test 1.1:101 +============ +Testing argument parsing +First type +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 + +============ +Test 1.2:102 +============ +e1_l1_1 +e1_l1_2 +e1_l1_3 +e1_l1_4 +e1_l1_5 +e1_l1_6 +e1_l1_7 +e1_l1_8 +e1_l1_9 +e1_l1_10 +e1_l1_11 +e1_l1_12 +e1_l1_13 +e1_l1_14 + +============ +Test 1.3:103 +============ +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 + +============ +Test 1.4:104 +============ +e1_l1_1 +e1_l1_2 +e1_l1_3 +e1_l1_4 +e1_l1_5 +e1_l1_6 +e1_l1_7 +e1_l1_8 +e1_l1_9 +e1_l1_10 +e1_l1_11 +e1_l1_12 +e1_l1_13 +e1_l1_14 +Second type + +============== +Test 1.4.1:105 +============== +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 1.5:106 +============ +s1_l1_1 +s1_l1_1 +s1_l1_2 +s1_l1_2 +s1_l1_3 +s1_l1_3 +s1_l1_4 +s1_l1_4 +s1_l1_5 +s1_l1_5 +s1_l1_6 +s1_l1_6 +s1_l1_7 +s1_l1_7 +s1_l1_8 +s1_l1_8 +s1_l1_9 +s1_l1_9 +s1_l1_10 +s1_l1_10 +s1_l1_11 +s1_l1_11 +s1_l1_12 +s1_l1_12 +s1_l1_13 +s1_l1_13 +s1_l1_14 +s1_l1_14 + +============ +Test 1.6:107 +============ +s1_l1_1 +s1_l1_1 +s1_l1_2 +s1_l1_2 +s1_l1_3 +s1_l1_3 +s1_l1_4 +s1_l1_4 +s1_l1_5 +s1_l1_5 +s1_l1_6 +s1_l1_6 +s1_l1_7 +s1_l1_7 +s1_l1_8 +s1_l1_8 +s1_l1_9 +s1_l1_9 +s1_l1_10 +s1_l1_10 +s1_l1_11 +s1_l1_11 +s1_l1_12 +s1_l1_12 +s1_l1_13 
+s1_l1_13 +s1_l1_14 +s1_l1_14 + +============ +Test 1.7:108 +============ +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 + +============ +Test 1.8:109 +============ +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 + +============ +Test 1.9:110 +============ +s1_l1_1 +s1_l1_2 +s1_l1_3 +s1_l1_4 +s1_l1_5 +s1_l1_6 +s1_l1_7 +s1_l1_8 +s1_l1_9 +s1_l1_10 +s1_l1_11 +s1_l1_12 +s1_l1_13 +s1_l1_14 + +============= +Test 1.10:111 +============= +s1_l1_1 +s1_l1_2 +s1_l1_3 +s1_l1_4 +s1_l1_5 +s1_l1_6 +s1_l1_7 +s1_l1_8 +s1_l1_9 +s1_l1_10 +s1_l1_11 +s1_l1_12 +s1_l1_13 +s1_l1_14 + +============= +Test 1.11:112 +============= +e1_l1_1 +e1_l1_2 +e1_l1_3 +e1_l1_4 +e1_l1_5 +e1_l1_6 +e1_l1_7 +e1_l1_8 +e1_l1_9 +e1_l1_10 +e1_l1_11 +e1_l1_12 +e1_l1_13 +e1_l1_14 + +============= +Test 1.12:113 +============= +e1_l1_1 +e1_l1_2 +e1_l1_3 +e1_l1_4 +e1_l1_5 +e1_l1_6 +e1_l1_7 +e1_l1_8 +e1_l1_9 +e1_l1_10 +e1_l1_11 +e1_l1_12 +e1_l1_13 +e1_l1_14 + +============= +Test 1.13:114 +============= +e1_l1_1 +e2_e1_l1_1 +e2_e1_l1_1 +e1_l1_2 +e2_e1_l1_2 +e2_e1_l1_2 +e1_l1_3 +e2_e1_l1_3 +e2_e1_l1_3 +e1_l1_4 +e2_e1_l1_4 +e2_e1_l1_4 +e1_l1_5 +e2_e1_l1_5 +e2_e1_l1_5 +e1_l1_6 +e2_e1_l1_6 +e2_e1_l1_6 +e1_l1_7 +e2_e1_l1_7 +e2_e1_l1_7 +e1_l1_8 +e2_e1_l1_8 +e2_e1_l1_8 +e1_l1_9 +e2_e1_l1_9 +e2_e1_l1_9 +e1_l1_10 +e2_e1_l1_10 +e2_e1_l1_10 +e1_l1_11 +e2_e1_l1_11 +e2_e1_l1_11 +e1_l1_12 +e2_e1_l1_12 +e2_e1_l1_12 +e1_l1_13 +e2_e1_l1_13 +e2_e1_l1_13 +e1_l1_14 +e2_e1_l1_14 +e2_e1_l1_14 + +============= +Test 1.14:115 +============= +s1_l1_1 +s2_s1_l1_1 +s2_s1_l1_1 +s1_l1_2 +s2_s1_l1_2 +s2_s1_l1_2 +s1_l1_3 
+s2_s1_l1_3 +s2_s1_l1_3 +s1_l1_4 +s2_s1_l1_4 +s2_s1_l1_4 +s1_l1_5 +s2_s1_l1_5 +s2_s1_l1_5 +s1_l1_6 +s2_s1_l1_6 +s2_s1_l1_6 +s1_l1_7 +s2_s1_l1_7 +s2_s1_l1_7 +s1_l1_8 +s2_s1_l1_8 +s2_s1_l1_8 +s1_l1_9 +s2_s1_l1_9 +s2_s1_l1_9 +s1_l1_10 +s2_s1_l1_10 +s2_s1_l1_10 +s1_l1_11 +s2_s1_l1_11 +s2_s1_l1_11 +s1_l1_12 +s2_s1_l1_12 +s2_s1_l1_12 +s1_l1_13 +s2_s1_l1_13 +s2_s1_l1_13 +s1_l1_14 +s2_s1_l1_14 +s2_s1_l1_14 + +============= +Test 1.15:116 +============= +e1_l1_1 +s1_e1_l1_1 +s1_e1_l1_1 +e1_l1_2 +s1_e1_l1_2 +s1_e1_l1_2 +e1_l1_3 +s1_e1_l1_3 +s1_e1_l1_3 +e1_l1_4 +s1_e1_l1_4 +s1_e1_l1_4 +e1_l1_5 +s1_e1_l1_5 +s1_e1_l1_5 +e1_l1_6 +s1_e1_l1_6 +s1_e1_l1_6 +e1_l1_7 +s1_e1_l1_7 +s1_e1_l1_7 +e1_l1_8 +s1_e1_l1_8 +s1_e1_l1_8 +e1_l1_9 +s1_e1_l1_9 +s1_e1_l1_9 +e1_l1_10 +s1_e1_l1_10 +s1_e1_l1_10 +e1_l1_11 +s1_e1_l1_11 +s1_e1_l1_11 +e1_l1_12 +s1_e1_l1_12 +s1_e1_l1_12 +e1_l1_13 +s1_e1_l1_13 +s1_e1_l1_13 +e1_l1_14 +s1_e1_l1_14 +s1_e1_l1_14 + +============= +Test 1.16:117 +============= +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 +e1_l1_1 +e1_l1_1 +e1_l1_2 +e1_l1_2 +e1_l1_3 +e1_l1_3 +e1_l1_4 +e1_l1_4 +e1_l1_5 +e1_l1_5 +e1_l1_6 +e1_l1_6 +e1_l1_7 +e1_l1_7 +e1_l1_8 +e1_l1_8 +e1_l1_9 +e1_l1_9 +e1_l1_10 +e1_l1_10 +e1_l1_11 +e1_l1_11 +e1_l1_12 +e1_l1_12 +e1_l1_13 +e1_l1_13 +e1_l1_14 +e1_l1_14 + +============= +Test 1.17:118 +============= +l1_1 +l1_1 +l1_2 +l1_2 +l1_3 +l1_3 +l1_4 +l1_4 +l1_5 +l1_5 +l1_6 +l1_6 +l1_7 +l1_7 +l1_8 +l1_8 +l1_9 +l1_9 +l1_10 +l1_10 +l1_11 +l1_11 +l1_12 +l1_12 +l1_13 +l1_13 +l1_14 +l1_14 + +============= +Test 1.18:119 +============= +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +Testing address ranges + +============ +Test 2.1:120 +============ +l1_4 + +============ +Test 2.2:121 +============ +l2_6 + +============ 
+Test 2.3:122 +============ +l1_14 + +============ +Test 2.4:123 +============ +l2_9 + +============ +Test 2.5:124 +============ + +============ +Test 2.6:125 +============ +l2_9 + +============ +Test 2.7:126 +============ + +============ +Test 2.9:127 +============ +l1_7 + +============= +Test 2.10:128 +============= +l1_7 + +============= +Test 2.11:129 +============= +l1_7 + +============= +Test 2.12:130 +============= +l1_1 +l1_2 +l1_3 +l1_4 + +============= +Test 2.13:131 +============= +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 + +============= +Test 2.14:132 +============= +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 + +============= +Test 2.15:133 +============= +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 + +============= +Test 2.16:134 +============= +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 + +============= +Test 2.17:135 +============= +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_14 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 + +============= +Test 2.18:136 +============= +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 + +============= +Test 2.19:137 +============= +l1_12 + +============= +Test 2.20:138 +============= +l1_7 +Brace and other grouping + +============ +Test 3.1:139 +============ +l1_1 +l1_2 +l1_3 +^l1T4$ +^l1T5$ +^l1T6$ +^l1T7$ +^l1T8$ +^l1T9$ +^l1T10$ +^l1T11$ +^l1T12$ +l1_13 +l1_14 + +============ +Test 3.2:140 +============ +l1_1 +l1_2 +l1_3 +^l1_4 +^l1_5 +^l1_6$ +^l1_7$ +^l1T8$ +^l1_9$ +^l1_10$ +^l1_11 +^l1_12 +l1_13 +l1_14 + +============ +Test 3.3:141 +============ +^l1T1$ +^l1T2$ +^l1T3$ +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1T13$ +^l1T14$ + +============ +Test 3.4:142 
+============ +^l1_1 +^l1_2 +^l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +^l1_13 +^l1_14 +Testing a c d and i commands + +============ +Test 4.1:143 +============ +before_il1_1 +after_ibefore_il1_1 +before_il1_2 +after_ibefore_il1_2 +before_il1_3 +after_ibefore_il1_3 +before_il1_4 +after_ibefore_il1_4 +before_il1_5 +after_ibefore_il1_5 +before_il1_6 +after_ibefore_il1_6 +before_il1_7 +after_ibefore_il1_7 +before_il1_8 +after_ibefore_il1_8 +before_il1_9 +after_ibefore_il1_9 +before_il1_10 +after_ibefore_il1_10 +before_il1_11 +after_ibefore_il1_11 +before_il1_12 +after_ibefore_il1_12 +before_il1_13 +after_ibefore_il1_13 +before_il1_14 +after_ibefore_il1_14 +before_il2_1 +after_ibefore_il2_1 +before_il2_2 +after_ibefore_il2_2 +before_il2_3 +after_ibefore_il2_3 +before_il2_4 +after_ibefore_il2_4 +before_il2_5 +after_ibefore_il2_5 +before_il2_6 +inserted +after_ibefore_il2_6 +before_il2_7 +after_ibefore_il2_7 +before_il2_8 +after_ibefore_il2_8 +before_il2_9 +after_ibefore_il2_9 + +============ +Test 4.2:144 +============ +before_al1_1 +after_abefore_al1_1 +before_al1_2 +after_abefore_al1_2 +before_al1_3 +after_abefore_al1_3 +before_al1_4 +after_abefore_al1_4 +before_a5-12l1_5 +after_abefore_a5-12l1_5 +appended +before_a5-12l1_6 +after_abefore_a5-12l1_6 +appended +before_a5-12l1_7 +after_abefore_a5-12l1_7 +appended +before_a5-12l1_8 +after_abefore_a5-12l1_8 +appended +before_a5-12l1_9 +after_abefore_a5-12l1_9 +appended +before_a5-12l1_10 +after_abefore_a5-12l1_10 +appended +before_a5-12l1_11 +after_abefore_a5-12l1_11 +appended +before_a5-12l1_12 +after_abefore_a5-12l1_12 +appended +before_al1_13 +after_abefore_al1_13 +before_al1_14 +after_abefore_al1_14 +before_al2_1 +after_abefore_al2_1 +before_al2_2 +after_abefore_al2_2 +before_al2_3 +after_abefore_al2_3 +before_al2_4 +after_abefore_al2_4 +before_al2_5 +after_abefore_al2_5 +before_al2_6 +after_abefore_al2_6 +before_al2_7 +after_abefore_al2_7 +before_al2_8 +after_abefore_al2_8 +before_al2_9 
+after_abefore_al2_9 + +============ +Test 4.3:145 +============ +^l1_1 +^l1_1$ +appended +^l1_2 +^l1_2$ +appended +^l1_3 +^l1_3$ +appended +^l1_4 +^l1_4$ +appended +^l1_5 +^l1_5$ +appended +^l1_6 +^l1_6$ +appended +^l1_7 +^l1_7$ +appended +^l1_8 +appended +^l1_8 +l1_9$ +^l1_10 +appended +^l1_10 +l1_11$ +^l1_12 +^l1_12$ +appended +^l1_13 +^l1_13$ +appended +^l1_14 +^l1_14$ +appended +^l2_1 +^l2_1$ +^l2_2 +^l2_2$ +^l2_3 +^l2_3$ +^l2_4 +^l2_4$ +^l2_5 +^l2_5$ +^l2_6 +^l2_6$ +^l2_7 +^l2_7$ +^l2_8 +^l2_8$ +^l2_9 +^l2_9$ + +============ +Test 4.4:146 +============ +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello + +============ +Test 4.5:147 +============ +hello + +============ +Test 4.6:148 +============ +hello + +============ +Test 4.7:149 +============ +hello + +============ +Test 4.8:150 +============ +Testing labels and branching + +============ +Test 5.1:151 +============ +label2_l1_1 +label3_label2_l1_1 +label1_l1_2 +label1_l1_3 +label1_l1_4 +label1_l1_5 +label1_l1_6 +label1_l1_7 +label1_l1_8 +label1_l1_9 +label1_l1_10 +label1_l1_11 +label1_l1_12 +label2_l1_13 +label3_label2_l1_13 +label2_l1_14 +label3_label2_l1_14 + +============ +Test 5.2:152 +============ +tested l2_1 +tested l2_2 +tested l2_3 +tested l2_4 +tested l2_5 +tested l2_6 +tested l2_7 +tested l2_8 +tested l2_9 +tested l2_10 +tested l2_11 +tested l2_12 +tested l2_13 +tested l2_14 + +============ +Test 5.3:153 +============ +^l1_1 +^l1_1$ +^l1_2 +^l1_2$ +^l1_3 +^l1_3$ +^l1_4 +^l1_4$ +l1_5$ +l1_6$ +l1_7$ +l1_8$ + +============ +Test 5.4:154 +============ +^l1_1$ +^l1_2$ +^l1_3$ +^l1_4$ +^l1_5$ +^l1_6$ +^l1_7$ +^l1_8$ +l1_9$ +l1_10$ +l1_11$ +l1_12$ +l1_13$ +l1_14$ + +============ +Test 5.5:155 +============ +^l1_1 +^l1_2 +^l1_4 +^l1_6 +^l1_8 + +============ +Test 5.6:156 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 + +============ +Test 5.7:157 +============ +l1_1 +l1_2 +l1_3 +l1_4 +hello +l1_5 + +============ +Test 5.8:158 +============ +m1_1 +m1_2 +m1_3 
+m1_4 +m1_5 +m1_6 +m1_7 +m1_8 +m1_9 +m1_10 +m1_11 +m1_12 +m1_13 +m1_14 +Pattern space commands + +============ +Test 6.1:159 +============ +changed +changed +changed +changed +changed +changed +changed +changed +changed +changed +changed +changed +changed +changed + +============ +Test 6.2:160 +============ +l1_1 +l1_2 +l1_3 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 6.3:161 +============ +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 6.4:162 +============ +l1_1 +l1_2 +l1_3 +l1_2 +l1_3 +l1_5 +l1_2 +l1_3 +l1_2 +l1_3 +l1_6 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 6.5:163 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 6.6:164 +============ +Testing print and file routines + +============ +Test 7.1:165 +============ +\001\002\003\004\005\006\a\b\t$ +\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\ +\035\036\037 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX\ +YZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\ +\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\ +\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\ +\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\ +\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\ +\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\ +\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\ +\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\ +\375\376\377$ +$ + +============ +Test 7.2:166 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +15 +l2_1 +16 +l2_2 +17 +l2_3 +18 +l2_4 +19 +l2_5 +20 +l2_6 +21 +l2_7 +22 +l2_8 +23 +l2_9 + +============ +Test 7.3:167 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 
+l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 +w results +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 + +============ +Test 7.4:168 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l2_1 +l2_2 +l2_3 +l2_4 +l2_5 +l2_6 +l2_7 +l2_8 +l2_9 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 7.5:169 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 7.6:170 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 7.8:171 +============ + +Testing substitution commands + +============ +Test 8.1:172 +============ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX + +============ +Test 8.2:173 +============ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX + +============ +Test 8.3:174 +============ +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXX +XXXXX +XXXXX +XXXXX +XXXXX +XXXXX + +============ +Test 8.4:175 +============ +l1_1 +l1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============ +Test 8.5:176 +============ +l1X1 +l1X2 +l1X3 +l1X4 +l1X5 +l1X6 +l1X7 +l1X8 +l1X9 +l1X10 +l1X11 +l1X12 +l1X13 +l1X14 + +============ +Test 8.6:177 +============ +(l)(1)(_)(1) +(l)(1)(_)(2) +(l)(1)(_)(3) +(l)(1)(_)(4) +(l)(1)(_)(5) +(l)(1)(_)(6) +(l)(1)(_)(7) +(l)(1)(_)(8) +(l)(1)(_)(9) +(l)(1)(_)(1)(0) +(l)(1)(_)(1)(1) +(l)(1)(_)(1)(2) +(l)(1)(_)(1)(3) +(l)(1)(_)(1)(4) + +============ +Test 8.7:178 +============ +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) +(&)(&)(&)(&)(&) + +============ +Test 8.8:179 +============ +x_x1xl1 +x_x1xl2 +x_x1xl3 +x_x1xl4 +x_x1xl5 +x_x1xl6 +x_x1xl7 +x_x1xl8 +x_x1xl9 +x_x1xl10 +x_x1xl11 +x_x1xl12 +x_x1xl13 +x_x1xl14 + 
+============ +Test 8.9:180 +============ +l1u0 +u1 +u21 +l1u0 +u1 +u22 +l1u0 +u1 +u23 +l1u0 +u1 +u24 +l1u0 +u1 +u25 +l1u0 +u1 +u26 +l1u0 +u1 +u27 +l1u0 +u1 +u28 +l1u0 +u1 +u29 +l1u0 +u1 +u210 +l1u0 +u1 +u211 +l1u0 +u1 +u212 +l1u0 +u1 +u213 +l1u0 +u1 +u214 + +============= +Test 8.10:181 +============= +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X +l1_X0 +l1_X1 +l1_X2 +l1_X3 +l1_X4 + +============= +Test 8.11:182 +============= +lX_1 +lX_2 +lX_3 +lX_4 +lX_5 +lX_6 +lX_7 +lX_8 +lX_9 +lX_10 +lX_11 +lX_12 +lX_13 +lX_14 +s wfile results +lX_1 +lX_2 +lX_3 +lX_4 +lX_5 +lX_6 +lX_7 +lX_8 +lX_9 +lX_10 +lX_11 +lX_12 +lX_13 +lX_14 + +============= +Test 8.12:183 +============= +lX_X +lX_X +lX_X +lX_4 +lX_5 +lX_6 +lX_7 +lX_8 +lX_9 +lX_X0 +lX_XX +lX_XX +lX_XX +lX_X4 + +============= +Test 8.13:184 +============= +l8_8 +l8_7 +l8_6 +l8_5 +l8_4 +l8_3 +l8_2 +l8_1 +l8_0 +l8_89 +l8_88 +l8_87 +l8_86 +l8_85 + +============= +Test 8.14:185 +============= +l8_8 +l8_7 +l8_6 +l8_5 +l8_4 +l8_3 +l8_2 +l8_1 +l8_0 +l8_89 +l8_88 +l8_87 +l8_86 +l8_85 + +============= +Test 8.15:186 +============= +l1_1Xl1_2 +l1_3 +l1_4 +l1_5 +l1_6 +l1_7 +l1_8 +l1_9 +l1_10 +l1_11 +l1_12 +l1_13 +l1_14 + +============= +Test 8.16:187 +============= +eeefff +Xeefff +XYefff +XYeYff +XYeYYf +XYeYYY +XYeYYY diff --git a/testsuite/bsd.sh b/testsuite/bsd.sh new file mode 100755 index 0000000..fecb2f4 --- /dev/null +++ b/testsuite/bsd.sh @@ -0,0 +1,434 @@ +#!/bin/sh - +# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $ +# +# Copyright (c) 1992 Diomidis Spinellis. +# Copyright (c) 1992, 1993 +# The Regents of the University of California. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by the University of +# California, Berkeley and its contributors. +# 4. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# +# from: @(#)sed.test 8.1 (Berkeley) 6/6/93 +# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $ +# + +# sed Regression Tests + +# Modified by Paolo Bonzini to: +# - not warn about buggy seds +# - run tests once instead of comparing them to the system sed +# - remove most uses of awk +# - cleanup at exit +# - comment tests that broke because of extensions + +main() +{ + TEST="${1-../sed/sed}" + TESTLOG="${2-sed.out}" + # DICT="${3-/usr/share/dict/words}" + + : > lines1 + : > lines2 + for i in 1 2 3 4 5 6 7 8 9; do + echo l1_$i >> lines1 + echo l2_$i >> lines2 + done + for i in 10 11 12 13 14; do + echo l1_$i >> lines1 + done + + # Set these flags to get messages about known problems + tests "$TEST" "$TESTLOG" + + rm -f lines[1234] script[12] +} + +tests() +{ + SED="$1" + LOG="$2" + MARK=100 + rm -f "$LOG" + + exec 3>&0 4>&1 5>&2 + exec 0</dev/null 1>/dev/null 2>/dev/null + test_error + exec 0>&3 1>&4 2>&5 + + exec 4>&1 5>&2 + test_args + test_addr + test_group + test_acid + test_branch + test_pattern + test_print + test_subst + exec 1>&4 2>&5 +} + +mark() +{ + exec 2>&1 >>$LOG + test $MARK = 100 || echo + MARK=`expr $MARK + 1` + echo "Test $1:$MARK" | sed 's/./=/g' + echo "Test $1:$MARK" + echo "Test $1:$MARK" | sed 's/./=/g' +} + +test_args() +{ + mark '1.1' + echo Testing argument parsing + echo First type + $SED 's/^/e1_/p' lines1 + mark '1.2' ; $SED -n 's/^/e1_/p' lines1 + mark '1.3' ; $SED 's/^/e1_/p' <lines1 + mark '1.4' ; $SED -n 's/^/e1_/p' <lines1 + echo Second type + mark '1.4.1' + $SED -e '' <lines1 + echo 's/^/s1_/p' >script1 + echo 's/^/s2_/p' >script2 + mark '1.5' ; $SED -f script1 lines1 + mark '1.6' ; $SED -f script1 <lines1 + mark '1.7' ; $SED -e 's/^/e1_/p' lines1 + mark '1.8' ; $SED -e 's/^/e1_/p' <lines1 + mark '1.9' ; $SED -n -f script1 lines1 + mark '1.10' ; $SED -n -f script1 <lines1 + mark '1.11' ; $SED -n -e 's/^/e1_/p' lines1 + mark '1.12' ; $SED -n -e 's/^/e1_/p' <lines1 + mark '1.13' ; $SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1 + 
mark '1.14' ; $SED -f script1 -f script2 lines1 + mark '1.15' ; $SED -e 's/^/e1_/p' -f script1 lines1 + mark '1.16' ; $SED -e 's/^/e1_/p' lines1 lines1 + # POSIX D11.2:11251 + mark '1.17' ; $SED p <lines1 lines1 +cat >script1 <<EOF +#n +# A comment + +p +EOF + mark '1.18' ; $SED -f script1 <lines1 lines1 +} + +test_addr() +{ + echo Testing address ranges + mark '2.1' ; $SED -n -e '4p' lines1 + mark '2.2' ; $SED -n -e '20p' lines1 lines2 + mark '2.3' ; $SED -n -e '$p' lines1 + mark '2.4' ; $SED -n -e '$p' lines1 lines2 + mark '2.5' ; $SED -n -e '$a\ +hello' /dev/null + mark '2.6' ; $SED -n -e '$p' lines1 /dev/null lines2 + # Should not print anything + mark '2.7' ; $SED -n -e '20p' lines1 + # Disabled because it is undefined behavior + # mark '2.8' ; $SED -n -e '0p' lines1 + mark '2.9' ; $SED -n '/l1_7/p' lines1 + mark '2.10' ; $SED -n ' /l1_7/ p' lines1 + mark '2.11' ; $SED -n '\_l1\_7_p' lines1 + mark '2.12' ; $SED -n '1,4p' lines1 + mark '2.13' ; $SED -n '1,$p' lines1 lines2 + mark '2.14' ; $SED -n '1,/l2_9/p' lines1 lines2 + mark '2.15' ; $SED -n '/4/,$p' lines1 lines2 + mark '2.16' ; $SED -n '/4/,20p' lines1 lines2 + mark '2.17' ; $SED -n '/4/,/10/p' lines1 lines2 + mark '2.18' ; $SED -n '/l2_3/,/l1_8/p' lines1 lines2 + mark '2.19' ; $SED -n '12,3p' lines1 lines2 + mark '2.20' ; $SED -n '/l1_7/,3p' lines1 lines2 +} + +test_group() +{ + echo Brace and other grouping + mark '3.1' ; $SED -e ' +4,12 { + s/^/^/ + s/$/$/ + s/_/T/ +}' lines1 + mark '3.2' ; $SED -e ' +4,12 { + s/^/^/ + /6/,/10/ { + s/$/$/ + /8/ s/_/T/ + } +}' lines1 + mark '3.3' ; $SED -e ' +4,12 !{ + s/^/^/ + /6/,/10/ !{ + s/$/$/ + /8/ !s/_/T/ + } +}' lines1 + mark '3.4' ; $SED -e '4,12!s/^/^/' lines1 +} + +test_acid() +{ + echo Testing a c d and i commands + mark '4.1' ; $SED -n -e ' +s/^/before_i/p +20i\ +inserted +s/^/after_i/p +' lines1 lines2 + mark '4.2' ; $SED -n -e ' +5,12s/^/5-12/ +s/^/before_a/p +/5-12/a\ +appended +s/^/after_a/p +' lines1 lines2 + mark '4.3' ; $SED -n -e ' +s/^/^/p +/l1_/a\ 
+appended +8,10N +s/$/$/p +' lines1 lines2 + mark '4.4' ; $SED -n -e ' +c\ +hello +' lines1 + mark '4.5' ; $SED -n -e ' +8c\ +hello +' lines1 + mark '4.6' ; $SED -n -e ' +3,14c\ +hello +' lines1 + mark '4.7' ; $SED -n -e ' +8,3c\ +hello +' lines1 + mark '4.8' ; $SED d <lines1 +} + +test_branch() +{ + echo Testing labels and branching + mark '5.1' ; $SED -n -e ' +b label4 +:label3 +s/^/label3_/p +b end +:label4 +2,12b label1 +b label2 +:label1 +s/^/label1_/p +b +:label2 +s/^/label2_/p +b label3 +:end +' lines1 + mark '5.2' ; $SED -n -e ' +s/l1_/l2_/ +t ok +b +:ok +s/^/tested /p +' lines1 lines2 + mark '5.3' ; $SED -n -e ' +5,8b inside +1,5 { + s/^/^/p + :inside + s/$/$/p +} +' lines1 +# Check that t clears the substitution done flag + mark '5.4' ; $SED -n -e ' +1,8s/^/^/ +t l1 +:l1 +t l2 +s/$/$/p +b +:l2 +s/^/ERROR/ +' lines1 +# Check that reading a line clears the substitution done flag + mark '5.5' ; $SED -n -e ' +t l2 +1,8s/^/^/p +2,7N +b +:l2 +s/^/ERROR/p +' lines1 + mark '5.6' ; $SED 5q lines1 + mark '5.7' ; $SED -e ' +5i\ +hello +5q' lines1 +# Branch across block boundary + mark '5.8' ; $SED -e ' +{ +:b +} +s/l/m/ +tb' lines1 +} + +test_pattern() +{ +echo Pattern space commands +# Check that the pattern space is deleted + mark '6.1' ; $SED -n -e ' +c\ +changed +p +' lines1 + mark '6.2' ; $SED -n -e ' +4d +p +' lines1 + mark '6.3' ; $SED -e ' +N +N +N +D +P +4p +' lines1 + mark '6.4' ; $SED -e ' +2h +3H +4g +5G +6x +6p +6x +6p +' lines1 + mark '6.5' ; $SED -e '4n' lines1 + mark '6.6' ; $SED -n -e '4n' lines1 +} + +test_print() +{ + echo Testing print and file routines + awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \ + </dev/null >lines3 + mark '7.1' ; $SED -n l lines3 + mark '7.2' ; $SED -e '/l2_/=' lines1 lines2 + rm -f lines4 + mark '7.3' ; $SED -e '3,12w lines4' lines1 + echo w results + cat lines4 + mark '7.4' ; $SED -e '4r lines2' lines1 + mark '7.5' ; $SED -e '5r /dev/dds' lines1 + mark '7.6' ; $SED -e '6r /dev/null' lines1 + # mark 
'7.7' + # sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1 + # rm -rf tmpdir + # mkdir tmpdir + # $SED -f script1 lines1 + # cat tmpdir/* + # rm -rf tmpdir + mark '7.8' + echo line1 > lines3 + echo "" >> lines3 + $SED -n -e '$p' lines3 /dev/null +} + +test_subst() +{ + echo Testing substitution commands + mark '8.1' ; $SED -e 's/./X/g' lines1 + mark '8.2' ; $SED -e 's,.,X,g' lines1 + mark '8.3' ; $SED -e 's.\..X.g' lines1 +# POSIX does not say that this should work +# mark '8.4' ; $SED -e 's/[/]/Q/' lines1 + mark '8.4' ; $SED -e 's/[\/]/Q/' lines1 + mark '8.5' ; $SED -e 's_\__X_' lines1 + mark '8.6' ; $SED -e 's/./(&)/g' lines1 + mark '8.7' ; $SED -e 's/./(\&)/g' lines1 + mark '8.8' ; $SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1 + mark '8.9' ; $SED -e 's/_/u0\ +u1\ +u2/g' lines1 + mark '8.10' ; $SED -e 's/./X/4' lines1 + rm -f lines4 + mark '8.11' ; $SED -e 's/1/X/w lines4' lines1 + echo s wfile results + cat lines4 + mark '8.12' ; $SED -e 's/[123]/X/g' lines1 + mark '8.13' ; $SED -e 'y/0123456789/9876543210/' lines1 + mark '8.14' ; $SED -e 'y10\123456789198765432\101' lines1 + mark '8.15' ; $SED -e '1N;2y/\n/X/' lines1 + mark '8.16' + echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \ + -e 's//Y/p' -e '/f/bx' +} + +test_error() +{ + $SED -x && exit 1 + $SED -f && exit 1 + $SED -e && exit 1 + $SED -f /dev/dds && exit 1 + $SED p /dev/dds && exit 1 + $SED -f /bin/sh && exit 1 + $SED '{' && exit 1 + $SED '{' && exit 1 + $SED '/hello/' && exit 1 + $SED '1,/hello/' && exit 1 + $SED -e '-5p' && exit 1 + $SED '/jj' && exit 1 + # $SED 'a hello' && exit 1 + # $SED 'a \ hello' && exit 1 + $SED 'b foo' && exit 1 + $SED 'd hello' && exit 1 + $SED 's/aa' && exit 1 + $SED 's/aa/' && exit 1 + $SED 's/a/b' && exit 1 + $SED 's/a/b/c/d' && exit 1 + $SED 's/a/b/ 1 2' && exit 1 + # $SED 's/a/b/ 1 g' && exit 1 + $SED 's/a/b/w' && exit 1 + $SED 'y/aa' && exit 1 + $SED 'y/aa/b/' && exit 1 + $SED 'y/aa/' && exit 1 + $SED 'y/a/b' && exit 1 + $SED 'y/a/b/c/d' && exit 1 + $SED '!' 
&& exit 1 + $SED supercalifrangolisticexprialidociussupercalifrangolisticexcius +} + +main ${1+"$@"} diff --git a/testsuite/bug-regex10.c b/testsuite/bug-regex10.c new file mode 100644 index 0000000..1a21617 --- /dev/null +++ b/testsuite/bug-regex10.c @@ -0,0 +1,65 @@ +/* Test for re_match with non-zero start. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <regex.h> + +int +main (void) +{ + struct re_pattern_buffer regex; + struct re_registers regs; + const char *s; + int match; + int result = 0; + + regs.num_regs = 1; + memset (&regex, '\0', sizeof (regex)); + s = re_compile_pattern ("[abc]*d", 7, &regex); + if (s != NULL) + { + puts ("re_compile_pattern return non-NULL value"); + result = 1; + } + else + { + match = re_match (&regex, "foacabdxy", 9, 2, &regs); + if (match != 5) + { + printf ("re_match returned %d, expected 5\n", match); + result = 1; + } + else if (regs.start[0] != 2 || regs.end[0] != 7) + { + printf ("re_match returned %d..%d, expected 2..7\n", + regs.start[0], regs.end[0]); + result = 1; + } + puts (" -> OK"); + } + + return result; +} diff --git a/testsuite/bug-regex11.c b/testsuite/bug-regex11.c new file mode 100644 index 0000000..dbfa3f9 --- /dev/null +++ b/testsuite/bug-regex11.c @@ -0,0 +1,143 @@ +/* Regular expression tests. + Copyright (C) 2002, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA.
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> + +/* Tests supposed to match. */ +struct +{ + const char *pattern; + const char *string; + int flags, nmatch; + regmatch_t rm[5]; +} tests[] = { + /* Test for newline handling in regex. */ + { "[^~]*~", "\nx~y", 0, 2, { { 0, 3 }, { -1, -1 } } }, + /* Other tests. */ + { "a(.*)b", "a b", REG_EXTENDED, 2, { { 0, 3 }, { 1, 2 } } }, + { ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]", "10~.~|P|K0|I10|O16|?KSb", 0, 3, + { { 0, 21 }, { 15, 16 }, { 16, 18 } } }, + { ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1", "10~.~|P|K0|I10|O16|?KSb", 0, 3, + { { 0, 21 }, { 8, 9 }, { 9, 10 } } }, + { "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)", + "a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0", 0, + 5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } }, + /* Test for BRE expression anchoring. POSIX says just that this may match; + in glibc regex it always matched, so avoid changing it. */ + { "\\(^\\|foo\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } }, + { "\\(foo\\|^\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } }, + /* In ERE this must be treated as an anchor. */ + { "(^|foo)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } }, + { "(foo|^)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } }, + /* Here ^ cannot be treated as an anchor according to POSIX. */ + { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, + { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, + /* More tests on backreferences. 
*/ + { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } }, + { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } }, + { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } }, + { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, + { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, + { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } }, + { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } }, + { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } }, + { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } }, + { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } }, + { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } }, + { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } }, + { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } }, + { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } }, + { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } }, + { "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$", + "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "abcdedcba", REG_EXTENDED, 1, { { 0, 9 } } }, +#if 0 + /* XXX Not used since they fail so far. 
*/ + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$", + "ababababa", REG_EXTENDED, 1, { { 0, 9 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$", + "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, + { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$", + "ababababa", REG_EXTENDED, 1, { { 0, 9 } } }, +#endif +}; + +int +main (void) +{ + regex_t re; + regmatch_t rm[5]; + size_t i; + int n, ret = 0; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) + { + n = regcomp (&re, tests[i].pattern, tests[i].flags); + if (n != 0) + { + char buf[500]; + regerror (n, &re, buf, sizeof (buf)); + printf ("%s: regcomp %lu failed: %s\n", tests[i].pattern, i, buf); + ret = 1; + continue; + } + + if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0)) + { + printf ("%s: regexec %lu failed\n", tests[i].pattern, i); + ret = 1; + regfree (&re); + continue; + } + + for (n = 0; n < tests[i].nmatch; ++n) + if (rm[n].rm_so != tests[i].rm[n].rm_so + || rm[n].rm_eo != tests[i].rm[n].rm_eo) + { + if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1) + break; + printf ("%s: regexec %lu match failure rm[%d] %d..%d\n", + tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo); + ret = 1; + break; + } + + regfree (&re); + } + + return ret; +} diff --git a/testsuite/bug-regex12.c b/testsuite/bug-regex12.c new file mode 100644 index 0000000..a4db0cc --- /dev/null +++ b/testsuite/bug-regex12.c @@ -0,0 +1,81 @@ +/* Regular expression tests. + Copyright (C) 2002, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> + +/* Tests supposed to not match. */ +struct +{ + const char *pattern; + const char *string; + int flags, nmatch; +} tests[] = { + { "^<\\([^~]*\\)\\([^~]\\)[^~]*~\\1\\(.\\).*|=.*\\3.*\\2", + "<,.8~2,~so-|=-~.0,123456789<><", REG_NOSUB, 0 }, + /* In ERE, all carets must be treated as anchors. */ + { "a^b", "a^b", REG_EXTENDED, 0 } +}; + +int +main (void) +{ + regex_t re; + regmatch_t rm[4]; + size_t i; + int n, ret = 0; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) + { + n = regcomp (&re, tests[i].pattern, tests[i].flags); + if (n != 0) + { + char buf[500]; + regerror (n, &re, buf, sizeof (buf)); + printf ("regcomp %lu failed: %s\n", i, buf); + ret = 1; + continue; + } + + if (! regexec (&re, tests[i].string, tests[i].nmatch, + tests[i].nmatch ? rm : NULL, 0)) + { + printf ("regexec %lu incorrectly matched\n", i); + ret = 1; + } + + regfree (&re); + } + + return ret; +} diff --git a/testsuite/bug-regex13.c b/testsuite/bug-regex13.c new file mode 100644 index 0000000..a28c5fa --- /dev/null +++ b/testsuite/bug-regex13.c @@ -0,0 +1,111 @@ +/* Regular expression tests. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>, 2002. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static struct +{ + int syntax; + const char *pattern; + const char *string; + int start; +} tests[] = { + {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match. */ + {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match. */ + {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abbc", -1}, + /* Nested duplication. 
*/ + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "ac", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abc", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "ac", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbc", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abbc", -1}, +}; + +int +main (void) +{ + struct re_pattern_buffer regbuf; + const char *err; + size_t i; + int ret = 0; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i) + { + int start; + re_set_syntax (tests[i].syntax); + memset (&regbuf, '\0', sizeof (regbuf)); + err = re_compile_pattern (tests[i].pattern, strlen (tests[i].pattern), + &regbuf); + if (err != NULL) + { + printf ("re_compile_pattern failed: %s\n", err); + ret = 1; + continue; + } + + start = re_search (&regbuf, tests[i].string, strlen (tests[i].string), + 0, strlen (tests[i].string), NULL); + if (start != tests[i].start) + { + printf ("re_search failed %d\n", start); + ret = 1; + regfree (&regbuf); + continue; + } + regfree (&regbuf); + } + + return ret; +} diff --git a/testsuite/bug-regex14.c b/testsuite/bug-regex14.c
new file mode 100644 index 0000000..4b296d8 --- /dev/null +++ b/testsuite/bug-regex14.c @@ -0,0 +1,62 @@ +/* Tests re_comp and re_exec. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define _REGEX_RE_COMP +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> + +int +main (void) +{ + const char *err; + size_t i; + int ret = 0; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + for (i = 0; i < 100; ++i) + { + err = re_comp ("a t.st"); + if (err) + { + printf ("re_comp failed: %s\n", err); + ret = 1; + } + + if (! re_exec ("This is a test.")) + { + printf ("re_exec failed\n"); + ret = 1; + } + } + + return ret; +} diff --git a/testsuite/bug-regex15.c b/testsuite/bug-regex15.c new file mode 100644 index 0000000..14707e8 --- /dev/null +++ b/testsuite/bug-regex15.c @@ -0,0 +1,51 @@ +/* Test for memory/CPU leak in regcomp. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> + +#define TEST_DATA_LIMIT (32 << 20) + +int +main () +{ +#ifdef RLIMIT_DATA + regex_t re; + int reerr; + + /* Try to avoid eating all memory if a test leaks. */ + struct rlimit data_limit; + if (getrlimit (RLIMIT_DATA, &data_limit) == 0) + { + if ((rlim_t) TEST_DATA_LIMIT > data_limit.rlim_max) + data_limit.rlim_cur = data_limit.rlim_max; + else if (data_limit.rlim_cur > (rlim_t) TEST_DATA_LIMIT) + data_limit.rlim_cur = (rlim_t) TEST_DATA_LIMIT; + if (setrlimit (RLIMIT_DATA, &data_limit) < 0) + perror ("setrlimit: RLIMIT_DATA"); + } + else + perror ("getrlimit: RLIMIT_DATA"); + + reerr = regcomp (&re, "^6?3?[25]?5?[14]*[25]*[69]*+[58]*87?4?$", + REG_EXTENDED | REG_NOSUB); + if (reerr != 0) + { + char buf[100]; + regerror (reerr, &re, buf, sizeof buf); + printf ("regerror %s\n", buf); + return 1; + } + + return 0; +#else + return 77; +#endif +} diff --git a/testsuite/bug-regex16.c b/testsuite/bug-regex16.c new file mode 100644 index 0000000..7a1d3c8 --- /dev/null +++ b/testsuite/bug-regex16.c @@ -0,0 +1,39 @@ +/* Test re_compile_pattern error messages. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <regex.h> + +int +main (void) +{ + struct re_pattern_buffer re; + const char *s; + int ret = 0; + + re_set_syntax (RE_SYNTAX_POSIX_EGREP); + memset (&re, 0, sizeof (re)); + s = re_compile_pattern ("[[.invalid_collating_symbol.]]", 30, &re); + if (s == NULL || strcmp (s, "Invalid collation character")) + { + printf ("re_compile_pattern returned %s\n", s); + ret = 1; + } + s = re_compile_pattern ("[[=invalid_equivalence_class=]]", 31, &re); + if (s == NULL || strcmp (s, "Invalid collation character")) + { + printf ("re_compile_pattern returned %s\n", s); + ret = 1; + } + s = re_compile_pattern ("[[:invalid_character_class:]]", 29, &re); + if (s == NULL || strcmp (s, "Invalid character class name")) + { + printf ("re_compile_pattern returned %s\n", s); + ret = 1; + } + return ret; +} diff --git a/testsuite/bug-regex21.c b/testsuite/bug-regex21.c new file mode 100644 index 0000000..0232876 --- /dev/null +++ b/testsuite/bug-regex21.c @@ -0,0 +1,53 @@ +/* Test for memory leaks in regcomp. + Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> + +int main (void) +{ + regex_t re; + int i; + int ret = 0; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + for (i = 0; i < 32; ++i) + { + if (regcomp (&re, "X-.+:.+Y=\".*\\.(A|B|C|D|E|F|G|H|I", + REG_EXTENDED | REG_ICASE) == 0) + { + puts ("regcomp unexpectedly succeeded"); + ret = 1; + } + else + regfree (&re); + } + return ret; +} diff --git a/testsuite/bug-regex7.c b/testsuite/bug-regex7.c new file mode 100644 index 0000000..2051985 --- /dev/null +++ b/testsuite/bug-regex7.c @@ -0,0 +1,96 @@ +/* Test for regs allocation in re_search and re_match. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Stepan Kasal <kasal@math.cas.cz>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <regex.h> + + +int +main (void) +{ + struct re_pattern_buffer regex; + struct re_registers regs; + const char *s; + int match, n; + int result = 0; + + memset (&regex, '\0', sizeof (regex)); + regs.start = regs.end = NULL; + regs.num_regs = 0; + s = re_compile_pattern ("a", 1, &regex); + if (s != NULL) + { + puts ("failed to compile pattern \"a\""); + result = 1; + } + else + { + match = re_search (&regex, "baobab", 6, 0, 6, &regs); + n = 1; + if (match != 1) + { + printf ("re_search returned %d, expected 1\n", match); + result = 1; + } + else if (regs.num_regs <= n || regs.start[n] != -1 || regs.end[n] != -1) + { + puts ("re_search failed to fill the -1 sentinel"); + result = 1; + } + } + + free (regex.buffer); + memset (&regex, '\0', sizeof (regex)); + + s = re_compile_pattern ("\\(\\(\\(a\\)\\)\\)", 13, &regex); + if (s != NULL) + { + puts ("failed to compile pattern /\\(\\(\\(a\\)\\)\\)/"); + result = 1; + } + else + { + match = re_match (&regex, "apl", 3, 0, &regs); + n = 4; + if (match != 1) + { + printf ("re_match returned %d, expected 1\n", match); + result = 1; + } + else if (regs.num_regs <= n || regs.start[n] != -1 || regs.end[n] != -1) + { + puts ("re_match failed to fill the -1 sentinel"); + result = 1; + } + } + + if (result == 0) + puts (" -> OK"); + + return result; +} diff --git a/testsuite/bug-regex8.c b/testsuite/bug-regex8.c new file mode 100644 index 0000000..e39ad59 --- /dev/null +++ b/testsuite/bug-regex8.c @@ -0,0 +1,88 @@ +/* Test for the STOP parameter of re_match_2 and re_search_2. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Stepan Kasal <kasal@math.cas.cz>, 2002. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <regex.h> + + +int +main (void) +{ + struct re_pattern_buffer regex; + const char *s; + int match[4]; + + memset (&regex, '\0', sizeof (regex)); + + s = re_compile_pattern ("xy$", 3, &regex); + if (s != NULL) + { + puts ("failed to compile pattern \"xy$\""); + return 1; + } + else + match[0] = re_match_2(&regex,"xyz",3,NULL,0,0,NULL,2); + + free (regex.buffer); + memset (&regex, '\0', sizeof (regex)); + + s = re_compile_pattern ("xy\\>", 4, &regex); + if (s != NULL) + { + puts ("failed to compile pattern \"xy\\>\""); + return 1; + } + else + match[1] = re_search_2(&regex,"xyz",3,NULL,0,0,2,NULL,2); + + free (regex.buffer); + memset (&regex, '\0', sizeof (regex)); + + s = re_compile_pattern ("xy \\<", 5, &regex); + if (s != NULL) + { + puts ("failed to compile pattern \"xy \\<\""); + return 1; + } + else + { + match[2] = re_match_2(&regex,"xy ",4,NULL,0,0,NULL,3); + match[3] = re_match_2(&regex,"xy z",4,NULL,0,0,NULL,3); + } + + if (match[0] != -1 || match[1] != -1 || match[2] != -1 || match[3] != 3) + { + printf ("re_{match,search}_2 returned %d,%d,%d,%d, expected -1,-1,-1,3\n", + match[0], 
match[1], match[2], match[3]); + return 1; + } + + puts (" -> OK"); + + return 0; +} diff --git a/testsuite/bug-regex9.c b/testsuite/bug-regex9.c new file mode 100644 index 0000000..c0e9e18 --- /dev/null +++ b/testsuite/bug-regex9.c @@ -0,0 +1,75 @@ +/* Test for memory handling in regex. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> + + +static const char text[] = "#! /bin/sh"; + +int +main (void) +{ + regex_t re; + regmatch_t rm[2]; + int n; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + n = regcomp (&re, "^#! 
*/.*/(k|ba||pdk|z)sh", REG_EXTENDED); + if (n != 0) + { + char buf[500]; + regerror (n, &re, buf, sizeof (buf)); + printf ("regcomp failed: %s\n", buf); + exit (1); + } + + for (n = 0; n < 20; ++n) + { + if (regexec (&re, text, 2, rm, 0)) + { + puts ("regexec failed"); + exit (2); + } + if (rm[0].rm_so != 0 || rm[0].rm_eo != 10 + || rm[1].rm_so != 8 || rm[1].rm_eo != 8) + { + printf ("regexec match failure: %d %d %d %d\n", + rm[0].rm_so, rm[0].rm_eo, rm[1].rm_so, rm[1].rm_eo); + exit (3); + } + } + + regfree (&re); + + return 0; +} diff --git a/testsuite/classes.good b/testsuite/classes.good new file mode 100644 index 0000000..58f96f4 --- /dev/null +++ b/testsuite/classes.good @@ -0,0 +1,4 @@ +: ${_cv_='emptyvar'} +: ${ac_cv_prog/RANLIB='/usr/bin/ranlib'} +: ${ac_cv_prog/CC='/usr/unsupported/\ \ /lib/_cv_/cc'} +: ${a/c_cv_prog/CPP='/usr/bin/cpp'} diff --git a/testsuite/classes.inp b/testsuite/classes.inp new file mode 100644 index 0000000..f1314b6 --- /dev/null +++ b/testsuite/classes.inp @@ -0,0 +1,6 @@ +_cv_=emptyvar +ac_cv_prog/RANLIB=/usr/bin/ranlib +ac_cv_prog/CC=/usr/unsupported/\ \ /lib/_cv_/cc +a/c_cv_prog/CPP=/usr/bin/cpp +SHELL=bash +GNU=GNU!UNIX diff --git a/testsuite/classes.sed b/testsuite/classes.sed new file mode 100644 index 0000000..897651f --- /dev/null +++ b/testsuite/classes.sed @@ -0,0 +1,2 @@ +# inspired by an autoconf generated configure script. 
+s/^\([/[:lower:]A-Z0-9]*_cv_[[:lower:][:upper:]/[:digit:]]*\)=\(.*\)/: \${\1='\2'}/p diff --git a/testsuite/cv-vars.good b/testsuite/cv-vars.good new file mode 100644 index 0000000..14707bf --- /dev/null +++ b/testsuite/cv-vars.good @@ -0,0 +1,4 @@ +: ${_cv_='emptyvar'} +: ${ac_cv_prog_RANLIB='/usr/bin/ranlib'} +: ${ac_cv_prog_CC='/usr/unsupported/\ \ /lib/_cv_/cc'} +: ${ac_cv_prog_CPP='/usr/bin/cpp'} diff --git a/testsuite/cv-vars.inp b/testsuite/cv-vars.inp new file mode 100644 index 0000000..5e92b1c --- /dev/null +++ b/testsuite/cv-vars.inp @@ -0,0 +1,6 @@ +_cv_=emptyvar +ac_cv_prog_RANLIB=/usr/bin/ranlib +ac_cv_prog_CC=/usr/unsupported/\ \ /lib/_cv_/cc +ac_cv_prog_CPP=/usr/bin/cpp +SHELL=bash +GNU=GNU!UNIX diff --git a/testsuite/cv-vars.sed b/testsuite/cv-vars.sed new file mode 100644 index 0000000..e49c73e --- /dev/null +++ b/testsuite/cv-vars.sed @@ -0,0 +1,2 @@ +# inspired by an autoconf generated configure script. +s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/: \${\1='\2'}/p diff --git a/testsuite/dc.good b/testsuite/dc.good new file mode 100644 index 0000000..7604d56 --- /dev/null +++ b/testsuite/dc.good @@ -0,0 +1,3 @@ +31 +March 2002 +1.6A09E667A diff --git a/testsuite/dc.inp b/testsuite/dc.inp new file mode 100644 index 0000000..e4b731b --- /dev/null +++ b/testsuite/dc.inp @@ -0,0 +1,14 @@ +# Compute Easter of 2002... +# usage: (echo YEAR; cat easter.dc) | dc + +2002 + +[ddsf[lfp[too early +]Pq]s@1583>@ +ddd19%1+sg100/1+d3*4/12-sx8*5+25/5-sz5*4/lx-10-sdlg11*20+lz+lx-30% +d[30+]s@0>@d[[1+]s@lg11<@]s@25=@d[1+]s@24=@se44le-d[30+]s@21>@dld+7%-7+ +[March ]smd[31-[April ]sm]s@31<@psnlmPpsn1z>p]splpx + +# Compute square root of 2 + +16oAk2vpq
\ No newline at end of file diff --git a/testsuite/dc.sed b/testsuite/dc.sed new file mode 100644 index 0000000..5267589 --- /dev/null +++ b/testsuite/dc.sed @@ -0,0 +1,322 @@ +#!/bin/sed -nf +# dc.sed - an arbitrary precision RPN calculator +# Created by Greg Ubben <gsu@romulus.ncsc.mil> early 1995, late 1996 +# +# Dedicated to MAC's memory of the IBM 1620 ("CADET") computer. +# @(#)GSU dc.sed 1.1 06-Mar-1999 [non-explanatory] +# +# Examples: +# sqrt(2) to 10 digits: echo "10k 2vp" | dc.sed +# 20 factorial: echo "[d1-d1<!*]s! 20l!xp" | dc.sed +# sin(ln(7)): echo "s(l(7))" | bc -c /usr/lib/lib.b | dc.sed +# hex to base 60: echo "60o16i 6B407.CAFE p" | dc.sed +# tests most of dc.sed: echo 16oAk2vp | dc.sed +# +# To debug or analyze, give the dc Y command as input or add it to +# embedded dc routines, or add the sed p command to the beginning of +# the main loop or at various points in the low-level sed routines. +# If you need to allow [|~] characters in the input, filter this +# script through "tr '|~' '\36\37'" first (or use dc.pl). +# +# Not implemented: ! \ +# But implemented: K Y t # !< !> != fractional-bases +# SunOS limits: 199/199 commands (though could pack in 10-20 more) +# Limitations: scale <= 999; |obase| >= 1; input digits in [0..F] +# Completed: 1am Feb 4, 1997 + +s/^/|P|K0|I10|O10|?~/ + +: next +s/|?./|?/ +s/|?#[ -}]*/|?/ +/|?!*[lLsS;:<>=]\{0,1\}$/N +/|?!*[-+*/%^<>=]/b binop +/^|.*|?[dpPfQXZvxkiosStT;:]/b binop +/|?[_0-9A-F.]/b number +/|?\[/b string +/|?l/b load +/|?L/b Load +/|?[sS]/b save +/|?c/ s/[^|]*// +/|?d/ s/[^~]*~/&&/ +/|?f/ s//&[pSbz0<aLb]dSaxsaLa/ +/|?x/ s/\([^~]*~\)\(.*|?x\)~*/\2\1/ +/|?[KIO]/ s/.*|\([KIO]\)\([^|]*\).*|?\1/\2~&/ +/|?T/ s/\.*0*~/~/ +# a slow, non-stackable array implementation in dc, just for completeness +# A fast, stackable, associative array implementation could be done in sed +# (format: {key}value{key}value...), but would be longer, like load & save. 
+/|?;/ s/|?;\([^{}]\)/|?~[s}s{L{s}q]S}[S}l\1L}1-d0>}s\1L\1l{xS\1]dS{xL}/ +/|?:/ s/|?:\([^{}]\)/|?~[s}L{s}L{s}L}s\1q]S}S}S{[L}1-d0>}S}l\1s\1L\1l{xS\1]dS{x/ +/|?[ ~ cdfxKIOT]/b next +/|?\n/b next +/|?[pP]/b print +/|?k/ s/^\([0-9]\{1,3\}\)\([.~].*|K\)[^|]*/\2\1/ +/|?i/ s/^\(-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}\)\(~.*|I\)[^|]*/\2\1/ +/|?o/ s/^\(-\{0,1\}[1-9][0-9]*\.\{0,1\}[0-9]*\)\(~.*|O\)[^|]*/\2\1/ +/|?[kio]/b pop +/|?t/b trunc +/|??/b input +/|?Q/b break +/|?q/b quit +h +/|?[XZz]/b count +/|?v/b sqrt +s/.*|?\([^Y]\).*/\1 is unimplemented/ +s/\n/\\n/g +l +g +b next + +: print +/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~.*|?p/!b Print +/|O10|/b Print + +# Print a number in a non-decimal output base. Uses registers a,b,c,d. +# Handles fractional output bases (O<-1 or O>=1), unlike other dc's. +# Converts the fraction correctly on negative output bases, unlike +# UNIX dc. Also scales the fraction more accurately than UNIX dc. +# +s,|?p,&KSa0kd[[-]Psa0la-]Sad0>a[0P]sad0=a[A*2+]saOtd0>a1-ZSd[[[[ ]P]sclb1\ +!=cSbLdlbtZ[[[-]P0lb-sb]sclb0>c1+]sclb0!<c[0P1+dld>c]scdld>cscSdLbP]q]Sb\ +[t[1P1-d0<c]scd0<c]ScO_1>bO1!<cO[16]<bOX0<b[[q]sc[dSbdA>c[A]sbdA=c[B]sbd\ +B=c[C]sbdC=c[D]sbdD=c[E]sbdE=c[F]sb]xscLbP]~Sd[dtdZOZ+k1O/Tdsb[.5]*[.1]O\ +X^*dZkdXK-1+ktsc0kdSb-[Lbdlb*lc+tdSbO*-lb0!=aldx]dsaxLbsb]sad1!>a[[.]POX\ ++sb1[SbO*dtdldx-LbO*dZlb!<a]dsax]sadXd0<asbsasaLasbLbscLcsdLdsdLdLak[]pP, +b next + +: Print +/|?p/s/[^~]*/&\ +~&/ +s/\(.*|P\)\([^|]*\)/\ +\2\1/ +s/\([^~]*\)\n\([^~]*\)\(.*|P\)/\1\3\2/ +h +s/~.*// +/./{ s/.//; p; } +# Just s/.//p would work if we knew we were running under the -n option. +# Using l vs p would kind of do \ continuations, but would break strings. 
+g + +: pop +s/[^~]*~// +b next + +: load +s/\(.*|?.\)\(.\)/\20~\1/ +s/^\(.\)0\(.*|r\1\([^~|]*\)~\)/\1\3\2/ +s/.// +b next + +: Load +s/\(.*|?.\)\(.\)/\2\1/ +s/^\(.\)\(.*|r\1\)\([^~|]*~\)/|\3\2/ +/^|/!i\ +register empty +s/.// +b next + +: save +s/\(.*|?.\)\(.\)/\2\1/ +/^\(.\).*|r\1/ !s/\(.\).*|/&r\1|/ +/|?S/ s/\(.\).*|r\1/&~/ +s/\(.\)\([^~]*~\)\(.*|r\1\)[^~|]*~\{0,1\}/\3\2/ +b next + +: quit +t quit +s/|?[^~]*~[^~]*~/|?q/ +t next +# Really should be using the -n option to avoid printing a final newline. +s/.*|P\([^|]*\).*/\1/ +q + +: break +s/[0-9]*/&;987654321009;/ +: break1 +s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^;]*\3\(9*\).*|?.\)[^~]*~/\1\5\6\4/ +t break1 +b pop + +: input +N +s/|??\(.*\)\(\n.*\)/|?\2~\1/ +b next + +: count +/|?Z/ s/~.*// +/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}$/ s/[-.0]*\([^.]*\)\.*/\1/ +/|?X/ s/-*[0-9A-F]*\.*\([0-9A-F]*\).*/\1/ +s/|.*// +/~/ s/[^~]//g + +s/./a/g +: count1 + s/a\{10\}/b/g + s/b*a*/&a9876543210;/ + s/a.\{9\}\(.\).*;/\1/ + y/b/a/ +/a/b count1 +G +/|?z/ s/\n/&~/ +s/\n[^~]*// +b next + +: trunc +# for efficiency, doesn't pad with 0s, so 10k 2 5/ returns just .40 +# The X* here and in a couple other places works around a SunOS 4.x sed bug. 
+s/\([^.~]*\.*\)\(.*|K\([^|]*\)\)/\3;9876543210009909:\1,\2/ +: trunc1 + s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^:]*X*\3\(9*\)[^,]*\),\([0-9]\)/\1\5\6\4\7,/ +t trunc1 +s/[^:]*:\([^,]*\)[^~]*/\1/ +b normal + +: number +s/\(.*|?\)\(_\{0,1\}[0-9A-F]*\.\{0,1\}[0-9A-F]*\)/\2~\1~/ +s/^_/-/ +/^[^A-F~]*~.*|I10|/b normal +/^[-0.]*~/b normal +s:\([^.~]*\)\.*\([^~]*\):[Ilb^lbk/,\1\2~0A1B2C3D4E5F1=11223344556677889900;.\2: +: digit + s/^\([^,]*\),\(-*\)\([0-F]\)\([^;]*\(.\)\3[^1;]*\(1*\)\)/I*+\1\2\6\5~,\2\4/ +t digit +s:...\([^/]*.\)\([^,]*\)[^.]*\(.*|?.\):\2\3KSb[99]k\1]SaSaXSbLalb0<aLakLbktLbk: +b next + +: string +/|?[^]]*$/N +s/\(|?[^]]*\)\[\([^]]*\)]/\1|{\2|}/ +/|?\[/b string +s/\(.*|?\)|{\(.*\)|}/\2~\1[/ +s/|{/[/g +s/|}/]/g +b next + +: binop +/^[^~|]*~[^|]/ !i\ +stack empty +//!b next +/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/[^~]*\(.*|?!*[^!=<>]\)/0\1/ +/^[^~]*~-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/~[^~]*\(.*|?!*[^!=<>]\)/~0\1/ +h +/|?\*/b mul +/|?\//b div +/|?%/b rem +/|?^/b exp + +/|?[+-]/ s/^\(-*\)\([^~]*~\)\(-*\)\([^~]*~\).*|?\(-\{0,1\}\).*/\2\4s\3o\1\3\5/ +s/\([^.~]*\)\([^~]*~[^.~]*\)\(.*\)/<\1,\2,\3|=-~.0,123456789<></ +/^<\([^,]*,[^~]*\)\.*0*~\1\.*0*~/ s/</=/ +: cmp1 + s/^\(<[^,]*\)\([0-9]\),\([^,]*\)\([0-9]\),/\1,\2\3,\4/ +t cmp1 +/^<\([^~]*\)\([^~]\)[^~]*~\1\(.\).*|=.*\3.*\2/ s/</>/ +/|?/{ + s/^\([<>]\)\(-[^~]*~-.*\1\)\(.\)/\3\2/ + s/^\(.\)\(.*|?!*\)\1/\2!\1/ + s/|?![^!]\(.\)/&l\1x/ + s/[^~]*~[^~]*~\(.*|?\)!*.\(.*\)|=.*/\1\2/ + b next +} +s/\(-*\)\1|=.*/;9876543210;9876543210/ +/o-/ s/;9876543210/;0123456789/ +s/^>\([^~]*~\)\([^~]*~\)s\(-*\)\(-*o\3\(-*\)\)/>\2\1s\5\4/ + +s/,\([0-9]*\)\.*\([^,]*\),\([0-9]*\)\.*\([0-9]*\)/\1,\2\3.,\4;0/ +: right1 + s/,\([0-9]\)\([^,]*\),;*\([0-9]\)\([0-9]*\);*0*/\1,\2\3,\4;0/ +t right1 +s/.\([^,]*\),~\(.*\);0~s\(-*\)o-*/\1~\30\2~/ + +: addsub1 + s/\(.\{0,1\}\)\(~[^,]*\)\([0-9]\)\(\.*\),\([^;]*\)\(;\([^;]*\(\3[^;]*\)\).*X*\1\(.*\)\)/\2,\4\5\9\8\7\6/ + s/,\([^~]*~\).\{10\}\(.\)[^;]\{0,9\}\([^;]\{0,1\}\)[^;]*/,\2\1\3/ +# 
could be done in one s/// if we could have >9 back-refs... +/^~.*~;/!b addsub1 + +: endbin +s/.\([^,]*\),\([0-9.]*\).*/\1\2/ +G +s/\n[^~]*~[^~]*// + +: normal +s/^\(-*\)0*\([0-9.]*[0-9]\)[^~]*/\1\2/ +s/^[^1-9~]*~/0~/ +b next + +: mul +s/\(-*\)\([0-9]*\)\.*\([0-9]*\)~\(-*\)\([0-9]*\)\.*\([0-9]*\).*|K\([^|]*\).*/\1\4\2\5.!\3\6,|\2<\3~\5>\6:\7;9876543210009909/ + +: mul1 + s/![0-9]\([^<]*\)<\([0-9]\{0,1\}\)\([^>]*\)>\([0-9]\{0,1\}\)/0!\1\2<\3\4>/ + /![0-9]/ s/\(:[^;]*\)\([1-9]\)\(0*\)\([^0]*\2\(.\).*X*\3\(9*\)\)/\1\5\6\4/ +/<~[^>]*>:0*;/!t mul1 + +s/\(-*\)\1\([^>]*\).*/;\2^>:9876543210aaaaaaaaa/ + +: mul2 + s/\([0-9]~*\)^/^\1/ + s/<\([0-9]*\)\(.*[~^]\)\([0-9]*\)>/\1<\2>\3/ + + : mul3 + s/>\([0-9]\)\(.*\1.\{9\}\(a*\)\)/\1>\2;9\38\37\36\35\34\33\32\31\30/ + s/\(;[^<]*\)\([0-9]\)<\([^;]*\).*\2[0-9]*\(.*\)/\4\1<\2\3/ + s/a[0-9]/a/g + s/a\{10\}/b/g + s/b\{10\}/c/g + /|0*[1-9][^>]*>0*[1-9]/b mul3 + + s/;/a9876543210;/ + s/a.\{9\}\(.\)[^;]*\([^,]*\)[0-9]\([.!]*\),/\2,\1\3/ + y/cb/ba/ +/|<^/!b mul2 +b endbin + +: div +# CDDET +/^[-.0]*[1-9]/ !i\ +divide by 0 +//!b pop +s/\(-*\)\([0-9]*\)\.*\([^~]*~-*\)\([0-9]*\)\.*\([^~]*\)/\2.\3\1;0\4.\5;0/ +: div1 + s/^\.0\([^.]*\)\.;*\([0-9]\)\([0-9]*\);*0*/.\1\2.\3;0/ + s/^\([^.]*\)\([0-9]\)\.\([^;]*;\)0*\([0-9]*\)\([0-9]\)\./\1.\2\30\4.\5/ +t div1 +s/~\(-*\)\1\(-*\);0*\([^;]*[0-9]\)[^~]*/~123456789743222111~\2\3/ +s/\(.\(.\)[^~]*\)[^9]*\2.\{8\}\(.\)[^~]*/\3~\1/ +s,|?.,&SaSadSaKdlaZ+LaX-1+[sb1]Sbd1>bkLatsbLa[dSa2lbla*-*dLa!=a]dSaxsakLasbLb*t, +b next + +: rem +s,|?%,&Sadla/LaKSa[999]k*Lak-, +b next + +: exp +# This decimal method is just a little faster than the binary method done +# totally in dc: 1LaKLb [kdSb*LbK]Sb [[.5]*d0ktdSa<bkd*KLad1<a]Sa d1<a kk* +/^[^~]*\./i\ +fraction in exponent ignored +s,[^-0-9].*,;9d**dd*8*d*d7dd**d*6d**d5d*d*4*d3d*2lbd**1lb*0, +: exp1 + s/\([0-9]\);\(.*\1\([d*]*\)[^l]*\([^*]*\)\(\**\)\)/;dd*d**d*\4\3\5\2/ +t exp1 +G +s,-*.\{9\}\([^9]*\)[^0]*0.\(.*|?.\),\2~saSaKdsaLb0kLbkK*+k1\1LaktsbkLax, 
+s,|?.,&SadSbdXSaZla-SbKLaLadSb[0Lb-d1lb-*d+K+0kkSb[1Lb/]q]Sa0>a[dk]sadK<a[Lb], +b next + +: sqrt +# first square root using sed: 8k2v at 1:30am Dec 17, 1996 +/^-/i\ +square root of negative number +/^[-0]/b next +s/~.*// +/^\./ s/0\([0-9]\)/\1/g +/^\./ !s/[0-9][0-9]/7/g +G +s/\n/~/ +s,|?.,&K1+k KSbSb[dk]SadXdK<asadlb/lb+[.5]*[sbdlb/lb+[.5]*dlb>a]dsaxsasaLbsaLatLbk K1-kt, +b next + +# END OF GSU dc.sed diff --git a/testsuite/distrib.good b/testsuite/distrib.good new file mode 100644 index 0000000..a6a8426 --- /dev/null +++ b/testsuite/distrib.good @@ -0,0 +1,29 @@ +Path: mailnewsgateway +From crash@cygnus.com Wed Mar 8 18: 02:42 1995 +From: crash@cygnus.com (Jason Molenda) +Message-ID: <9503090202.AA06931.alt.buddha.fat.short.guy@phydeaux.cygnus.com> +Subject: Note for sed testsuite +Original-To: molenda@msi.umn.edu +Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST) +X-Mailer: ELM [version 2.4 PL23] +Newsgroups: alt.buddha.short.fat.guy +Distribution: world +Sender: news@cygnus.com +Approved: alt.buddha.short.fat.guy@cygnus.com + + _Summum Bonum_ + + All the breath and the bloom of the + year in the bag of one bee: + All the wonder and wealth of the mine in + the heart of one gem: + In the core of one pearl all the shade and the + shine of the sea: + Breath and bloom, shade and shine, -- wonder, + wealth, and -- how far above them -- + Truth, thats brighter than gem, + Trust, that's purer than pearl, -- + Brightest truth, purest trust in the universe -- + all were for me + In the kiss of one girl. 
+ -- Robert Browning diff --git a/testsuite/distrib.inp b/testsuite/distrib.inp new file mode 100644 index 0000000..ceaecec --- /dev/null +++ b/testsuite/distrib.inp @@ -0,0 +1,28 @@ +From crash@cygnus.com Wed Mar 8 18:02:42 1995 +Received: from s1.msi.umn.edu (s1.msi.umn.edu [128.101.24.1]) by cygnus.com (8.6.9/8.6.9) with ESMTP id SAA21692 for <crash@cygnus.com>; Wed, 8 Mar 1995 18:02:41 -0800 +Received: from cygint.cygnus.com (cygint.cygnus.com [140.174.1.1]) by s1.msi.umn.edu (8.6.10/8.6.9) with ESMTP id TAA13398 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 19:59:18 -0600 +Received: from phydeaux.cygnus.com (phydeaux.cygnus.com [140.174.1.85]) by cygnus.com (8.6.9/8.6.9) with SMTP id SAA21688 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 18:02:33 -0800 +From: Jason Molenda <crash@cygnus.com> +Received: by phydeaux.cygnus.com (5.65/4.7) id AA06931; Wed, 8 Mar 1995 18:02:28 -0800 +Message-Id: <9503090202.AA06931@phydeaux.cygnus.com> +Subject: Note for sed testsuite +To: molenda@msi.umn.edu +Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST) +X-Mailer: ELM [version 2.4 PL23] + + _Summum Bonum_ + + All the breath and the bloom of the + year in the bag of one bee: + All the wonder and wealth of the mine in + the heart of one gem: + In the core of one pearl all the shade and the + shine of the sea: + Breath and bloom, shade and shine, -- wonder, + wealth, and -- how far above them -- + Truth, thats brighter than gem, + Trust, that's purer than pearl, -- + Brightest truth, purest trust in the universe -- + all were for me + In the kiss of one girl. + -- Robert Browning diff --git a/testsuite/distrib.sed b/testsuite/distrib.sed new file mode 100644 index 0000000..918b30f --- /dev/null +++ b/testsuite/distrib.sed @@ -0,0 +1,56 @@ +# This is straight out of C News +# +# +# All this does is massage the headers so they look like what news +# software expects. To:, Cc: and Resent-*: headers are masked. 
+# Reply-To: is turned into references, which is questionable (could +# just as well be dropped. +# +# The From: line is rewritten to use the "address (comments)" form +# instead of "phrase <route>" form our mailer uses. Also, addresses +# with no "@domainname" are assumed to originate locally, and so are +# given a domain. +# +# The Sender: field below reflects the address of the person who +# maintains our mailing lists. The Approved: field is in a special +# form, so that we can do bidirectional gatewaying. Any message +# in a newsgroup that bears this stamp will not be fed into the +# matching mailing list. + +1i\ +Path: mailnewsgateway + :a + /^[Rr]eceived:/b r + /^[Nn]ewsgroups:/b r + /^[Pp]ath:/b r + /^[Tt][Oo]:/s/^/Original-/ + /^[Cc][Cc]:/s/^/Original-/ + /^[Rr][Ee][Ss][Ee][Nn][Tt]-.*/s/^/Original-/ + /^[Mm][Ee][Ss][Ss][Aa][Gg][Ee]-[Ii][Dd]:/s/@/.alt.buddha.fat.short.guy@/ + s/^[Ii]n-[Rr]eply-[Tt]o:/References:/ + /^From:/{ + s/<\([^@]*\)>$/<\1@$thissite>/ + s/^From:[ ][ ]*\(.*\) *<\(.*\)>$/From: \2 (\1)/ + } + s/-[Ii]d:/-ID:/ + s/^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]:[ ]*$/Subject: (none)/ + s/^\([^:]*:\)[ ]*/\1 / + /^$/{i\ +Newsgroups: alt.buddha.short.fat.guy\ +Distribution: world\ +Sender: news@cygnus.com\ +Approved: alt.buddha.short.fat.guy@cygnus.com + b e + } + p + n + b a + :r + s/.*//g + n + /^[ ]/b r + b a + :e + p + n + b e diff --git a/testsuite/distrib.sh b/testsuite/distrib.sh new file mode 100644 index 0000000..dbadbdc --- /dev/null +++ b/testsuite/distrib.sh @@ -0,0 +1,63 @@ +#! /bin/sh +# +# This is stolen from C News +# + + +# +# All this does is massage the headers so they look like what news +# software expects. To:, Cc: and Resent-*: headers are masked. +# Reply-To: is turned into references, which is questionable (could +# just as well be dropped. +# +# The From: line is rewritten to use the "address (comments)" form +# instead of "phrase <route>" form our mailer uses. 
Also, addresses +# with no "@domainname" are assumed to originate locally, and so are +# given a domain. +# +# The Sender: field below reflects the address of the person who +# maintains our mailing lists. The Approved: field is in a special +# form, so that we can do bidirectional gatewaying. Any message +# in a newsgroup that bears this stamp will not be fed into the +# matching mailing list. + +sed=${1-sed} + +$sed -n -e "1{i\\ +Path: mailnewsgateway + }" \ + -e ":a + /^[Rr]eceived:/b r + /^[Nn]ewsgroups:/b r + /^[Pp]ath:/b r + /^[Tt][Oo]:/s/^/Original-/ + /^[Cc][Cc]:/s/^/Original-/ + /^[Rr][Ee][Ss][Ee][Nn][Tt]-.*/s/^/Original-/ + /^[Mm][Ee][Ss][Ss][Aa][Gg][Ee]-[Ii][Dd]:/s/@/.alt.buddha.fat.short.guy@/ + s/^[Ii]n-[Rr]eply-[Tt]o:/References:/ + /^From:/{ + s/<\([^@]*\)>\$/<\1@$thissite>/ + s/^From:[ ][ ]*\(.*\) *<\(.*\)>\$/From: \2 (\1)/ + } + s/-[Ii]d:/-ID:/ + s/^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]:[ ]*$/Subject: (none)/ + s/^\([^:]*:\)[ ]*/\1 / + /^\$/{i\\ +Newsgroups: alt.buddha.short.fat.guy\\ +Distribution: world\\ +Sender: news@cygnus.com\\ +Approved: alt.buddha.short.fat.guy@cygnus.com + b e + } + p + n + b a + :r + s/.*//g + n + /^[ ]/b r + b a + :e + p + n + b e" diff --git a/testsuite/dollar.good b/testsuite/dollar.good new file mode 100644 index 0000000..23e072a --- /dev/null +++ b/testsuite/dollar.good @@ -0,0 +1,4 @@ +I can't quite remember where I heard it, +but I can't seem to get out of my head +the phrase +space the final frontier diff --git a/testsuite/dollar.inp b/testsuite/dollar.inp new file mode 100644 index 0000000..9267e98 --- /dev/null +++ b/testsuite/dollar.inp @@ -0,0 +1,4 @@ +I can't quite remember where I heard it, +but I can't seem to get out of my head +the phrase +the final frontier diff --git a/testsuite/dollar.sed b/testsuite/dollar.sed new file mode 100644 index 0000000..5fbb35c --- /dev/null +++ b/testsuite/dollar.sed @@ -0,0 +1 @@ +$s/^/space / diff --git a/testsuite/empty.good b/testsuite/empty.good new file mode 100644 index 
0000000..07e1a15 --- /dev/null +++ b/testsuite/empty.good @@ -0,0 +1,2 @@ +x + diff --git a/testsuite/empty.inp b/testsuite/empty.inp new file mode 100644 index 0000000..07e1a15 --- /dev/null +++ b/testsuite/empty.inp @@ -0,0 +1,2 @@ +x + diff --git a/testsuite/empty.sed b/testsuite/empty.sed new file mode 100644 index 0000000..b35aed6 --- /dev/null +++ b/testsuite/empty.sed @@ -0,0 +1 @@ +s/^ *// diff --git a/testsuite/enable.good b/testsuite/enable.good new file mode 100644 index 0000000..c6588ba --- /dev/null +++ b/testsuite/enable.good @@ -0,0 +1,3 @@ +targets +x11-testing +wollybears-in-minnesota diff --git a/testsuite/enable.inp b/testsuite/enable.inp new file mode 100644 index 0000000..4509a8d --- /dev/null +++ b/testsuite/enable.inp @@ -0,0 +1,3 @@ +--enable-targets=sparc-sun-sunos4.1.3,srec +--enable-x11-testing=on +--enable-wollybears-in-minnesota=yes-id-like-that diff --git a/testsuite/enable.sed b/testsuite/enable.sed new file mode 100644 index 0000000..0d2a208 --- /dev/null +++ b/testsuite/enable.sed @@ -0,0 +1,2 @@ +# inspired by an autoconf generated configure script. 
+s/-*enable-//;s/=.*// diff --git a/testsuite/eval.good b/testsuite/eval.good new file mode 100644 index 0000000..6fd021b --- /dev/null +++ b/testsuite/eval.good @@ -0,0 +1,40 @@ +abcd +--- +abcd +--- +abcd +--- +17380: 2 2 5 11 79 +cpu +--- +17380: 2 2 5 11 79 +cpu +--- +17380: 2 2 5 11 79 +cpu +--- + abcd +--- + abcd +--- + abcd +--- +17380: 2 2 5 11 79 + cpu +--- +17380: 2 2 5 11 79 + cpu +--- +17380: 2 2 5 11 79 + cpu +--- +Doing some more tests ----------------------- +17380: 2 2 5 11 79 +--- +../sed/sed 1q eval.in2 +--- +17380: 2 2 5 11 79 +--- +../sed/sed 1q eval.in2 +--- +../sed/sed 1q eval.in2 diff --git a/testsuite/eval.inp b/testsuite/eval.inp new file mode 100644 index 0000000..4e30989 --- /dev/null +++ b/testsuite/eval.inp @@ -0,0 +1,5 @@ +17380: 2 2 5 11 79 +abcd +cpu + abcd + cpu diff --git a/testsuite/eval.sed b/testsuite/eval.sed new file mode 100644 index 0000000..5734786 --- /dev/null +++ b/testsuite/eval.sed @@ -0,0 +1,46 @@ +1d + + #Try eval command + /cpu/!b2 + e../sed/sed 1q eval.in2 + +:2 +p +i--- +h + + #Try eval option + s,.* *cpu *,../sed/sed 1q eval.in2; echo "&",e + +:3 +p +g +i--- + + h + #Try eval option with print + s,.* *cpu.*,../sed/sed 1q eval.in2,ep + g + + +:4 +p +i--- + +$!d + +#Do some more tests +s/.*/Doing some more tests -----------------------/p +s,.*,../sed/sed 1q eval.in2,ep +i--- +s,.*,../sed/sed 1q eval.in2,pe +i--- +s,.*,../sed/sed 1q eval.in2, +h +e +p +g +i--- +s/^/echo /ep +i--- +s/^fubar$/echo wozthis/e diff --git a/testsuite/factor.good b/testsuite/factor.good new file mode 100644 index 0000000..c703182 --- /dev/null +++ b/testsuite/factor.good @@ -0,0 +1,15 @@ +2 +3 +2 +2 +5 +2 +2 +2 +11 +2 +2 +2 +2 +13 +11 diff --git a/testsuite/factor.inp b/testsuite/factor.inp new file mode 100644 index 0000000..1c2e796 --- /dev/null +++ b/testsuite/factor.inp @@ -0,0 +1,8 @@ +2 +3 +4 +5 +8 +11 +16 +143 diff --git a/testsuite/factor.sed b/testsuite/factor.sed new file mode 100644 index 0000000..4416e35 --- /dev/null +++ 
b/testsuite/factor.sed @@ -0,0 +1,76 @@ +#! /bin/sed -nf + +s/.*/&;9aaaaaaaaa8aaaaaaaa7aaaaaaa6aaaaaa5aaaaa4aaaa3aaa2aa1a0/ +:encode +s/\(a*\)\([0-9]\)\([0-9]*;.*\2\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4\3/ +tencode +s/;.*// + +# Compute a few common factors for speed. Clear the subst flag +t7a + +# These are placed here to make the flow harder to understand :-) +:2 +a\ +2 +b2a +:3 +a\ +3 +b3a +:5 +a\ +5 +b5a +:7 +a\ +7 + +:7a +s/^\(aa*\)\1\{6\}$/\1/ +t7 +:5a +s/^\(aa*\)\1\{4\}$/\1/ +t5 +:3a +s/^\(aa*\)\1\1$/\1/ +t3 +:2a +s/^\(aa*\)\1$/\1/ +t2 + +/^a$/b + +# The quotient of dividing by 11 is a limit to the remaining prime factors +s/^\(aa*\)\1\{10\}/\1=&/ + +# Pattern space looks like CANDIDATE\nNUMBER. When a candidate is valid, +# the number is divided and the candidate is tried again +:factor +/^\(a\{7,\}\)=\1\1*$/! { + # Decrement CANDIDATE, and search again if it is still >1 + s/^a// + /^aa/b factor + + # Print the last remaining factor: since it is stored in the NUMBER + # rather than in the CANDIDATE, swap 'em: now NUMBER=1 + s/\(.*\)=\(.*\)/\2=\1/ +} + +# We have a prime factor in CANDIDATE! 
Print it +h +s/=.*/;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9/ + +:decode +s/^\(a*\)\1\{9\}\(a\{0,9\}\)\([0-9]*;.*[^a]\2\([0-9]\)\)/\1\4\3/ +/^a/tdecode +s/;.*//p + +g +:divide +s/^\(a*\)\(=b*\)\1/\1\2b/ +tdivide +y/b/a/ + +# If NUMBER = 1, we don't have any more factors +/aa$/bfactor diff --git a/testsuite/fasts.good b/testsuite/fasts.good new file mode 100644 index 0000000..d1c7e4a --- /dev/null +++ b/testsuite/fasts.good @@ -0,0 +1,14 @@ +aaaaaabbbbbbaaaaaaa +bbbbbb +aaaaaabbbbbbaaaaaaa +aaaaaabbbbbbaaaaaaa +aaaaaaabbbbbbaaaaaaa +aaaaaabbbbbbaaaaaaa +aaaaaaabbbbbbaaaaaa +bbbbbbbbbbbbbbbbbbb + +bbbbbbbbbbbbbbbbbbb +bbbbbbbbbbbbbbbbbbb +bbbbbbbbbbbbbbbbbbb +bbbbbbbbbbbbbbbbbbb +bbbbbbbbbbbbbbbbbbbb diff --git a/testsuite/fasts.inp b/testsuite/fasts.inp new file mode 100644 index 0000000..361e17b --- /dev/null +++ b/testsuite/fasts.inp @@ -0,0 +1 @@ +aaaaaaabbbbbbaaaaaaa diff --git a/testsuite/fasts.sed b/testsuite/fasts.sed new file mode 100644 index 0000000..5e482f7 --- /dev/null +++ b/testsuite/fasts.sed @@ -0,0 +1,46 @@ +# test `fast' substitutions + +h +s/a// +p +g +s/a//g +p +g +s/^a//p +g +s/^a//g +p +g +s/not present//g +p +g +s/^[a-z]//g +p +g +s/a$// +p +g + +y/a/b/ +h +s/b// +p +g +s/b//g +p +g +s/^b//p +g +s/^b//g +p +g +s/^[a-z]//g +p +g +s/b$// +p +g + + + diff --git a/testsuite/flipcase.good b/testsuite/flipcase.good new file mode 100644 index 0000000..9fcffa2 --- /dev/null +++ b/testsuite/flipcase.good @@ -0,0 +1,25 @@ +09 - 02 - 2002 00.00 Tg La7 La7 - +09 - 02 - 2002 00.00 Brand New Tmc 2 - +09 - 02 - 2002 00.10 Tg1 Notte Rai Uno - +09 - 02 - 2002 00.15 Tg Parlamento Rai Due - +09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 - +09 - 02 - 2002 00.20 Berserk - La Confessione Di Gatz Italia 1 Cartoon +09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai Tre - +09 - 02 - 2002 00.25 Meteo 2 Rai Due - +09 - 02 - 2002 00.30 Appuntamento Al Cinema Rai Due - +09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre - +09 - 02 - 2002 00.35 
Profiler Rai Due - +09 - 02 - 2002 00.35 Stampa Oggi - Che Tempo Fa Rai Uno - +09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno - +09 - 02 - 2002 00.45 Bollettino Della Neve Rete 4 News +09 - 02 - 2002 00.50 Studio Aperto - La Giornata Italia 1 News +09 - 02 - 2002 00.50 Bocca A Bocca - 2 Tempo Rete 4 Film +09 - 02 - 2002 01.00 Appuntamento Al Cinema Rai Tre - +09 - 02 - 2002 01.00 Music Non Stop Tmc 2 - +09 - 02 - 2002 01.00 Studio Sport Italia 1 Sport +09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News +09 - 02 - 2002 01.05 Fuori Orario. Cose (Mai) Viste Rai Tre - +09 - 02 - 2002 01.15 Rainotte Rai Due - +09 - 02 - 2002 01.15 Sottovoce Rai Uno - +09 - 02 - 2002 01.15 Giochi Olimpici Invernali - Cerimonia Di Apertura Rai Tre - +09 - 02 - 2002 01.17 Italia Interroga Rai Due - diff --git a/testsuite/flipcase.inp b/testsuite/flipcase.inp new file mode 100644 index 0000000..f91ec11 --- /dev/null +++ b/testsuite/flipcase.inp @@ -0,0 +1,25 @@ +09 - 02 - 2002 00.00 Tg La7 La7 - +09 - 02 - 2002 00.00 Brand New Tmc 2 - +09 - 02 - 2002 00.10 Tg1 Notte Rai Uno - +09 - 02 - 2002 00.15 Tg Parlamento Rai Due - +09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 - +09 - 02 - 2002 00.20 Berserk - La CoNFESSIONE Di Gatz Italia 1 Cartoon +09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai TrE - +09 - 02 - 2002 00.25 Meteo 2 Rai Due - +09 - 02 - 2002 00.30 Appuntamento Al CinEMA RaI Due - +09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre - +09 - 02 - 2002 00.35 Profiler Rai Due - +09 - 02 - 2002 00.35 Stampa OggI - Che Tempo Fa Rai Uno - +09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno - +09 - 02 - 2002 00.45 BollettINO Della NEVE RETE 4 News +09 - 02 - 2002 00.50 STUDIO Aperto - La Giornata Italia 1 News +09 - 02 - 2002 00.50 BOCCA A Bocca - 2 Tempo Rete 4 Film +09 - 02 - 2002 01.00 AppuntAMENTO Al Cinema Rai Tre - +09 - 02 - 2002 01.00 Music NoN Stop Tmc 2 - +09 - 02 - 2002 01.00 Studio SpORT Italia 1 SporT +09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News 
+09 - 02 - 2002 01.05 Fuori Orario. CosE (Mai) Viste Rai Tre - +09 - 02 - 2002 01.15 RAINOTTE Rai Due - +09 - 02 - 2002 01.15 Sottovoce Rai Uno - +09 - 02 - 2002 01.15 GiOCHI Olimpici InVERNALI - CERIMONIA Di Apertura Rai Tre - +09 - 02 - 2002 01.17 Italia Interroga Rai Due - diff --git a/testsuite/flipcase.sed b/testsuite/flipcase.sed new file mode 100644 index 0000000..211d0d0 --- /dev/null +++ b/testsuite/flipcase.sed @@ -0,0 +1 @@ +s,\([^A-Za-z]*\)\([A-Za-z]*\),\1\L\u\2,g
\ No newline at end of file diff --git a/testsuite/head.good b/testsuite/head.good new file mode 100644 index 0000000..6392831 --- /dev/null +++ b/testsuite/head.good @@ -0,0 +1,3 @@ + "...by imposing a tiny bit of order in a communication you are + translating, you are carving out a little bit of order in the + universe. You will never succeed. Everything will fail and come diff --git a/testsuite/head.inp b/testsuite/head.inp new file mode 100644 index 0000000..5c4b4a4 --- /dev/null +++ b/testsuite/head.inp @@ -0,0 +1,9 @@ + "...by imposing a tiny bit of order in a communication you are + translating, you are carving out a little bit of order in the + universe. You will never succeed. Everything will fail and come + to an end finally. But you have a chance to carve a little bit + of order and maybe even beauty out of the raw materials that + surround you everywhere, and I think there is no greater meaning + in life." + + Donald L. Philippi, Oct 1930 - Jan 1993 diff --git a/testsuite/head.sed b/testsuite/head.sed new file mode 100644 index 0000000..d8ea37d --- /dev/null +++ b/testsuite/head.sed @@ -0,0 +1 @@ +3q diff --git a/testsuite/inclib.good b/testsuite/inclib.good new file mode 100644 index 0000000..6b1279a --- /dev/null +++ b/testsuite/inclib.good @@ -0,0 +1,34 @@ + /usr/X11R6/include + /usr/X11R5/include + /usr/X11R4/include + + /usr/include/X11R6 + /usr/include/X11R5 + /usr/include/X11R4 + + /usr/local/X11R6/include + /usr/local/X11R5/include + /usr/local/X11R4/include + + /usr/local/include/X11R6 + /usr/local/include/X11R5 + /usr/local/include/X11R4 + + /usr/X11/include + /usr/include/X11 + /usr/local/X11/include + /usr/local/include/X11 + + /usr/X386/include + /usr/x386/include + /usr/XFree86/include/X11 + + /usr/include + /usr/local/include + /usr/unsupported/include + /usr/athena/include + /usr/local/x11r5/include + /usr/lpp/Xamples/include + + /usr/openwin/include + /usr/openwin/share/include diff --git a/testsuite/inclib.inp b/testsuite/inclib.inp 
new file mode 100644 index 0000000..552e9e2 --- /dev/null +++ b/testsuite/inclib.inp @@ -0,0 +1,34 @@ + /usr/X11R6/lib + /usr/X11R5/lib + /usr/X11R4/lib + + /usr/lib/X11R6 + /usr/lib/X11R5 + /usr/lib/X11R4 + + /usr/local/X11R6/lib + /usr/local/X11R5/lib + /usr/local/X11R4/lib + + /usr/local/lib/X11R6 + /usr/local/lib/X11R5 + /usr/local/lib/X11R4 + + /usr/X11/lib + /usr/lib/X11 + /usr/local/X11/lib + /usr/local/lib/X11 + + /usr/X386/lib + /usr/x386/lib + /usr/XFree86/lib/X11 + + /usr/lib + /usr/local/lib + /usr/unsupported/lib + /usr/athena/lib + /usr/local/x11r5/lib + /usr/lpp/Xamples/lib + + /usr/openwin/lib + /usr/openwin/share/lib diff --git a/testsuite/inclib.sed b/testsuite/inclib.sed new file mode 100644 index 0000000..528f158 --- /dev/null +++ b/testsuite/inclib.sed @@ -0,0 +1,2 @@ +# inspired by an autoconf generated configure script. +s;lib;include; diff --git a/testsuite/insens.good b/testsuite/insens.good new file mode 100644 index 0000000..6fd1bc1 --- /dev/null +++ b/testsuite/insens.good @@ -0,0 +1,2 @@ +1.2.3 +1.2.3 diff --git a/testsuite/insens.inp b/testsuite/insens.inp new file mode 100644 index 0000000..baefc12 --- /dev/null +++ b/testsuite/insens.inp @@ -0,0 +1 @@ +Version: 1.2.3 diff --git a/testsuite/insens.sed b/testsuite/insens.sed new file mode 100644 index 0000000..afab9fa --- /dev/null +++ b/testsuite/insens.sed @@ -0,0 +1,4 @@ +h +s/Version: *//p +g +s/version: *//Ip diff --git a/testsuite/khadafy.good b/testsuite/khadafy.good new file mode 100644 index 0000000..e719f4e --- /dev/null +++ b/testsuite/khadafy.good @@ -0,0 +1,32 @@ +1) Muammar Qaddafi +2) Mo'ammar Gadhafi +3) Muammar Kaddafi +4) Muammar Qadhafi +5) Moammar El Kadhafi +6) Muammar Gadafi +7) Mu'ammar al-Qadafi +8) Moamer El Kazzafi +9) Moamar al-Gaddafi +10) Mu'ammar Al Qathafi +11) Muammar Al Qathafi +12) Mo'ammar el-Gadhafi +13) Moamar El Kadhafi +14) Muammar al-Qadhafi +15) Mu'ammar al-Qadhdhafi +16) Mu'ammar Qadafi +17) Moamar Gaddafi +18) Mu'ammar Qadhdhafi +19) Muammar 
Khaddafi +20) Muammar al-Khaddafi +21) Mu'amar al-Kadafi +22) Muammar Ghaddafy +23) Muammar Ghadafi +24) Muammar Ghaddafi +25) Muamar Kaddafi +26) Muammar Quathafi +27) Muammar Gheddafi +28) Muamar Al-Kaddafi +29) Moammar Khadafy +30) Moammar Qudhafi +31) Mu'ammar al-Qaddafi +32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi diff --git a/testsuite/khadafy.inp b/testsuite/khadafy.inp new file mode 100644 index 0000000..e719f4e --- /dev/null +++ b/testsuite/khadafy.inp @@ -0,0 +1,32 @@ +1) Muammar Qaddafi +2) Mo'ammar Gadhafi +3) Muammar Kaddafi +4) Muammar Qadhafi +5) Moammar El Kadhafi +6) Muammar Gadafi +7) Mu'ammar al-Qadafi +8) Moamer El Kazzafi +9) Moamar al-Gaddafi +10) Mu'ammar Al Qathafi +11) Muammar Al Qathafi +12) Mo'ammar el-Gadhafi +13) Moamar El Kadhafi +14) Muammar al-Qadhafi +15) Mu'ammar al-Qadhdhafi +16) Mu'ammar Qadafi +17) Moamar Gaddafi +18) Mu'ammar Qadhdhafi +19) Muammar Khaddafi +20) Muammar al-Khaddafi +21) Mu'amar al-Kadafi +22) Muammar Ghaddafy +23) Muammar Ghadafi +24) Muammar Ghaddafi +25) Muamar Kaddafi +26) Muammar Quathafi +27) Muammar Gheddafi +28) Muamar Al-Kaddafi +29) Moammar Khadafy +30) Moammar Qudhafi +31) Mu'ammar al-Qaddafi +32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi diff --git a/testsuite/khadafy.sed b/testsuite/khadafy.sed new file mode 100644 index 0000000..8ac81c0 --- /dev/null +++ b/testsuite/khadafy.sed @@ -0,0 +1,2 @@ +# The Khadafy test is brought to you by Scott Anderson . . . +/M[ou]'\{0,1\}am\{1,2\}[ae]r .*\([AEae]l[- ]\)\{0,1\}[GKQ]h\{0,1\}[aeu]\{1,\}\([dtz][dhz]\{0,1\}\)\{1,\}af[iy]/!d diff --git a/testsuite/linecnt.good b/testsuite/linecnt.good new file mode 100644 index 0000000..3cc1bd6 --- /dev/null +++ b/testsuite/linecnt.good @@ -0,0 +1,110 @@ +1 +A dialogue on poverty +2 + +3 + On the night when the rain beats, +4 + Driven by the wind, +5 + On the night when the snowflakes mingle +6 + With a sleety rain, +7 + I feel so helplessly cold. 
+8 + I nibble at a lump of salt, +9 + Sip the hot, oft-diluted dregs of _sake_; +10 + And coughing, snuffling, +11 + And stroking my scanty beard, +12 + I say in my pride, +13 + "There's none worthy, save I!" +14 + But I shiver still with cold. +15 + I pull up my hempen bedclothes, +16 + Wear what few sleeveless clothes I have, +17 + But cold and bitter is the night! +18 + As for those poorer than myself, +19 + Their parents must be cold and hungry, +20 + Their wives and children beg and cry. +21 + Then, how do you struggle through life? +22 + +23 + Wide as they call the heaven and earth, +24 + For me they have shrunk quite small; +25 + Bright though they call the sun and moon, +26 + They never shine for me. +27 + Is it the same with all men, +28 + Or for me alone? +29 + By rare chance I was born a man +30 + And no meaner than my fellows, +31 + But, wearing unwadded sleeveless clothes +32 + In tatters, like weeds waving in the sea, +33 + Hanging from my shoulders, +34 + And under the sunken roof, +35 + Within the leaning walls, +36 + Here I lie on straw +37 + Spread on bare earth, +38 + With my parents at my pillow, +39 + And my wife and children at my feet, +40 + All huddled in grief and tears. +41 + No fire sends up smoke +42 + At the cooking-place, +43 + And in the cauldron +44 + A spider spins its web. +45 + With not a grain to cook, +46 + We moan like the night thrush. +47 + Then, "to cut," as the saying is, +48 + "The ends of what is already too short," +49 + The village headman comes, +50 + With rod in hand, to our sleeping place, +51 + Growling for his dues. +52 + Must it be so hopeless -- +53 + The way of this world? 
+54 + +55 + -- Yamanoue Okura diff --git a/testsuite/linecnt.inp b/testsuite/linecnt.inp new file mode 100644 index 0000000..9eb6070 --- /dev/null +++ b/testsuite/linecnt.inp @@ -0,0 +1,55 @@ +A dialogue on poverty + + On the night when the rain beats, + Driven by the wind, + On the night when the snowflakes mingle + With a sleety rain, + I feel so helplessly cold. + I nibble at a lump of salt, + Sip the hot, oft-diluted dregs of _sake_; + And coughing, snuffling, + And stroking my scanty beard, + I say in my pride, + "There's none worthy, save I!" + But I shiver still with cold. + I pull up my hempen bedclothes, + Wear what few sleeveless clothes I have, + But cold and bitter is the night! + As for those poorer than myself, + Their parents must be cold and hungry, + Their wives and children beg and cry. + Then, how do you struggle through life? + + Wide as they call the heaven and earth, + For me they have shrunk quite small; + Bright though they call the sun and moon, + They never shine for me. + Is it the same with all men, + Or for me alone? + By rare chance I was born a man + And no meaner than my fellows, + But, wearing unwadded sleeveless clothes + In tatters, like weeds waving in the sea, + Hanging from my shoulders, + And under the sunken roof, + Within the leaning walls, + Here I lie on straw + Spread on bare earth, + With my parents at my pillow, + And my wife and children at my feet, + All huddled in grief and tears. + No fire sends up smoke + At the cooking-place, + And in the cauldron + A spider spins its web. + With not a grain to cook, + We moan like the night thrush. + Then, "to cut," as the saying is, + "The ends of what is already too short," + The village headman comes, + With rod in hand, to our sleeping place, + Growling for his dues. + Must it be so hopeless -- + The way of this world? 
+ + -- Yamanoue Okura diff --git a/testsuite/linecnt.sed b/testsuite/linecnt.sed new file mode 100644 index 0000000..3134d36 --- /dev/null +++ b/testsuite/linecnt.sed @@ -0,0 +1 @@ += diff --git a/testsuite/mac-mf.good b/testsuite/mac-mf.good new file mode 100644 index 0000000..9be165d --- /dev/null +++ b/testsuite/mac-mf.good @@ -0,0 +1,200 @@ +## config:mac-pre.in +## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree + +# +# MPW-style lines for the MakeFile +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. 
The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make +# +# This first part is long enough that NFS:Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. 
The non-ASCII chars are necessary for MPW +# Make + +# +# End of MPW-style lines for MakeFile +# + +WHAT = mac + +# Directory syntax Ä +R= +C= +S=: +U=: + +BUILDTOP = ::: +srcdir = + +# FIXME Ä This doesn't translate to MPW yet, srcdir must be same as objdir +# File in object dir can come from either the current dir or srcdir +# +# . Ä . "{srcdir}" + +# Default rule that puts each file into separate segment + +.c.o Ä .c + {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o + +CPPFLAGS = -i {SRCTOP}:include -i {BUILDTOP}:include -i {SRCTOP}:include:krb5 -i {BUILDTOP}:include:krb5 -i {CIncludes} +DEFS = {CPPFLAGS} +CC = c +LD = link +# The funny quoting in the LDFLAGS is to avoid xxx.c.o being mangled by +# mac-mf.sed into xxx.c.o +LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o +CCOPTS = +LIBS = +KRB5ROOT= @KRB5ROOT@ +KRB4=@KRB4@ +INSTALL=Duplicate -y +INSTALL_PROGRAM=Duplicate -y +INSTALL_DATA=Duplicate -y +INSTALL_SETUID=Duplicate -y + +KRB5MANROOT = {KRB5ROOT}{S}man +ADMIN_BINDIR = {KRB5ROOT}{S}admin +SERVER_BINDIR = {KRB5ROOT}{S}sbin +CLIENT_BINDIR = {KRB5ROOT}{S}bin +ADMIN_MANDIR = {KRB5MANROOT}{S}man8 +SERVER_MANDIR = {KRB5MANROOT}{S}man8 +CLIENT_MANDIR = {KRB5MANROOT}{S}man1 +FILE_MANDIR = {KRB5MANROOT}{S}man5 +KRB5_LIBDIR = {KRB5ROOT}{S}lib +KRB5_INCDIR = {KRB5ROOT}{S}include +KRB5_INCSUBDIRS = ¶ + {KRB5_INCDIR}{S}krb5 ¶ + {KRB5_INCDIR}{S}asn.1 ¶ + {KRB5_INCDIR}{S}kerberosIV + + +RM = Delete -y -i +CP = Duplicate -y +MV = mv -f +CHMOD=chmod +RANLIB = @RANLIB@ +ARCHIVE = @ARCHIVE@ +ARADD = @ARADD@ +LN = @LN_S@ +AWK = @AWK@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +YACC = @YACC@ + +# FIXME Ä This won't work for srcdir != objdir. 
But on the Mac, there +# is no easy way to build a relative or absolute path, because Ä means +# both the path separator, and the "go up a directory" indicator +#SRCTOP = {srcdir}{S}{BUILDTOP} +SRCTOP = {BUILDTOP} +SUBDIRS = @subdirs@ + +TOPLIBD = {BUILDTOP}{S}lib + +OBJEXT = c.o +LIBEXT = a +EXEEXT = + +all ÄÄ +# Generated automatically from Makefile.in by configure +CFLAGS = {CCOPTS} {DEFS} -i ::des + +##DOSBUILDTOP = ..\..\: +##DOSLIBNAME=..\crypto.lib +##DOS!include {BUILDTOP}\config\windows.in + +OBJS= md5.{OBJEXT} md5glue.{OBJEXT} md5crypto.{OBJEXT} + +SRCS= md5.c md5glue.c md5crypto.c + +all ÄÄ {OBJS} + +t_mddriver Ä t_mddriver.c.o md5.c.o + Link {LDFLAGS} -o t_mddriver t_mddriver.c.o md5.c.o + +t_mddriver.exe Ä + {CC} {CFLAGS2} -o t_mddriver.exe t_mddriver.c md5.c + +check ÄÄ t_mddriver{EXEEXT} + {C}t_mddriver{EXEEXT} -x + +clean ÄÄ + {RM} t_mddriver{EXEEXT} t_mddriver.{OBJEXT} +# config:post.in +# put all ÄÄ first just in case no other rules occur here +# +all ÄÄ + +check ÄÄ + +clean ÄÄ clean-{WHAT} + {RM} config.log pre.c.out post.c.out Makefile.c.out + +clean-unix ÄÄ + if test -n "{OBJS}" ; then {RM} {OBJS}; else Ä ; fi + +clean-windows ÄÄ + {RM} Å.{OBJEXT} + {RM} msvc.pdb Å.err diff --git a/testsuite/mac-mf.inp b/testsuite/mac-mf.inp new file mode 100644 index 0000000..3adaee2 --- /dev/null +++ b/testsuite/mac-mf.inp @@ -0,0 +1,200 @@ +## config/mac-pre.in +## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree. + +# +# MPW-style lines for the MakeFile. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. 
+# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. 
+# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. +# +# This first part is long enough that NFS/Share doesn't notice the non-ASCII +# characters in the rest of the file, so it claims that the file is type +# TEXT, which is what we want. The non-ASCII chars are necessary for MPW +# Make. + +# +# End of MPW-style lines for MakeFile. +# + +WHAT = mac + +# Directory syntax: +R= +C= +S=: +U=: + +BUILDTOP = ../../.. +srcdir = . + +# FIXME: This doesn't translate to MPW yet, srcdir must be same as objdir. +# File in object dir can come from either the current dir or srcdir. +# +# . : . "{srcdir}" + +# Default rule that puts each file into separate segment. + +.c.o: .c + {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o + +CPPFLAGS = -I$(SRCTOP)/include -I$(BUILDTOP)/include -I$(SRCTOP)/include/krb5 -I$(BUILDTOP)/include/krb5 -i {CIncludes} +DEFS = $(CPPFLAGS) +CC = c +LD = link +# The funny quoting in the LDFLAGS is to avoid xxx.o being mangled by +# mac-mf.sed into xxx.c.o. 
+LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o +CCOPTS = +LIBS = +KRB5ROOT= @KRB5ROOT@ +KRB4=@KRB4@ +INSTALL=Duplicate -y +INSTALL_PROGRAM=Duplicate -y +INSTALL_DATA=Duplicate -y +INSTALL_SETUID=Duplicate -y + +KRB5MANROOT = $(KRB5ROOT)$(S)man +ADMIN_BINDIR = $(KRB5ROOT)$(S)admin +SERVER_BINDIR = $(KRB5ROOT)$(S)sbin +CLIENT_BINDIR = $(KRB5ROOT)$(S)bin +ADMIN_MANDIR = $(KRB5MANROOT)$(S)man8 +SERVER_MANDIR = $(KRB5MANROOT)$(S)man8 +CLIENT_MANDIR = $(KRB5MANROOT)$(S)man1 +FILE_MANDIR = $(KRB5MANROOT)$(S)man5 +KRB5_LIBDIR = $(KRB5ROOT)$(S)lib +KRB5_INCDIR = $(KRB5ROOT)$(S)include +KRB5_INCSUBDIRS = \ + $(KRB5_INCDIR)$(S)krb5 \ + $(KRB5_INCDIR)$(S)asn.1 \ + $(KRB5_INCDIR)$(S)kerberosIV + + +RM = Delete -y -i +CP = Duplicate -y +MV = mv -f +CHMOD=chmod +RANLIB = @RANLIB@ +ARCHIVE = @ARCHIVE@ +ARADD = @ARADD@ +LN = @LN_S@ +AWK = @AWK@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +YACC = @YACC@ + +# FIXME: This won't work for srcdir != objdir. But on the Mac, there +# is no easy way to build a relative or absolute path, because : means +# both the path separator, and the "go up a directory" indicator. +#SRCTOP = $(srcdir)$(S)$(BUILDTOP) +SRCTOP = $(BUILDTOP) +SUBDIRS = @subdirs@ + +TOPLIBD = $(BUILDTOP)$(S)lib + +OBJEXT = c.o +LIBEXT = a +EXEEXT = + +all:: +# Generated automatically from Makefile.in by configure. +CFLAGS = $(CCOPTS) $(DEFS) -I$(srcdir)/../des + +##DOSBUILDTOP = ..\..\.. 
+##DOSLIBNAME=..\crypto.lib +##DOS!include $(BUILDTOP)\config\windows.in + +OBJS= md5.$(OBJEXT) md5glue.$(OBJEXT) md5crypto.$(OBJEXT) + +SRCS= $(srcdir)/md5.c $(srcdir)/md5glue.c $(srcdir)/md5crypto.c + +all:: $(OBJS) + +t_mddriver: t_mddriver.o md5.o + $(CC) $(CFLAGS) $(LDFLAGS) -o t_mddriver t_mddriver.o md5.o + +t_mddriver.exe: + $(CC) $(CFLAGS2) -o t_mddriver.exe t_mddriver.c md5.c + +check:: t_mddriver$(EXEEXT) + $(C)t_mddriver$(EXEEXT) -x + +clean:: + $(RM) t_mddriver$(EXEEXT) t_mddriver.$(OBJEXT) +# config/post.in +# put all:: first just in case no other rules occur here +# +all:: + +check:: + +clean:: clean-$(WHAT) + $(RM) config.log pre.out post.out Makefile.out + +clean-unix:: + if test -n "$(OBJS)" ; then $(RM) $(OBJS); else :; fi + +clean-windows:: + $(RM) *.$(OBJEXT) + $(RM) msvc.pdb *.err diff --git a/testsuite/mac-mf.sed b/testsuite/mac-mf.sed new file mode 100644 index 0000000..9b08e60 --- /dev/null +++ b/testsuite/mac-mf.sed @@ -0,0 +1,154 @@ +# Rewrite default rules from .c.o: to .c.o: .c +/^\./s/^\(\.[a-z]*\)\(\.[a-z]*\)\( *: *\)$/\1\2\3 \1/ + +# Change dependency char. +/::/s/::/ \\Option-f\\Option-f /g +/:/s/:/ \\Option-f /g +/^[SU]=/s/ \\Option-f /:/g + +# Change syntax of Makefile vars. +/\$/s/\${\([a-zA-Z0-9_]*\)}/{\1}/g +/\$/s/\$(\([a-zA-Z0-9_]*\))/{\1}/g + +# Change $@ to {targ} +/\$@/s/\$@/{targ}/g + +# Change pathname syntax. +# +# If line ends with .. then assume it sets a variable that will +# be used to prefix something else -- eliminate one colon, assuming +# that a slash after the ${name} will turn into the missing colon. +# Mac pathname conventions are IRREGULAR and UGLY! +/\./s,\.\./\.\.$,::, +/\./s,\.\.$,:, +# Same if it ends with . (a single dot); turn it into nothing. +/\./s,\.$,,g +# Rules for .. and . elsewhere in the line +# Convert ../: to ::, recur to get whole paths. 
+/\./s,\.\./:,::,g +# Convert ../../ to ::: +/\./s,\.\./\.\./,:::,g +/\./s,\.\./,::,g +/\.\//s,\./,:,g +/\//s,/,:,g + +/=/s/ = \.$/ = :/ + +# Comment out any explicit srcdir setting. +# /srcdir/s/^srcdir/# srcdir/ + +/version/s/^version=/# version=/ + +/BASEDIR/s/^BASEDIR =.*$/BASEDIR = "{srcroot}"/ +/{BASEDIR}:/s/{BASEDIR}:/{BASEDIR}/g +# The original lines screw up -I$(srcdir)/../des by eliminating a colon. +# Proposed fix: Eliminate srcdir prefixes totally. +#/{srcdir}:/s/{srcdir}:/"{srcdir}"/g +/{srcdir}:/s/{srcdir}://g +#/"{srcdir}":/s/"{srcdir}":/"{srcdir}"/g + +# Comment out settings of anything set by mpw host config. +##/CC/s/^CC *=/#CC =/ +##/CFLAGS/s/^CFLAGS *=/#CFLAGS =/ +##/LDFLAGS/s/^LDFLAGS *=/#LDFLAGS =/ + +# Change -I usage. +/-I/s/-I\./-i :/g +/-I/s/-I::bfd/-i ::bfd:/g +/-I/s/-I::include/-i ::include:/g +/-I/s/-I/-i /g + +# Change -D usage. +/-D/s/\([ =]\)-D\([^ ]*\)/\1-d \2/g + +# Change continuation char. +/\\$/s/\\$/\\Option-d/ + +# Change wildcard char. +/^[^#]/s/\*/\\Option-x/g + +# Change path of various types of source files. +#/\.[chly]/s/\([ ><=]\)\([-a-zA-Z0-9_$:"]*\)\.\([chly]\)/\1"{s}"\2.\3/g +#/\.[chly]/s/^\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/"{s}"\1.\2/g +# Skip the {s} and {o} business for now... +# Fix some overenthusiasms. +#/{s}/s/"{s}""{srcdir}"/"{srcdir}"/g +#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)dir}/"{\1dir}"/g +#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)DIR}/"{\1DIR}"/g +#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)dir}"/"{\1dir}"/g +#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)DIR}"/"{\1DIR}"/g +#/{s}/s/"{s}":/:/g +#/{s}/s/^"{s}"//g +#/^\./s/"{s}"\././g + +# Change extension and path of objects, except in the OBJEXT line. +#/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1"{o}"\2.c.o/g +#/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/"{o}"\1.c.o/g +# Skip the {o} stuff for now... +/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1\2.c.o/g +/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/\1.c.o/g +# Clean up. 
+#/\.o/s/"{o}""{o}"/"{o}"/g +#/{o}/s/^"{o}"\([a-zA-Z0-9_]*\)=/\1=/g + +# Change extension of libs. +# /\.a/s/lib\([a-z]*\)\.a/lib\1.o/g + +# Remove non-echo option. +/^ -/s/^ -/ / + +# Change cp to duplicate. +# /cp/s/^\([ ]*\)cp /\1Duplicate -d -y / +# Change mv to rename. +# /mv/s/^\([ ]*\)mv /\1Rename -y / +# /Rename/s/^\([ ]*\)Rename -y -f/\1Rename -y/ +# Change rm to delete. +/^RM=/s/rm -f/Delete -i -y/ +# /rm/s/^\([ ]*\)rm /\1Delete -y / +# /Delete/s/^\([ ]*\)Delete -y -f/\1Delete -y/ +# Comment out symlinking. +# /ln/s/^\([ ]*\)ln /\1# ln / + +# Remove -c from explicit compiler calls. +# /-c/s/{CC}\(.*\) -c \(.*\)\([-a-z]*\)\.c/{CC}\1 \2\3.c -o "{o}"\3.c.o/g +# Don't ask... prev subst seems to omit the second filename. +# /-o/s/\([-a-z]*\)\.c -o "{o}".c.o/\1\.c -o "{o}"\1.c.o/ + +# Change linking cc to link. +/LDFLAGS/ s/{CC} \(.*\){CFLAGS}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/ +/CFLAGS_LINK/s/{CC} \(.*\){CFLAGS_LINK}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/ + +# Comment out .PHONY rules. +/\.PHONY/s/^\.PHONY/# \.PHONY/ +# Comment out .SUFFIXES rules. +/\.SUFFIXES/s/^\.SUFFIXES/# \.SUFFIXES/ +# Comment out .PRECIOUS rules. +/\.PRECIOUS/s/^\.PRECIOUS/# \.PRECIOUS/ +## Comment out default rules. +##/^\./s/^\(\.[a-z]*\.[a-z]* \)/# \1/ + +# +# End of original hack-mf.sed +# +# Begin original hack-mf2.sed +# +# Transform expressions. + +# Set the install program appropriate. +# /INSTALL/s/^INSTALL *= *`.*`:install.sh -c/INSTALL = Duplicate -y/ + +# Include from the extra-include dir. +# /^INCLUDES = /s/^INCLUDES = /INCLUDES = -i "{srcroot}"extra-include / + +# Yuck - remove unconverted autoconf things. +# /@/s/@[^ ]*@//g + +# Hackery, pure and simple +# To speed up compiles, remove duplicated -i options. +/-i/s/\(-i [^ ]*\) \1 /\1 /g + +# Note! There are 8-bit characters in the three lines below: +# 0xc4, 0xb6, 0xc5. 
+/Option/s/\\Option-f/Ä/g +/Option/s/\\Option-d/¶/g +/Option/s/\\Option-x/Å/g diff --git a/testsuite/madding.good b/testsuite/madding.good new file mode 100644 index 0000000..537ab50 --- /dev/null +++ b/testsuite/madding.good @@ -0,0 +1 @@ +The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. 
What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. 
It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude." 
diff --git a/testsuite/madding.inp b/testsuite/madding.inp new file mode 100644 index 0000000..2367bc8 --- /dev/null +++ b/testsuite/madding.inp @@ -0,0 +1 @@ +The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. 
She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. 
"Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity." diff --git a/testsuite/madding.sed b/testsuite/madding.sed new file mode 100644 index 0000000..5494f2b --- /dev/null +++ b/testsuite/madding.sed @@ -0,0 +1,8 @@ +# this is from Thomas Hardy's _Far From the Madding Crowd_. 
+# +# cf ftp://ftp.cdrom.com/pub/gutenberg/etext94/crowd10a.txt +# +# the point of this test, in case it isn't obvious, is to overfill fixed +# buffers wherever they might be. +# +s/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. 
She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. 
"Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity."/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. 
There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. 
A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. 
Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" 
Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude."/ diff --git a/testsuite/manis.good b/testsuite/manis.good new file mode 100644 index 0000000..f349b76 --- /dev/null +++ b/testsuite/manis.good @@ -0,0 +1,22 @@ +s%@CFLAGS@%%g +s%@CPPFLAGS@%-I/%g +s%@CXXFLAGS@%-x c++%g +s%@DEFS@%$DEFS%g +s%@LDFLAGS@%-L/usr/lib%g +s%@LIBS@%-lgnu -lbfd%g +s%@exec_prefix@%%g +s%@prefix@%$prefix%g +s%@RANLIB@%$RANLIB%g +s%@CC@%/usr/local/bin/gcc%g +s%@CPP@%$CPP%g +s%@XCFLAGS@%$XCFLAGS%g +s%@XINCLUDES@%$XINCLUDES%g +s%@XLIBS@%$XLIBS%g +s%@XPROGS@%$XPROGS%g +s%@TCLHDIR@%$TCLHDIR%g +s%@TCLLIB@%$TCLLIB%g +s%@TKHDIR@%$TKHDIR%g +s%@TKLIB@%$TKLIB%g +s%@PTY_TYPE@%$PTY_TYPE%g +s%@EVENT_TYPE@%$EVENT_TYPE%g +s%@SETUID@%$SETUID%g diff --git a/testsuite/manis.inp b/testsuite/manis.inp new file mode 100644 index 0000000..f349b76 --- /dev/null +++ b/testsuite/manis.inp @@ -0,0 +1,22 @@ +s%@CFLAGS@%%g +s%@CPPFLAGS@%-I/%g +s%@CXXFLAGS@%-x c++%g +s%@DEFS@%$DEFS%g +s%@LDFLAGS@%-L/usr/lib%g +s%@LIBS@%-lgnu -lbfd%g +s%@exec_prefix@%%g +s%@prefix@%$prefix%g +s%@RANLIB@%$RANLIB%g +s%@CC@%/usr/local/bin/gcc%g +s%@CPP@%$CPP%g +s%@XCFLAGS@%$XCFLAGS%g +s%@XINCLUDES@%$XINCLUDES%g +s%@XLIBS@%$XLIBS%g +s%@XPROGS@%$XPROGS%g +s%@TCLHDIR@%$TCLHDIR%g +s%@TCLLIB@%$TCLLIB%g +s%@TKHDIR@%$TKHDIR%g +s%@TKLIB@%$TKLIB%g +s%@PTY_TYPE@%$PTY_TYPE%g +s%@EVENT_TYPE@%$EVENT_TYPE%g +s%@SETUID@%$SETUID%g diff --git a/testsuite/manis.sed b/testsuite/manis.sed new file mode 100644 index 0000000..5017845 --- /dev/null +++ b/testsuite/manis.sed @@ -0,0 +1,6 @@ +# straight out of an autoconf-generated configure. +# The input should look just like the input after this is run. +# +# Protect against being on the right side of a sed subst in config.status. 
+s/%@/@@/; s/@%/@@/; s/%g$/@g/; /@g$/s/[\\\\&%]/\\\\&/g; + s/@@/%@/; s/@@/@%/; s/@g$/%g/ diff --git a/testsuite/middle.good b/testsuite/middle.good new file mode 100644 index 0000000..71f33c1 --- /dev/null +++ b/testsuite/middle.good @@ -0,0 +1,3 @@ + universe. You will never succeed. Everything will fail and come + to an end finally. But you have a chance to carve a little bit + of order and maybe even beauty out of the raw materials that diff --git a/testsuite/middle.inp b/testsuite/middle.inp new file mode 100644 index 0000000..5c4b4a4 --- /dev/null +++ b/testsuite/middle.inp @@ -0,0 +1,9 @@ + "...by imposing a tiny bit of order in a communication you are + translating, you are carving out a little bit of order in the + universe. You will never succeed. Everything will fail and come + to an end finally. But you have a chance to carve a little bit + of order and maybe even beauty out of the raw materials that + surround you everywhere, and I think there is no greater meaning + in life." + + Donald L. 
Philippi, Oct 1930 - Jan 1993 diff --git a/testsuite/middle.sed b/testsuite/middle.sed new file mode 100644 index 0000000..3471789 --- /dev/null +++ b/testsuite/middle.sed @@ -0,0 +1 @@ +3,5p diff --git a/testsuite/modulo.good b/testsuite/modulo.good new file mode 100644 index 0000000..b42f1ab --- /dev/null +++ b/testsuite/modulo.good @@ -0,0 +1,22 @@ +1 +s%@CFLAGS@%%g +3 +s%@CXXFLAGS@%-x c++%g +5 +s%@LDFLAGS@%-L/usr/lib%g +7 +s%@exec_prefix@%%g +9 +s%@RANLIB@%$RANLIB%g +11 +s%@CPP@%$CPP%g +13 +s%@XINCLUDES@%$XINCLUDES%g +15 +s%@XPROGS@%$XPROGS%g +17 +s%@TCLLIB@%$TCLLIB%g +19 +s%@TKLIB@%$TKLIB%g +21 +s%@EVENT_TYPE@%$EVENT_TYPE%g diff --git a/testsuite/modulo.inp b/testsuite/modulo.inp new file mode 100644 index 0000000..f349b76 --- /dev/null +++ b/testsuite/modulo.inp @@ -0,0 +1,22 @@ +s%@CFLAGS@%%g +s%@CPPFLAGS@%-I/%g +s%@CXXFLAGS@%-x c++%g +s%@DEFS@%$DEFS%g +s%@LDFLAGS@%-L/usr/lib%g +s%@LIBS@%-lgnu -lbfd%g +s%@exec_prefix@%%g +s%@prefix@%$prefix%g +s%@RANLIB@%$RANLIB%g +s%@CC@%/usr/local/bin/gcc%g +s%@CPP@%$CPP%g +s%@XCFLAGS@%$XCFLAGS%g +s%@XINCLUDES@%$XINCLUDES%g +s%@XLIBS@%$XLIBS%g +s%@XPROGS@%$XPROGS%g +s%@TCLHDIR@%$TCLHDIR%g +s%@TCLLIB@%$TCLLIB%g +s%@TKHDIR@%$TKHDIR%g +s%@TKLIB@%$TKLIB%g +s%@PTY_TYPE@%$PTY_TYPE%g +s%@EVENT_TYPE@%$EVENT_TYPE%g +s%@SETUID@%$SETUID%g diff --git a/testsuite/modulo.sed b/testsuite/modulo.sed new file mode 100644 index 0000000..68b4930 --- /dev/null +++ b/testsuite/modulo.sed @@ -0,0 +1 @@ +0~2d;= diff --git a/testsuite/newjis.good b/testsuite/newjis.good new file mode 100644 index 0000000..4de16b0 --- /dev/null +++ b/testsuite/newjis.good @@ -0,0 +1,4 @@ +$B$H$J$j$NM9JX6I$K(B +$B$?$F$+$1$?$N$O(B +$B$?$F$+$1$?$+$C$?$+$i(B +$B$?$F$+$1$?!#(B diff --git a/testsuite/newjis.inp b/testsuite/newjis.inp new file mode 100644 index 0000000..fc710f6 --- /dev/null +++ b/testsuite/newjis.inp @@ -0,0 +1,4 @@ +$B$H$J$j$N$?$1$,$-$K(B +$B$?$F$+$1$?$N$O(B +$B$?$F$+$1$?$+$C$?$+$i(B +$B$?$F$+$1$?!#(B diff --git a/testsuite/newjis.sed 
b/testsuite/newjis.sed new file mode 100644 index 0000000..1bc941d --- /dev/null +++ b/testsuite/newjis.sed @@ -0,0 +1 @@ +s/$?$1$,$-/M9JX6I/ diff --git a/testsuite/noeol.good b/testsuite/noeol.good new file mode 100644 index 0000000..fa5fc0e --- /dev/null +++ b/testsuite/noeol.good @@ -0,0 +1,3 @@ +This file is uniquewakuwaku +in that it doeswakuwaku +end in a newline.wakuwaku
\ No newline at end of file diff --git a/testsuite/noeol.inp b/testsuite/noeol.inp new file mode 100644 index 0000000..c4cf6a1 --- /dev/null +++ b/testsuite/noeol.inp @@ -0,0 +1,3 @@ +This file is unique +in that it does +end in a newline.
\ No newline at end of file diff --git a/testsuite/noeol.sed b/testsuite/noeol.sed new file mode 100644 index 0000000..bea7110 --- /dev/null +++ b/testsuite/noeol.sed @@ -0,0 +1 @@ +s/$/wakuwaku/g diff --git a/testsuite/noeolw.1good b/testsuite/noeolw.1good new file mode 100644 index 0000000..f0f44d9 --- /dev/null +++ b/testsuite/noeolw.1good @@ -0,0 +1,7 @@ +This file is unique +in that it does +end in a newline. +This file is unique +in that it does +end in a newline. +in that it does diff --git a/testsuite/noeolw.2good b/testsuite/noeolw.2good new file mode 100644 index 0000000..c4cf6a1 --- /dev/null +++ b/testsuite/noeolw.2good @@ -0,0 +1,3 @@ +This file is unique +in that it does +end in a newline.
\ No newline at end of file diff --git a/testsuite/noeolw.good b/testsuite/noeolw.good new file mode 100644 index 0000000..e76509a --- /dev/null +++ b/testsuite/noeolw.good @@ -0,0 +1,12 @@ +This file is unique +This file is unique +in that it does +in that it does +end in a newline. +end in a newline. +This file is unique +This file is unique +in that it does +in that it does +end in a newline. +end in a newline.
\ No newline at end of file diff --git a/testsuite/noeolw.sed b/testsuite/noeolw.sed new file mode 100644 index 0000000..0924619 --- /dev/null +++ b/testsuite/noeolw.sed @@ -0,0 +1,10 @@ +w noeolw.1out +$ { + x + w noeolw.1out + x +} +h +1,3w noeolw.2out +p +p diff --git a/testsuite/numsub.good b/testsuite/numsub.good new file mode 100644 index 0000000..9bdaaef --- /dev/null +++ b/testsuite/numsub.good @@ -0,0 +1 @@ +foo foo fo oo f oo foo foo foo foo foo foo foo bar foo foo foo foo foo diff --git a/testsuite/numsub.inp b/testsuite/numsub.inp new file mode 100644 index 0000000..6924c98 --- /dev/null +++ b/testsuite/numsub.inp @@ -0,0 +1,2 @@ +foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo +foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo diff --git a/testsuite/numsub.sed b/testsuite/numsub.sed new file mode 100644 index 0000000..4a96cad --- /dev/null +++ b/testsuite/numsub.sed @@ -0,0 +1,7 @@ +# the first one matches, the second doesn't +1s/foo/bar/10 +2s/foo/bar/20 + +# The second line should be deleted. ssed 3.55-3.58 do not. 
+t +d diff --git a/testsuite/numsub2.good b/testsuite/numsub2.good new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/testsuite/numsub2.good diff --git a/testsuite/numsub2.inp b/testsuite/numsub2.inp new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/testsuite/numsub2.inp @@ -0,0 +1 @@ + diff --git a/testsuite/numsub2.sed b/testsuite/numsub2.sed new file mode 100644 index 0000000..dddead9 --- /dev/null +++ b/testsuite/numsub2.sed @@ -0,0 +1 @@ +s/a*/b/2 diff --git a/testsuite/numsub3.good b/testsuite/numsub3.good new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/testsuite/numsub3.good diff --git a/testsuite/numsub3.inp b/testsuite/numsub3.inp new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/testsuite/numsub3.inp @@ -0,0 +1 @@ + diff --git a/testsuite/numsub3.sed b/testsuite/numsub3.sed new file mode 100644 index 0000000..0ea96a4 --- /dev/null +++ b/testsuite/numsub3.sed @@ -0,0 +1 @@ +s/^a*/b/2 diff --git a/testsuite/numsub4.good b/testsuite/numsub4.good new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/testsuite/numsub4.good diff --git a/testsuite/numsub4.inp b/testsuite/numsub4.inp new file mode 100644 index 0000000..b680253 --- /dev/null +++ b/testsuite/numsub4.inp @@ -0,0 +1 @@ +z diff --git a/testsuite/numsub4.sed b/testsuite/numsub4.sed new file mode 100644 index 0000000..e76c5bf --- /dev/null +++ b/testsuite/numsub4.sed @@ -0,0 +1 @@ +s/^a*/b/2p diff --git a/testsuite/numsub5.good b/testsuite/numsub5.good new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/testsuite/numsub5.good diff --git a/testsuite/numsub5.inp b/testsuite/numsub5.inp new file mode 100644 index 0000000..b680253 --- /dev/null +++ b/testsuite/numsub5.inp @@ -0,0 +1 @@ +z diff --git a/testsuite/numsub5.sed b/testsuite/numsub5.sed new file mode 100644 index 0000000..d8ecda2 --- /dev/null +++ b/testsuite/numsub5.sed @@ -0,0 +1 @@ +s/a*/b/3p diff --git a/testsuite/ptestcases.h b/testsuite/ptestcases.h new file mode 
100644 index 0000000..506b1cc --- /dev/null +++ b/testsuite/ptestcases.h @@ -0,0 +1,326 @@ + { 0, 0, "2.8.2 Regular Expression General Requirement", NULL, }, + { 2, 4, "bb*", "abbbc", }, + { 2, 2, "bb*", "ababbbc", }, + { 7, 9, "A#*::", "A:A#:qA::qA#::qA##::q", }, + { 1, 5, "A#*::", "A##::A#::qA::qA#:q", }, + { 0, 0, "2.8.3.1.2 BRE Special Characters", NULL, }, + { 0, 0, "GA108", NULL, }, + { 2, 2, "\\.", "a.c", }, + { 2, 2, "\\[", "a[c", }, + { 2, 2, "\\\\", "a\\c", }, + { 2, 2, "\\*", "a*c", }, + { 2, 2, "\\^", "a^c", }, + { 2, 2, "\\$", "a$c", }, + { 7, 11, "X\\*Y\\*8", "Y*8X*8X*Y*8", }, + { 0, 0, "GA109", NULL, }, + { 2, 2, "[.]", "a.c", }, + { 2, 2, "[[]", "a[c", }, + { -1, -1, "[[]", "ac", }, + { 2, 2, "[\\]", "a\\c", }, + { 1, 1, "[\\a]", "abc", }, + { 2, 2, "[\\.]", "a\\.c", }, + { 2, 2, "[\\.]", "a.\\c", }, + { 2, 2, "[*]", "a*c", }, + { 2, 2, "[$]", "a$c", }, + { 2, 2, "[X*Y8]", "7*8YX", }, + { 0, 0, "GA110", NULL, }, + { 2, 2, "*", "a*c", }, + { 3, 4, "*a", "*b*a*c", }, + { 1, 5, "**9=", "***9=9", }, + { 0, 0, "GA111", NULL, }, + { 1, 1, "^*", "*bc", }, + { -1, -1, "^*", "a*c", }, + { -1, -1, "^*", "^*ab", }, + { 1, 5, "^**9=", "***9=", }, + { -1, -1, "^*5<*9", "5<9*5<*9", }, + { 0, 0, "GA112", NULL, }, + { 2, 3, "\\(*b\\)", "a*b", }, + { -1, -1, "\\(*b\\)", "ac", }, + { 1, 6, "A\\(**9\\)=", "A***9=79", }, + { 0, 0, "GA113(1)", NULL, }, + { 1, 3, "\\(^*ab\\)", "*ab", }, + { -1, -1, "\\(^*ab\\)", "^*ab", }, + { -1, -1, "\\(^*b\\)", "a*b", }, + { -1, -1, "\\(^*b\\)", "^*b", }, + { 0, 0, "GA114", NULL, }, + { 1, 3, "a^b", "a^b", }, + { 1, 3, "a\\^b", "a^b", }, + { 1, 1, "^^", "^bc", }, + { 2, 2, "\\^", "a^c", }, + { 1, 1, "[c^b]", "^abc", }, + { 1, 1, "[\\^ab]", "^ab", }, + { 2, 2, "[\\^ab]", "c\\d", }, + { -1, -1, "[^^]", "^", }, + { 1, 3, "\\(a^b\\)", "a^b", }, + { 1, 3, "\\(a\\^b\\)", "a^b", }, + { 2, 2, "\\(\\^\\)", "a^b", }, + { 0, 0, "GA115", NULL, }, + { 3, 3, "$$", "ab$", }, + { -1, -1, "$$", "$ab", }, + { 2, 3, "$c", "a$c", }, + { 2, 2, "[$]", 
"a$c", }, + { 1, 2, "\\$a", "$a", }, + { 3, 3, "\\$$", "ab$", }, + { 2, 6, "A\\([34]$[34]\\)B", "XA4$3BY", }, + { 0, 0, "2.8.3.1.3 Periods in BREs", NULL, }, + { 0, 0, "GA116", NULL, }, + { 1, 1, ".", "abc", }, + { -1, -1, ".ab", "abc", }, + { 1, 3, "ab.", "abc", }, + { 1, 3, "a.b", "a,b", }, + { -1, -1, ".......", "PqRs6", }, + { 1, 7, ".......", "PqRs6T8", }, + { 0, 0, "2.8.3.2 RE Bracket Expression", NULL, }, + { 0, 0, "GA118", NULL, }, + { 2, 2, "[abc]", "xbyz", }, + { -1, -1, "[abc]", "xyz", }, + { 2, 2, "[abc]", "xbay", }, + { 0, 0, "GA119", NULL, }, + { 2, 2, "[^a]", "abc", }, + { 4, 4, "[^]cd]", "cd]ef", }, + { 2, 2, "[^abc]", "axyz", }, + { -1, -1, "[^abc]", "abc", }, + { 3, 3, "[^[.a.]b]", "abc", }, + { 3, 3, "[^[=a=]b]", "abc", }, + { 2, 2, "[^-ac]", "abcde-", }, + { 2, 2, "[^ac-]", "abcde-", }, + { 3, 3, "[^a-b]", "abcde", }, + { 3, 3, "[^a-bd-e]", "dec", }, + { 2, 2, "[^---]", "-ab", }, + { 16, 16, "[^a-zA-Z0-9]", "pqrstVWXYZ23579#", }, + { 0, 0, "GA120(1)", NULL, }, + { 3, 3, "[]a]", "cd]ef", }, + { 1, 1, "[]-a]", "a_b", }, + { 3, 3, "[][.-.]-0]", "ab0-]", }, + { 1, 1, "[]^a-z]", "string", }, + { 0, 0, "GA120(2)", NULL, }, + { 4, 4, "[^]cd]", "cd]ef", }, + { 0, 0, "[^]]*", "]]]]]]]]X", }, + { 0, 0, "[^]]*", "]]]]]]]]", }, + { 9, 9, "[^]]\\{1,\\}", "]]]]]]]]X", }, + { -1, -1, "[^]]\\{1,\\}", "]]]]]]]]", }, + { 0, 0, "GA120(3)", NULL, }, + { 3, 3, "[c[.].]d]", "ab]cd", }, + { 2, 8, "[a-z]*[[.].]][A-Z]*", "Abcd]DEFg", }, + { 0, 0, "GA121", NULL, }, + { 2, 2, "[[.a.]b]", "Abc", }, + { 1, 1, "[[.a.]b]", "aBc", }, + { -1, -1, "[[.a.]b]", "ABc", }, + { 3, 3, "[^[.a.]b]", "abc", }, + { 3, 3, "[][.-.]-0]", "ab0-]", }, + { 3, 3, "[A-[.].]c]", "ab]!", }, + { 0, 0, "GA122", NULL, }, + { -2, -2, "[[.ch.]]", "abc", }, + { -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", }, + { 0, 0, "GA125", NULL, }, + { 2, 2, "[[=a=]b]", "Abc", }, + { 1, 1, "[[=a=]b]", "aBc", }, + { -1, -1, "[[=a=]b]", "ABc", }, + { 3, 3, "[^[=a=]b]", "abc", }, + { 0, 0, "GA126", NULL, }, + { 0, 0, 
NULL, "the expected result for [[:alnum:]]* is 2-7 which is wrong" }, + { 0, 0, "[[:alnum:]]*", " aB28gH", }, + { 2, 7, "[[:alnum:]][[:alnum:]]*", " aB28gH", }, + { 0, 0, NULL, "the expected result for [^[:alnum:]]* is 2-5 which is wrong" }, + { 0, 0, "[^[:alnum:]]*", "2 ,a", }, + { 2, 5, "[^[:alnum:]][^[:alnum:]]*", "2 ,a", }, + { 0, 0, NULL, "the expected result for [[:alpha:]]* is 2-5 which is wrong" }, + { 0, 0, "[[:alpha:]]*", " aBgH2", }, + { 2, 5, "[[:alpha:]][[:alpha:]]*", " aBgH2", }, + { 1, 6, "[^[:alpha:]]*", "2 8,a", }, + { 1, 2, "[[:blank:]]*", " \r", }, + { 1, 8, "[^[:blank:]]*", "aB28gH, ", }, + { 1, 2, "[[:cntrl:]]*", " ", }, + { 1, 8, "[^[:cntrl:]]*", "aB2 8gh,", }, + { 0, 0, NULL, "the expected result for [[:digit:]]* is 2-3 which is wrong" }, + { 0, 0, "[[:digit:]]*", "a28", }, + { 2, 3, "[[:digit:]][[:digit:]]*", "a28", }, + { 1, 8, "[^[:digit:]]*", "aB gH,", }, + { 1, 7, "[[:graph:]]*", "aB28gH, ", }, + { 1, 3, "[^[:graph:]]*", " ,", }, + { 1, 2, "[[:lower:]]*", "agB", }, + { 1, 8, "[^[:lower:]]*", "B2 8H,a", }, + { 1, 8, "[[:print:]]*", "aB2 8gH, ", }, + { 1, 2, "[^[:print:]]*", " ", }, + { 0, 0, NULL, "the expected result for [[:punct:]]* is 2-2 which is wrong" }, + { 0, 0, "[[:punct:]]*", "a,2", }, + { 2, 3, "[[:punct:]][[:punct:]]*", "a,,2", }, + { 1, 9, "[^[:punct:]]*", "aB2 8gH", }, + { 1, 3, "[[:space:]]*", " \r", }, + { 0, 0, NULL, "the expected result for [^[:space:]]* is 2-9 which is wrong" }, + { 0, 0, "[^[:space:]]*", " aB28gH, ", }, + { 2, 9, "[^[:space:]][^[:space:]]*", " aB28gH, ", }, + { 0, 0, NULL, "the expected result for [[:upper:]]* is 2-3 which is wrong" }, + { 0, 0, "[[:upper:]]*", "aBH2", }, + { 2, 3, "[[:upper:]][[:upper:]]*", "aBH2", }, + { 1, 8, "[^[:upper:]]*", "a2 8g,B", }, + { 0, 0, NULL, "the expected result for [[:xdigit:]]* is 2-5 which is wrong" }, + { 0, 0, "[[:xdigit:]]*", "gaB28h", }, + { 2, 5, "[[:xdigit:]][[:xdigit:]]*", "gaB28h", }, + { 0, 0, NULL, "the expected result for [^[:xdigit:]]* is 2-7 which is 
wrong" }, + { 2, 7, "[^[:xdigit:]][^[:xdigit:]]*", "a gH,2", }, + { 0, 0, "GA127", NULL, }, + { -2, -2, "[b-a]", "abc", }, + { 1, 1, "[a-c]", "bbccde", }, + { 2, 2, "[a-b]", "-bc", }, + { 3, 3, "[a-z0-9]", "AB0", }, + { 3, 3, "[^a-b]", "abcde", }, + { 3, 3, "[^a-bd-e]", "dec", }, + { 1, 1, "[]-a]", "a_b", }, + { 2, 2, "[+--]", "a,b", }, + { 2, 2, "[--/]", "a.b", }, + { 2, 2, "[^---]", "-ab", }, + { 3, 3, "[][.-.]-0]", "ab0-]", }, + { 3, 3, "[A-[.].]c]", "ab]!", }, + { 2, 6, "bc[d-w]xy", "abchxyz", }, + { 0, 0, "GA129", NULL, }, + { 1, 1, "[a-cd-f]", "dbccde", }, + { -1, -1, "[a-ce-f]", "dBCCdE", }, + { 2, 4, "b[n-zA-M]Y", "absY9Z", }, + { 2, 4, "b[n-zA-M]Y", "abGY9Z", }, + { 0, 0, "GA130", NULL, }, + { 3, 3, "[-xy]", "ac-", }, + { 2, 4, "c[-xy]D", "ac-D+", }, + { 2, 2, "[--/]", "a.b", }, + { 2, 4, "c[--/]D", "ac.D+b", }, + { 2, 2, "[^-ac]", "abcde-", }, + { 1, 3, "a[^-ac]c", "abcde-", }, + { 3, 3, "[xy-]", "zc-", }, + { 2, 4, "c[xy-]7", "zc-786", }, + { 2, 2, "[^ac-]", "abcde-", }, + { 2, 4, "a[^ac-]c", "5abcde-", }, + { 2, 2, "[+--]", "a,b", }, + { 2, 4, "a[+--]B", "Xa,By", }, + { 2, 2, "[^---]", "-ab", }, + { 4, 6, "X[^---]Y", "X-YXaYXbY", }, + { 0, 0, "2.8.3.3 BREs Matching Multiple Characters", NULL, }, + { 0, 0, "GA131", NULL, }, + { 3, 4, "cd", "abcdeabcde", }, + { 1, 2, "ag*b", "abcde", }, + { -1, -1, "[a-c][e-f]", "abcdef", }, + { 3, 4, "[a-c][e-f]", "acbedf", }, + { 4, 8, "abc*XYZ", "890abXYZ#*", }, + { 4, 9, "abc*XYZ", "890abcXYZ#*", }, + { 4, 15, "abc*XYZ", "890abcccccccXYZ#*", }, + { -1, -1, "abc*XYZ", "890abc*XYZ#*", }, + { 0, 0, "GA132", NULL, }, + { 2, 4, "\\(*bc\\)", "a*bc", }, + { 1, 2, "\\(ab\\)", "abcde", }, + { 1, 10, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", }, + { 3, 8, "43\\(2\\(6\\)*0\\)AB", "654320ABCD", }, + { 3, 9, "43\\(2\\(7\\)*0\\)AB", "6543270ABCD", }, + { 3, 12, "43\\(2\\(7\\)*0\\)AB", "6543277770ABCD", }, + { 0, 0, "GA133", NULL, }, + { 1, 10, 
"\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", }, + { -1, -1, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(k\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", }, + { 0, 0, "GA134", NULL, }, + { 2, 4, "\\(bb*\\)", "abbbc", }, + { 2, 2, "\\(bb*\\)", "ababbbc", }, + { 1, 6, "a\\(.*b\\)", "ababbbc", }, + { 1, 2, "a\\(b*\\)", "ababbbc", }, + { 1, 20, "a\\(.*b\\)c", "axcaxbbbcsxbbbbbbbbc", }, + { 0, 0, "GA135", NULL, }, + { 1, 7, "\\(a\\(b\\(c\\(d\\(e\\)\\)\\)\\)\\)\\4", "abcdededede", }, + { 0, 0, NULL, "POSIX does not really specify whether a\\(b\\)*c\\1 matches acb." }, + { 0, 0, NULL, "back references are supposed to expand to the last match, but what" }, + { 0, 0, NULL, "if there never was a match as in this case?" }, + { -1, -1, "a\\(b\\)*c\\1", "acb", }, + { 1, 11, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)\\9", "abcdefghijjk", }, + { 0, 0, "GA136", NULL, }, + { 0, 0, NULL, "These two tests have the same problem as the test in GA135. No match" }, + { 0, 0, NULL, "of a subexpression, why should the back reference be usable?" 
}, + { 0, 0, NULL, "1 2 a\\(b\\)*c\\1 acb" }, + { 0, 0, NULL, "4 7 a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4¦xYzabcdePQRST" }, + { -1, -1, "a\\(b\\)*c\\1", "acb", }, + { -1, -1, "a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4", "xYzabcdePQRST", }, + { 0, 0, "GA137", NULL, }, + { -2, -2, "\\(a\\(b\\)\\)\\3", "foo", }, + { -2, -2, "\\(a\\(b\\)\\)\\(a\\(b\\)\\)\\5", "foo", }, + { 0, 0, "GA138", NULL, }, + { 1, 2, "ag*b", "abcde", }, + { 1, 10, "a.*b", "abababvbabc", }, + { 2, 5, "b*c", "abbbcdeabbbbbbcde", }, + { 2, 5, "bbb*c", "abbbcdeabbbbbbcde", }, + { 1, 5, "a\\(b\\)*c\\1", "abbcbbb", }, + { -1, -1, "a\\(b\\)*c\\1", "abbdbd", }, + { 0, 0, "\\([a-c]*\\)\\1", "abcacdef", }, + { 1, 6, "\\([a-c]*\\)\\1", "abcabcabcd", }, + { 1, 2, "a^*b", "ab", }, + { 1, 5, "a^*b", "a^^^b", }, + { 0, 0, "GA139", NULL, }, + { 1, 2, "a\\{2\\}", "aaaa", }, + { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", }, + { 1, 2, "\\(a\\)\\1\\{1,2\\}", "aabc", }, + { 1, 3, "\\(a\\)\\1\\{1,2\\}", "aaaabc", }, + { 0, 0, NULL, "the expression \\(\\(a\\)\\1\\)\\{1,2\\} is ill-formed, using \\2" }, + { 1, 4, "\\(\\(a\\)\\2\\)\\{1,2\\}", "aaaabc", }, + { 0, 0, "GA140", NULL, }, + { 1, 2, "a\\{2\\}", "aaaa", }, + { -1, -1, "a\\{2\\}", "abcd", }, + { 0, 0, "a\\{0\\}", "aaaa", }, + { 1, 64, "a\\{64\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", }, + { 0, 0, "GA141", NULL, }, + { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", }, + { 0, 0, NULL, "the expected result for \\([a-c]*\\)\\{2,\\} is failure which isn't correct" }, + { 1, 3, "\\([a-c]*\\)\\{2,\\}", "abcdefg", }, + { 1, 3, "\\([a-c]*\\)\\{1,\\}", "abcdefg", }, + { -1, -1, "a\\{64,\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", }, + { 0, 0, "GA142", NULL, }, + { 1, 3, "a\\{2,3\\}", "aaaa", }, + { -1, -1, "a\\{2,3\\}", "abcd", }, + { 0, 0, "\\([a-c]*\\)\\{0,0\\}", "foo", }, + { 1, 63, "a\\{1,63\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", }, + { 0, 0, "2.8.3.4 BRE Precedence", NULL, }, + { 0, 0, "GA143", 
NULL, }, + { 0, 0, NULL, "There are numerous bugs in the original version." }, + { 2, 19, "\\^\\[[[.].]]\\\\(\\\\1\\\\)\\*\\\\{1,2\\\\}\\$", "a^[]\\(\\1\\)*\\{1,2\\}$b", }, + { 1, 6, "[[=*=]][[=\\=]][[=]=]][[===]][[...]][[:punct:]]", "*\\]=.;", }, + { 1, 6, "[$\\(*\\)^]*", "$\\()*^", }, + { 1, 1, "[\\1]", "1", }, + { 1, 1, "[\\{1,2\\}]", "{", }, + { 0, 0, NULL, "the expected result for \\(*\\)*\\1* is 2-2 which isn't correct" }, + { 0, 0, "\\(*\\)*\\1*", "a*b*11", }, + { 2, 3, "\\(*\\)*\\1*b", "a*b*11", }, + { 0, 0, NULL, "the expected result for \\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\) is 1-5 which isn't correct" }, + { 1, 3, "\\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\)", "abbab", }, + { 1, 5, "\\(a\\(b\\{1,2\\}\\)\\)\\{1,2\\}", "abbab", }, + { 1, 1, "^\\(^\\(^a$\\)$\\)$", "a", }, + { 1, 2, "\\(a\\)\\1$", "aa", }, + { 1, 3, "ab*", "abb", }, + { 1, 4, "ab\\{2,4\\}", "abbbc", }, + { 0, 0, "2.8.3.5 BRE Expression Anchoring", NULL, }, + { 0, 0, "GA144", NULL, }, + { 1, 1, "^a", "abc", }, + { -1, -1, "^b", "abc", }, + { -1, -1, "^[a-zA-Z]", "99Nine", }, + { 1, 4, "^[a-zA-Z]*", "Nine99", }, + { 0, 0, "GA145(1)", NULL, }, + { 1, 2, "\\(^a\\)\\1", "aabc", }, + { -1, -1, "\\(^a\\)\\1", "^a^abc", }, + { 1, 2, "\\(^^a\\)", "^a", }, + { 1, 1, "\\(^^\\)", "^^", }, + { 1, 3, "\\(^abc\\)", "abcdef", }, + { -1, -1, "\\(^def\\)", "abcdef", }, + { 0, 0, "GA146", NULL, }, + { 3, 3, "a$", "cba", }, + { -1, -1, "a$", "abc", }, + { 5, 7, "[a-z]*$", "99ZZxyz", }, + { 0, 0, NULL, "the expected result for [a-z]*$ is failure which isn't correct" }, + { 10, 9, "[a-z]*$", "99ZZxyz99", }, + { 3, 3, "$$", "ab$", }, + { -1, -1, "$$", "$ab", }, + { 3, 3, "\\$$", "ab$", }, + { 0, 0, "GA147(1)", NULL, }, + { -1, -1, "\\(a$\\)\\1", "bcaa", }, + { -1, -1, "\\(a$\\)\\1", "ba$", }, + { -1, -1, "\\(ab$\\)", "ab$", }, + { 1, 2, "\\(ab$\\)", "ab", }, + { 4, 6, "\\(def$\\)", "abcdef", }, + { -1, -1, "\\(abc$\\)", "abcdef", }, + { 0, 0, "GA148", NULL, }, + { 0, 0, "^$", "", }, + { 1, 3, "^abc$", "abc", }, + { -1, -1, 
"^xyz$", "^xyz^", }, + { -1, -1, "^234$", "^234$", }, + { 1, 9, "^[a-zA-Z0-9]*$", "2aA3bB9zZ", }, + { -1, -1, "^[a-z0-9]*$", "2aA3b#B9zZ", }, diff --git a/testsuite/readin.good b/testsuite/readin.good new file mode 100644 index 0000000..479a444 --- /dev/null +++ b/testsuite/readin.good @@ -0,0 +1,19 @@ +``Democracy will not come today, this year, + nor ever through compromise and fear. +MOO + I have as much right as the other fellow has + to stand on my two feet and own the land. +MOO + I tire so of hearing people say + let things take their course, + tomorrow is another day. +MOO + I do not need my freedom when I'm dead. +MOO + I cannot live on tomorrow's bread. +MOO + Freedom is a strong seed + planted in a great need. +MOO + I live here, too. +MOO diff --git a/testsuite/readin.in2 b/testsuite/readin.in2 new file mode 100644 index 0000000..fa93196 --- /dev/null +++ b/testsuite/readin.in2 @@ -0,0 +1 @@ +MOO diff --git a/testsuite/readin.inp b/testsuite/readin.inp new file mode 100644 index 0000000..95fb969 --- /dev/null +++ b/testsuite/readin.inp @@ -0,0 +1,14 @@ +``Democracy will not come today, this year, + nor ever through compromise and fear. + I have as much right as the other fellow has + to stand on my two feet and own the land. + I tire so of hearing people say + let things take their course, + tomorrow is another day. + I do not need my freedom when I'm dead. + I cannot live on tomorrow's bread. + Freedom is a strong seed + planted in a great need. + I live here, too. 
+ I want freedom just as you.'' + ``The Weary Blues'', Langston Hughes diff --git a/testsuite/readin.sed b/testsuite/readin.sed new file mode 100644 index 0000000..fac07a4 --- /dev/null +++ b/testsuite/readin.sed @@ -0,0 +1,2 @@ +/\.$/r readin.in2 +/too\.$/q diff --git a/testsuite/recall.good b/testsuite/recall.good new file mode 100644 index 0000000..230cc08 --- /dev/null +++ b/testsuite/recall.good @@ -0,0 +1,7 @@ +eeefff +Xeefff +XYefff +XYeYff +XYeYYf +XYeYYY +XYeYYY diff --git a/testsuite/recall.inp b/testsuite/recall.inp new file mode 100644 index 0000000..ef34b7e --- /dev/null +++ b/testsuite/recall.inp @@ -0,0 +1 @@ +eeefff diff --git a/testsuite/recall.sed b/testsuite/recall.sed new file mode 100644 index 0000000..c1d7f9c --- /dev/null +++ b/testsuite/recall.sed @@ -0,0 +1,7 @@ +# Check that the empty regex recalls the last *executed* regex, +# not the last *compiled* regex +p +s/e/X/p +:x +s//Y/p +/f/bx diff --git a/testsuite/recall2.good b/testsuite/recall2.good new file mode 100644 index 0000000..74c01ea --- /dev/null +++ b/testsuite/recall2.good @@ -0,0 +1 @@ +>abb<||>abbbb< diff --git a/testsuite/recall2.inp b/testsuite/recall2.inp new file mode 100644 index 0000000..9046d59 --- /dev/null +++ b/testsuite/recall2.inp @@ -0,0 +1 @@ +ababb||abbbabbbb diff --git a/testsuite/recall2.sed b/testsuite/recall2.sed new file mode 100644 index 0000000..f668773 --- /dev/null +++ b/testsuite/recall2.sed @@ -0,0 +1,5 @@ +# Starting from sed 4.1.3, regexes are compiled with REG_NOSUB +# if they are used in an address, so that the matcher does not +# have to obey leftmost-longest. The tricky part is to recompile +# them if they are then used in a substitution. +/\(ab*\)\+/ s//>\1</g diff --git a/testsuite/runptests.c b/testsuite/runptests.c new file mode 100644 index 0000000..cbe61cc --- /dev/null +++ b/testsuite/runptests.c @@ -0,0 +1,127 @@ +/* POSIX regex testsuite from IEEE 2003.2. + Copyright (C) 1998, 2003 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <regex.h> +#include <stdio.h> +#include <string.h> + +/* Data structure to describe the tests. */ +struct test +{ + int start; + int end; + const char *reg; + const char *str; + int options; +} tests[] = +{ +#include "ptestcases.h" +}; + + +int +main (int argc, char *argv[]) +{ + size_t cnt; + int errors = 0; + + for (cnt = 0; cnt < sizeof (tests) / sizeof (tests[0]); ++cnt) + if (tests[cnt].str == NULL) + { + printf ("\n%s\n%.*s\n", tests[cnt].reg, + (int) strlen (tests[cnt].reg), + "-----------------------------------------------------"); + } + else if (tests[cnt].reg == NULL) + printf ("!!! %s\n", tests[cnt].str); + else + { + regex_t re; + regmatch_t match[20]; + int err; + + printf ("regexp: \"%s\", string: \"%s\" -> ", tests[cnt].reg, + tests[cnt].str); + + /* Compile the expression. 
*/ + err = regcomp (&re, tests[cnt].reg, tests[cnt].options); + if (err != 0) + { + if (tests[cnt].start == -2) + puts ("compiling failed, OK"); + else + { + char buf[100]; + regerror (err, &re, buf, sizeof (buf)); + printf ("FAIL: %s\n", buf); + ++errors; + } + + continue; + } + else if (tests[cnt].start == -2) + { + puts ("compiling suceeds, FAIL"); + errors++; + continue; + } + + /* Run the actual test. */ + err = regexec (&re, tests[cnt].str, 20, match, 0); + + if (err != 0) + { + if (tests[cnt].start == -1) + puts ("no match, OK"); + else + { + puts ("no match, FAIL"); + ++errors; + } + } + else + { + if (match[0].rm_so == 0 && tests[cnt].start == 0 + && match[0].rm_eo == 0 && tests[cnt].end == 0) + puts ("match, OK"); + else if (match[0].rm_so + 1 == tests[cnt].start + && match[0].rm_eo == tests[cnt].end) + puts ("match, OK"); + else + { + printf ("wrong match (%d to %d): FAIL\n", + match[0].rm_so, match[0].rm_eo); + ++errors; + } + } + + /* Free all resources. */ + regfree (&re); + } + + printf ("\n%u tests, %d errors\n", (int) cnt, errors); + + return errors != 0; +} diff --git a/testsuite/runtest b/testsuite/runtest new file mode 100755 index 0000000..0134a5d --- /dev/null +++ b/testsuite/runtest @@ -0,0 +1,18 @@ +#! /bin/sh + +: ${MAKE=make} +: ${srcdir=.} +: ${SED="../sed/sed"} + +makefile="$srcdir/Makefile.tests" +test=`echo "$@"| sed 's,.*/,,'` + +# As a convenience, suppress the output of make if the test passes +if $MAKE SED="$SED" srcdir="$srcdir" -f "$makefile" $test > tmp.test 2>&1; then + rm -f tmp.test +else + exitcode=$? 
+ cat tmp.test + rm -f tmp.test + exit $exitcode +fi diff --git a/testsuite/runtests.c b/testsuite/runtests.c new file mode 100644 index 0000000..2f62315 --- /dev/null +++ b/testsuite/runtests.c @@ -0,0 +1,146 @@ +/*********************************************************** + +Copyright 1995 by Tom Lord + + All Rights Reserved + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of the copyright holder not be +used in advertising or publicity pertaining to distribution of the +software without specific, written prior permission. + +Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. 
+ +******************************************************************/ + + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + + +struct a_test +{ + int expected; + const char * pattern; + const unsigned char * data; +}; + +static const struct a_test the_tests[] = +{ +#include "testcases.h" + {-1, 0, 0} +}; + + + + +static int +run_a_test (int id, const struct a_test * t) +{ + static const char * last_pattern = 0; + static regex_t r; + int err; + char errmsg[100]; + int x; + regmatch_t regs[10]; + + if (!last_pattern || strcmp (last_pattern, t->pattern)) + { + if (last_pattern) + regfree (&r); + last_pattern = t->pattern; + err = regcomp (&r, t->pattern, REG_EXTENDED); + if (err) + { + if (t->expected == 2) + { + puts (" OK."); + return 0; + } + if (last_pattern) + regfree (&r); + last_pattern = NULL; + regerror (err, &r, errmsg, 100); + printf (" FAIL: %s.\n", errmsg); + return 1; + } + else if (t->expected == 2) + { + printf ("test %d\n", id); + printf ("pattern \"%s\" successfull compilation not expected\n", + t->pattern); + return 1; + } + } + + for (x = 0; x < 10; ++x) + regs[x].rm_so = regs[x].rm_eo = -1; + + err = regexec (&r, t->data, 10, regs, 0); + + if (err != t->expected) + { + printf ("test %d\n", id); + printf ("pattern \"%s\" data \"%s\" wanted %d got %d\n", + t->pattern, t->data, t->expected, err); + for (x = 0; x < 10; ++x) + if (regs[x].rm_so != -1) + printf ("reg %d == (%d, %d) %.*s\n", + x, + regs[x].rm_so, + regs[x].rm_eo, + regs[x].rm_eo - regs[x].rm_so, + t->data + regs[x].rm_so); + return 1; + } + puts (" OK."); + return 0; +} + + + +int +main (int argc, char * argv[]) +{ + int x; + int lo; + int hi; + int res = 0; + + lo = 0; + hi = (sizeof (the_tests) / sizeof (the_tests[0])) - 1; + + if (argc > 1) + { + lo = atoi (argv[1]); + hi = lo + 1; + + if (argc > 2) + hi = atoi (argv[2]); + } + + for (x = lo; x < hi; ++x) + { + printf 
("#%d:", x); + res |= run_a_test (x, &the_tests[x]); + } + return res != 0; +} diff --git a/testsuite/sep.good b/testsuite/sep.good new file mode 100644 index 0000000..7db0e1e --- /dev/null +++ b/testsuite/sep.good @@ -0,0 +1,3 @@ + +/// +// diff --git a/testsuite/sep.inp b/testsuite/sep.inp new file mode 100644 index 0000000..5795f4b --- /dev/null +++ b/testsuite/sep.inp @@ -0,0 +1,3 @@ +miss mary mack mack//mack/ran down/the track track track +slashes\aren't%used enough/in/casual-conversation/// +possibly sentences would be more attractive if they ended in two slashes// diff --git a/testsuite/sep.sed b/testsuite/sep.sed new file mode 100644 index 0000000..4864b81 --- /dev/null +++ b/testsuite/sep.sed @@ -0,0 +1,4 @@ +# inspired by an autoconf generated configure script. +s%/[^/][^/]*$%% +s%[\/][^\/][^\/]*$%% +s,.*[^\/],, diff --git a/testsuite/space.good b/testsuite/space.good new file mode 100644 index 0000000..9b267aa --- /dev/null +++ b/testsuite/space.good @@ -0,0 +1,2 @@ +Hello_World_! +SecondXXine__of_tests diff --git a/testsuite/space.inp b/testsuite/space.inp new file mode 100644 index 0000000..83b0adb --- /dev/null +++ b/testsuite/space.inp @@ -0,0 +1,2 @@ +Hello World ! +Second_line_ of tests diff --git a/testsuite/space.sed b/testsuite/space.sed new file mode 100644 index 0000000..0bfa522 --- /dev/null +++ b/testsuite/space.sed @@ -0,0 +1 @@ +s/_\S/XX/g;s/\s/_/g diff --git a/testsuite/subwrite.inp b/testsuite/subwrite.inp new file mode 100644 index 0000000..3e910cc --- /dev/null +++ b/testsuite/subwrite.inp @@ -0,0 +1,4 @@ +Not some church, and not the state, +Not some dark capricious fate. +Who you are, and when you lose, +Comes only from the things you choose. 
diff --git a/testsuite/subwrite.sed b/testsuite/subwrite.sed new file mode 100644 index 0000000..1a4a01d --- /dev/null +++ b/testsuite/subwrite.sed @@ -0,0 +1 @@ +s/you/YoU/w subwrite.wout diff --git a/testsuite/subwrt1.good b/testsuite/subwrt1.good new file mode 100644 index 0000000..560b698 --- /dev/null +++ b/testsuite/subwrt1.good @@ -0,0 +1,4 @@ +Not some church, and not the state, +Not some dark capricious fate. +Who YoU are, and when you lose, +Comes only from the things YoU choose. diff --git a/testsuite/subwrt2.good b/testsuite/subwrt2.good new file mode 100644 index 0000000..c87bb68 --- /dev/null +++ b/testsuite/subwrt2.good @@ -0,0 +1,2 @@ +Who YoU are, and when you lose, +Comes only from the things YoU choose. diff --git a/testsuite/testcases.h b/testsuite/testcases.h new file mode 100644 index 0000000..834f530 --- /dev/null +++ b/testsuite/testcases.h @@ -0,0 +1,167 @@ + {0, "(.*)*\\1", "xx"}, + {0, "^", ""}, + {0, "$", ""}, + {0, "^$", ""}, + {0, "^a$", "a"}, + {0, "abc", "abc"}, + {1, "abc", "xbc"}, + {1, "abc", "axc"}, + {1, "abc", "abx"}, + {0, "abc", "xabcy"}, + {0, "abc", "ababc"}, + {0, "ab*c", "abc"}, + {0, "ab*bc", "abc"}, + {0, "ab*bc", "abbc"}, + {0, "ab*bc", "abbbbc"}, + {0, "ab+bc", "abbc"}, + {1, "ab+bc", "abc"}, + {1, "ab+bc", "abq"}, + {0, "ab+bc", "abbbbc"}, + {0, "ab?bc", "abbc"}, + {0, "ab?bc", "abc"}, + {1, "ab?bc", "abbbbc"}, + {0, "ab?c", "abc"}, + {0, "^abc$", "abc"}, + {1, "^abc$", "abcc"}, + {0, "^abc", "abcc"}, + {1, "^abc$", "aabc"}, + {0, "abc$", "aabc"}, + {0, "^", "abc"}, + {0, "$", "abc"}, + {0, "a.c", "abc"}, + {0, "a.c", "axc"}, + {0, "a.*c", "axyzc"}, + {1, "a.*c", "axyzd"}, + {1, "a[bc]d", "abc"}, + {0, "a[bc]d", "abd"}, + {1, "a[b-d]e", "abd"}, + {0, "a[b-d]e", "ace"}, + {0, "a[b-d]", "aac"}, + {0, "a[-b]", "a-"}, + {0, "a[b-]", "a-"}, + {2, "a[b-a]", "-"}, + {2, "a[]b", "-"}, + {2, "a[", "-"}, + {0, "a]", "a]"}, + {0, "a[]]b", "a]b"}, + {0, "a[^bc]d", "aed"}, + {1, "a[^bc]d", "abd"}, + {0, "a[^-b]c", "adc"}, + {1, 
"a[^-b]c", "a-c"}, + {1, "a[^]b]c", "a]c"}, + {0, "a[^]b]c", "adc"}, + {0, "ab|cd", "abc"}, + {0, "ab|cd", "abcd"}, + {0, "()ef", "def"}, + {0, "()*", "-"}, + {2, "*a", "-"}, + {2, "^*", "-"}, + {2, "$*", "-"}, + {2, "(*)b", "-"}, + {1, "$b", "b"}, + {2, "a\\", "-"}, + {0, "a\\(b", "a(b"}, + {0, "a\\(*b", "ab"}, + {0, "a\\(*b", "a((b"}, + {1, "a\\x", "a\\x"}, + {1, "abc)", "-"}, + {2, "(abc", "-"}, + {0, "((a))", "abc"}, + {0, "(a)b(c)", "abc"}, + {0, "a+b+c", "aabbabc"}, + {0, "a**", "-"}, + {0, "a*?", "-"}, + {0, "(a*)*", "-"}, + {0, "(a*)+", "-"}, + {0, "(a|)*", "-"}, + {0, "(a*|b)*", "-"}, + {0, "(a+|b)*", "ab"}, + {0, "(a+|b)+", "ab"}, + {0, "(a+|b)?", "ab"}, + {0, "[^ab]*", "cde"}, + {0, "(^)*", "-"}, + {0, "(ab|)*", "-"}, + {2, ")(", "-"}, + {1, "abc", ""}, + {1, "abc", ""}, + {0, "a*", ""}, + {0, "([abc])*d", "abbbcd"}, + {0, "([abc])*bcd", "abcd"}, + {0, "a|b|c|d|e", "e"}, + {0, "(a|b|c|d|e)f", "ef"}, + {0, "((a*|b))*", "-"}, + {0, "abcd*efg", "abcdefg"}, + {0, "ab*", "xabyabbbz"}, + {0, "ab*", "xayabbbz"}, + {0, "(ab|cd)e", "abcde"}, + {0, "[abhgefdc]ij", "hij"}, + {1, "^(ab|cd)e", "abcde"}, + {0, "(abc|)ef", "abcdef"}, + {0, "(a|b)c*d", "abcd"}, + {0, "(ab|ab*)bc", "abc"}, + {0, "a([bc]*)c*", "abc"}, + {0, "a([bc]*)(c*d)", "abcd"}, + {0, "a([bc]+)(c*d)", "abcd"}, + {0, "a([bc]*)(c+d)", "abcd"}, + {0, "a[bcd]*dcdcde", "adcdcde"}, + {1, "a[bcd]+dcdcde", "adcdcde"}, + {0, "(ab|a)b*c", "abc"}, + {0, "((a)(b)c)(d)", "abcd"}, + {0, "[A-Za-z_][A-Za-z0-9_]*", "alpha"}, + {0, "^a(bc+|b[eh])g|.h$", "abh"}, + {0, "(bc+d$|ef*g.|h?i(j|k))", "effgz"}, + {0, "(bc+d$|ef*g.|h?i(j|k))", "ij"}, + {1, "(bc+d$|ef*g.|h?i(j|k))", "effg"}, + {1, "(bc+d$|ef*g.|h?i(j|k))", "bcdd"}, + {0, "(bc+d$|ef*g.|h?i(j|k))", "reffgz"}, + {1, "((((((((((a))))))))))", "-"}, + {0, "(((((((((a)))))))))", "a"}, + {1, "multiple words of text", "uh-uh"}, + {0, "multiple words", "multiple words, yeah"}, + {0, "(.*)c(.*)", "abcde"}, + {1, "\\((.*),", "(.*)\\)"}, + {1, "[k]", "ab"}, + {0, "abcd", 
"abcd"}, + {0, "a(bc)d", "abcd"}, + {0, "a[-]?c", "ac"}, + {0, "(....).*\\1", "beriberi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar Gadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Kaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar El Kadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gadafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamer El Kazzafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar al-Gaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Al Qathafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Al Qathafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar el-Gadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar El Kadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Qadhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadhdhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar Gaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadhdhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Khaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- 
])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Khaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'amar al-Kadafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafy"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghadafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Kaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Quathafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gheddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Al-Kaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Khadafy "}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Qudhafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qaddafi"}, + {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi"}, + {0, "[[:digit:]]+", "01234"}, + {1, "[[:alpha:]]+", "01234"}, + {0, "^[[:digit:]]*$", "01234"}, + {1, "^[[:digit:]]*$", "01234a"}, + {0, "^[[:alnum:]]*$", "01234a"}, + {0, "^[[:xdigit:]]*$", "01234a"}, + {1, "^[[:xdigit:]]*$", "01234g"}, + {0, "^[[:alnum:][:space:]]*$", "Hello world"}, diff --git a/testsuite/tst-boost.c b/testsuite/tst-boost.c new file mode 100644 index 0000000..b38a620 --- /dev/null +++ b/testsuite/tst-boost.c @@ -0,0 +1,235 @@ +/* Regular expression tests. + Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void +frob_escapes (char *src, int pattern) +{ + char *dst; + + for (dst = src; *src != '\0'; dst++, src++) + { + if (*src == '\\') + { + switch (src[1]) + { + case 't': + src++; + *dst = '\t'; + continue; + case 'n': + src++; + *dst = '\n'; + continue; + case 'r': + src++; + *dst = '\r'; + continue; + case '\\': + case '^': + case '{': + case '|': + case '}': + if (!pattern) + { + src++; + *dst = *src; + continue; + } + break; + } + } + if (src != dst) + *dst = *src; + } + *dst = '\0'; +} + +int +main (int argc, char **argv) +{ + int ret = 0, n; + char *line = NULL; + size_t line_len = 0; + ssize_t len; + FILE *f; + char *pattern, *string; + int flags = REG_EXTENDED; + int eflags = 0; + regex_t re; + regmatch_t rm[20]; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + if (argc < 2) + { + fprintf (stderr, "Missing test filename\n"); + return 1; + } + + f = fopen (argv[1], "r"); + if (f == NULL) + { + fprintf (stderr, "Couldn't open %s\n", argv[1]); + return 1; + } + + while ((len = getline (&line, &line_len, f)) > 0) + { 
+ char *p, *q; + int i; + + if (line[len - 1] == '\n') + line[--len] = '\0'; + + puts (line); + + if (line[0] == ';') + continue; + + if (line[0] == '\0') + continue; + + if (line[0] == '-') + { + if (strstr (line, "REG_BASIC")) + flags = 0; + else + flags = REG_EXTENDED; + if (strstr (line, "REG_ICASE")) + flags |= REG_ICASE; + if (strstr (line, "REG_NEWLINE")) + flags |= REG_NEWLINE; + eflags = 0; + if (strstr (line, "REG_NOTBOL")) + eflags |= REG_NOTBOL; + if (strstr (line, "REG_NOTEOL")) + eflags |= REG_NOTEOL; + continue; + } + + pattern = line + strspn (line, " \t"); + if (*pattern == '\0') + continue; + p = pattern + strcspn (pattern, " \t"); + if (*p == '\0') + continue; + *p++ = '\0'; + + string = p + strspn (p, " \t"); + if (*string == '\0') + continue; + if (*string == '"') + { + string++; + p = strchr (string, '"'); + if (p == NULL) + continue; + *p++ = '\0'; + } + else + { + p = string + strcspn (string, " \t"); + if (*string == '!') + string = NULL; + else if (*p == '\0') + continue; + else + *p++ = '\0'; + } + + frob_escapes (pattern, 1); + if (string != NULL) + frob_escapes (string, 0); + + n = regcomp (&re, pattern, flags); + if (n != 0) + { + if (string != NULL) + { + char buf[500]; + regerror (n, &re, buf, sizeof (buf)); + printf ("FAIL regcomp unexpectedly failed: %s\n", + buf); + ret = 1; + } + continue; + } + else if (string == NULL) + { + regfree (&re); + puts ("FAIL regcomp unpexpectedly succeeded"); + ret = 1; + continue; + } + + if (regexec (&re, string, 20, rm, eflags)) + { + for (i = 0; i < 20; ++i) + { + rm[i].rm_so = -1; + rm[i].rm_eo = -1; + } + } + + regfree (&re); + + for (i = 0; i < 20 && *p != '\0'; ++i) + { + int rm_so, rm_eo; + + rm_so = strtol (p, &q, 10); + if (p == q) + break; + p = q; + + rm_eo = strtol (p, &q, 10); + if (p == q) + break; + p = q; + + if (rm[i].rm_so != rm_so || rm[i].rm_eo != rm_eo) + { + printf ("FAIL rm[%d] %d..%d != expected %d..%d\n", + i, rm[i].rm_so, rm[i].rm_eo, rm_so, rm_eo); + ret = 1; + break; + } 
+ } + } + + free (line); + fclose (f); + return ret; +} diff --git a/testsuite/tst-pcre.c b/testsuite/tst-pcre.c new file mode 100644 index 0000000..e8c6a0b --- /dev/null +++ b/testsuite/tst-pcre.c @@ -0,0 +1,249 @@ +/* Regular expression tests. + Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int +main (int argc, char **argv) +{ + int ret = 0; + char *line = NULL; + size_t line_len = 0; + ssize_t len; + FILE *f; + char *pattern = NULL, *string = NULL; + regmatch_t rm[20]; + size_t pattern_alloced = 0, string_alloced = 0; + int ignorecase = 0; + int pattern_valid = 0, rm_valid = 0; + size_t linenum; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + if (argc < 2) + { + fprintf (stderr, "Missing test filename\n"); + return 1; + } + + f = fopen (argv[1], "r"); + if (f == NULL) + { + fprintf (stderr, "Couldn't open %s\n", argv[1]); + return 1; + } + + if ((len = getline (&line, &line_len, f)) <= 0 + || strncmp (line, "# PCRE", 6) != 0) + { + fprintf (stderr, "Not a PCRE test file\n"); + fclose (f); + free (line); + return 1; + } + + linenum = 1; + + while ((len = getline (&line, &line_len, f)) > 0) + { + char *p; + unsigned long num; + + ++linenum; + + if (line[len - 1] == '\n') + line[--len] = '\0'; + + if (line[0] == '#') + continue; + + if (line[0] == '\0') + { + /* End of test. */ + ignorecase = 0; + pattern_valid = 0; + rm_valid = 0; + continue; + } + + if (line[0] == '/') + { + /* Pattern. 
*/ + p = strrchr (line + 1, '/'); + + pattern_valid = 0; + rm_valid = 0; + if (p == NULL) + { + printf ("%lu: Invalid pattern line: %s\n", linenum, line); + ret = 1; + continue; + } + + if (p[1] == 'i' && p[2] == '\0') + ignorecase = 1; + else if (p[1] != '\0') + { + printf ("%lu: Invalid pattern line: %s\n", linenum, line); + ret = 1; + continue; + } + + if (pattern_alloced < (size_t) (p - line)) + { + pattern = realloc (pattern, p - line); + if (pattern == NULL) + { + printf ("%lu: Cannot record pattern: %m\n", linenum); + ret = 1; + break; + } + pattern_alloced = p - line; + } + + memcpy (pattern, line + 1, p - line - 1); + pattern[p - line - 1] = '\0'; + pattern_valid = 1; + continue; + } + + if (strncmp (line, " ", 4) == 0) + { + regex_t re; + int n; + + if (!pattern_valid) + { + printf ("%lu: No previous valid pattern %s\n", linenum, line); + continue; + } + + if (string_alloced < (size_t) (len - 3)) + { + string = realloc (string, len - 3); + if (string == NULL) + { + printf ("%lu: Cannot record search string: %m\n", linenum); + ret = 1; + break; + } + string_alloced = len - 3; + } + + memcpy (string, line + 4, len - 3); + + n = regcomp (&re, pattern, + REG_EXTENDED | (ignorecase ? 
REG_ICASE : 0)); + if (n != 0) + { + char buf[500]; + regerror (n, &re, buf, sizeof (buf)); + printf ("%lu: regcomp failed for %s: %s\n", + linenum, pattern, buf); + ret = 1; + continue; + } + + if (regexec (&re, string, 20, rm, 0)) + { + rm[0].rm_so = -1; + rm[0].rm_eo = -1; + } + + regfree (&re); + rm_valid = 1; + continue; + } + + if (!rm_valid) + { + printf ("%lu: No preceeding pattern or search string\n", linenum); + ret = 1; + continue; + } + + if (strcmp (line, "No match") == 0) + { + if (rm[0].rm_so != -1 || rm[0].rm_eo != -1) + { + printf ("%lu: /%s/ on %s unexpectedly matched %d..%d\n", + linenum, pattern, string, rm[0].rm_so, rm[0].rm_eo); + ret = 1; + } + + continue; + } + + p = line; + if (*p == ' ') + ++p; + + num = strtoul (p, &p, 10); + if (num >= 20 || *p != ':' || p[1] != ' ') + { + printf ("%lu: Invalid line %s\n", linenum, line); + ret = 1; + continue; + } + + if (rm[num].rm_so == -1 || rm[num].rm_eo == -1) + { + if (strcmp (p + 2, "<unset>") != 0) + { + printf ("%lu: /%s/ on %s unexpectedly failed to match register %ld %d..%d\n", + linenum, pattern, string, num, + rm[num].rm_so, rm[num].rm_eo); + ret = 1; + } + continue; + } + + if (rm[num].rm_eo < rm[num].rm_so + || rm[num].rm_eo - rm[num].rm_so != len - (p + 2 - line) + || strncmp (p + 2, string + rm[num].rm_so, + rm[num].rm_eo - rm[num].rm_so) != 0) + { + printf ("%lu: /%s/ on %s unexpectedly failed to match %s for register %ld %d..%d\n", + linenum, pattern, string, p + 2, num, + rm[num].rm_so, rm[num].rm_eo); + ret = 1; + continue; + } + } + + free (pattern); + free (string); + free (line); + fclose (f); + return ret; +} diff --git a/testsuite/tst-regex2.c b/testsuite/tst-regex2.c new file mode 100644 index 0000000..1dc14ad --- /dev/null +++ b/testsuite/tst-regex2.c @@ -0,0 +1,209 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <errno.h> +#include <fcntl.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include 
<unistd.h> + +int +main (int argc, char *argv[]) +{ + struct stat st; + static const char *pat[] = { + ".?.?.?.?.?.?.?argc", + "(.?)(.?)(.?)(.?)(.?)(.?)(.?)argc", + "((((((((((.?))))))))))((((((((((.?))))))))))((((((((((.?))))))))))" + "((((((((((.?))))))))))((((((((((.?))))))))))((((((((((.?))))))))))" + "((((((((((.?))))))))))argc" }; + + size_t len; + int fd; + int testno, i, j, k, l; + char *string; + char *buf; + + if (argc < 2) + abort (); + + fd = open (argv[1], O_RDONLY); + if (fd < 0) + { + printf ("Couldn't open %s: %s\n", argv[1], strerror (errno)); + abort (); + } + + if (fstat (fd, &st) < 0) + { + printf ("Couldn't fstat %s: %s\n", argv[1], strerror (errno)); + abort (); + } + + buf = malloc (st.st_size + 1); + if (buf == NULL) + { + printf ("Couldn't allocate buffer: %s\n", strerror (errno)); + abort (); + } + + if (read (fd, buf, st.st_size) != (ssize_t) st.st_size) + { + printf ("Couldn't read %s", argv[1]); + abort (); + } + + close (fd); + buf[st.st_size] = '\0'; + + string = buf; + len = st.st_size; + + for (testno = 0; testno < 4; ++testno) + for (i = 0; i < sizeof (pat) / sizeof (pat[0]); ++i) + { + regex_t rbuf; + struct re_pattern_buffer rpbuf; + int err; + + printf ("test %d pattern %d", testno, i); + if (testno < 2) + { + err = regcomp (&rbuf, pat[i], + REG_EXTENDED | (testno ? REG_NOSUB : 0)); + if (err != 0) + { + char errstr[300]; + putchar ('\n'); + regerror (err, &rbuf, errstr, sizeof (errstr)); + puts (errstr); + return err; + } + } + else + { + const char *s; + re_set_syntax (RE_SYNTAX_POSIX_EGREP + | (testno == 3 ? RE_NO_SUB : 0)); + + memset (&rpbuf, 0, sizeof (rpbuf)); + s = re_compile_pattern (pat[i], strlen (pat[i]), &rpbuf); + if (s != NULL) + { + printf ("\n%s\n", s); + abort (); + } + + /* Just so that this can be tested with earlier glibc as well. 
*/ + if (testno == 3) + rpbuf.no_sub = 1; + } + + if (testno < 2) + { + regmatch_t pmatch[71]; + err = regexec (&rbuf, string, 71, pmatch, 0); + if (err == REG_NOMATCH) + { + puts ("\nregexec failed"); + abort (); + } + + if (testno == 0) + { + if (pmatch[0].rm_eo != pmatch[0].rm_so + 11 + || pmatch[0].rm_eo > len + || string + pmatch[0].rm_so >= strchr (string, 'R') + || strncmp (string + pmatch[0].rm_so, + "n (int argc", + sizeof "n (int argc" - 1) + != 0) + { + puts ("\nregexec without REG_NOSUB did not find the correct match"); + abort (); + } + + if (i > 0) + for (j = 0, l = 1; j < 7; ++j) + for (k = 0; k < (i == 1 ? 1 : 10); ++k, ++l) + if (pmatch[l].rm_so != pmatch[0].rm_so + j + || pmatch[l].rm_eo != pmatch[l].rm_so + 1) + { + printf ("\npmatch[%d] incorrect\n", l); + abort (); + } + } + } + else + { + struct re_registers regs; + int match; + + memset (®s, 0, sizeof (regs)); + match = re_search (&rpbuf, string, len, 0, len, + ®s); + if (match < 0) + { + puts ("\nre_search failed"); + abort (); + } + + if (match + 11 > len + || string + match >= strchr (string, 'R') + || strncmp (string + match, + "n (int argc", + sizeof "n (int argc" - 1) + != 0) + { + puts ("\nre_search did not find the correct match"); + abort (); + } + + if (testno == 2) + { + if (regs.num_regs != 2 + (i == 0 ? 0 : i == 1 ? 7 : 70)) + { + printf ("\nincorrect num_regs %d\n", regs.num_regs); + abort (); + } + + if (regs.start[0] != match || regs.end[0] != match + 11) + { + printf ("\nincorrect regs.{start,end}[0] = { %d, %d}\n", + regs.start[0], regs.end[0]); + abort (); + } + + if (regs.start[regs.num_regs - 1] != -1 + || regs.end[regs.num_regs - 1] != -1) + { + puts ("\nincorrect regs.{start,end}[num_regs - 1]"); + abort (); + } + + if (i > 0) + for (j = 0, l = 1; j < 7; ++j) + for (k = 0; k < (i == 1 ? 
1 : 10); ++k, ++l) + if (regs.start[l] != match + j + || regs.end[l] != regs.start[l] + 1) + { + printf ("\nregs.{start,end}[%d] incorrect\n", l); + abort (); + } + } + } + + putchar ('\n'); + + if (testno < 2) + regfree (&rbuf); + else + regfree (&rpbuf); + } + + exit (0); +} diff --git a/testsuite/tst-rxspencer.c b/testsuite/tst-rxspencer.c new file mode 100644 index 0000000..758fc04 --- /dev/null +++ b/testsuite/tst-rxspencer.c @@ -0,0 +1,560 @@ +/* Regular expression tests. + Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_MCHECK_H +#include <mcheck.h> +#endif +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <getopt.h> + +static void +replace_special_chars (char *str) +{ + for (; (str = strpbrk (str, "NTSZ")) != NULL; ++str) + switch (*str) + { + case 'N': *str = '\n'; break; + case 'T': *str = '\t'; break; + case 'S': *str = ' '; break; + case 'Z': *str = '\0'; break; + } +} + +static void +glibc_re_syntax (char *str) +{ + char *p, *end = strchr (str, '\0') + 1; + + /* Replace [[:<:]] with \< and [[:>:]] with \>. */ + for (p = str; (p = strstr (p, "[[:")) != NULL; ) + if ((p[3] == '<' || p[3] == '>') && strncmp (p + 4, ":]]", 3) == 0) + { + p[0] = '\\'; + p[1] = p[3]; + memmove (p + 2, p + 7, end - p - 7); + end -= 5; + p += 2; + } + else + p += 3; +} + +static char * +mb_replace (char *dst, const char c) +{ + switch (c) + { + /* Replace a with \'a and A with \'A. */ + case 'a': + *dst++ = '\xc3'; + *dst++ = '\xa1'; + break; + case 'A': + *dst++ = '\xc3'; + *dst++ = '\x81'; + break; + /* Replace b with \v{c} and B with \v{C}. */ + case 'b': + *dst++ = '\xc4'; + *dst++ = '\x8d'; + break; + case 'B': + *dst++ = '\xc4'; + *dst++ = '\x8c'; + break; + /* Replace c with \v{d} and C with \v{D}. */ + case 'c': + *dst++ = '\xc4'; + *dst++ = '\x8f'; + break; + case 'C': + *dst++ = '\xc4'; + *dst++ = '\x8e'; + break; + /* Replace d with \'e and D with \'E. 
*/ + case 'd': + *dst++ = '\xc3'; + *dst++ = '\xa9'; + break; + case 'D': + *dst++ = '\xc3'; + *dst++ = '\x89'; + break; + } + return dst; +} + +static char * +mb_frob_string (const char *str, const char *letters) +{ + char *ret, *dst; + const char *src; + + if (str == NULL) + return NULL; + + ret = malloc (2 * strlen (str) + 1); + if (ret == NULL) + return NULL; + + for (src = str, dst = ret; *src; ++src) + if (strchr (letters, *src)) + dst = mb_replace (dst, *src); + else + *dst++ = *src; + *dst = '\0'; + return ret; +} + +/* Like mb_frob_string, but don't replace anything between + [: and :], [. and .] or [= and =]. */ + +static char * +mb_frob_pattern (const char *str, const char *letters) +{ + char *ret, *dst; + const char *src; + int in_class = 0; + + if (str == NULL) + return NULL; + + ret = malloc (2 * strlen (str) + 1); + if (ret == NULL) + return NULL; + + for (src = str, dst = ret; *src; ++src) + if (!in_class && strchr (letters, *src)) + dst = mb_replace (dst, *src); + else + { + if (!in_class && *src == '[' && strchr (":.=", src[1])) + in_class = 1; + else if (in_class && *src == ']' && strchr (":.=", src[-1])) + in_class = 0; + *dst++ = *src; + } + *dst = '\0'; + return ret; +} + +static int +check_match (regmatch_t *rm, int idx, const char *string, + const char *match, const char *fail) +{ + if (match[0] == '-' && match[1] == '\0') + { + if (rm[idx].rm_so == -1 && rm[idx].rm_eo == -1) + return 0; + printf ("%s rm[%d] unexpectedly matched\n", fail, idx); + return 1; + } + + if (rm[idx].rm_so == -1 || rm[idx].rm_eo == -1) + { + printf ("%s rm[%d] unexpectedly did not match\n", fail, idx); + return 1; + } + + if (match[0] == '@') + { + if (rm[idx].rm_so != rm[idx].rm_eo) + { + printf ("%s rm[%d] not empty\n", fail, idx); + return 1; + } + + if (strncmp (string + rm[idx].rm_so, match + 1, strlen (match + 1) + ? 
strlen (match + 1) : 1)) + { + printf ("%s rm[%d] not matching %s\n", fail, idx, match); + return 1; + } + return 0; + } + + if (rm[idx].rm_eo - rm[idx].rm_so != strlen (match) + || strncmp (string + rm[idx].rm_so, match, + rm[idx].rm_eo - rm[idx].rm_so)) + { + printf ("%s rm[%d] not matching %s\n", fail, idx, match); + return 1; + } + + return 0; +} + +static int +test (const char *pattern, int cflags, const char *string, int eflags, + char *expect, char *matches, const char *fail) +{ + regex_t re; + regmatch_t rm[10]; + int n, ret = 0; + + n = regcomp (&re, pattern, cflags); + if (n != 0) + { + char buf[500]; + if (eflags == -1) + { + static struct { reg_errcode_t code; const char *name; } codes [] +#define C(x) { REG_##x, #x } + = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE), + C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK), + C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE), + C(ESPACE), C(BADRPT) }; + + int i; + for (i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i) + if (n == codes[i].code) + { + if (strcmp (string, codes[i].name)) + { + printf ("%s regcomp returned REG_%s (expected REG_%s)\n", + fail, codes[i].name, string); + return 1; + } + return 0; + } + + printf ("%s regcomp return value REG_%d\n", fail, n); + return 1; + } + + regerror (n, &re, buf, sizeof (buf)); + printf ("%s regcomp failed: %s\n", fail, buf); + return 1; + } + + if (eflags == -1) + { + regfree (&re); + + /* The test case file assumes something only guaranteed by the + rxspencer regex implementation. Namely that for empty + expressions regcomp() return REG_EMPTY. This is not the case + for us and so we ignore this error. 
*/ + if (strcmp (string, "EMPTY") == 0) + return 0; + + printf ("%s regcomp unexpectedly succeeded\n", fail); + return 1; + } + + if (regexec (&re, string, 10, rm, eflags)) + { + regfree (&re); + if (expect == NULL) + return 0; + printf ("%s regexec failed\n", fail); + return 1; + } + + regfree (&re); + + if (expect == NULL) + { + printf ("%s regexec unexpectedly succeeded\n", fail); + return 1; + } + + if (cflags & REG_NOSUB) + return 0; + + ret = check_match (rm, 0, string, expect, fail); + if (matches == NULL) + return ret; + + for (n = 1; ret == 0 && n < 10; ++n) + { + char *p = NULL; + + if (matches) + { + p = strchr (matches, ','); + if (p != NULL) + *p = '\0'; + } + ret = check_match (rm, n, string, matches ? matches : "-", fail); + if (p) + { + *p = ','; + matches = p + 1; + } + else + matches = NULL; + } + + return ret; +} + +static int +mb_test (const char *pattern, int cflags, const char *string, int eflags, + char *expect, const char *matches, const char *letters, + const char *fail) +{ + char *pattern_mb = mb_frob_pattern (pattern, letters); + const char *string_mb + = eflags == -1 ? string : mb_frob_string (string, letters); + char *expect_mb = mb_frob_string (expect, letters); + char *matches_mb = mb_frob_string (matches, letters); + int ret = 0; + + if (!pattern_mb || !string_mb + || (expect && !expect_mb) || (matches && !matches_mb)) + { + printf ("%s %m", fail); + ret = 1; + } + else + ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb, + matches_mb, fail); + + free (matches_mb); + free (expect_mb); + if (string_mb != string) + free ((char *) string_mb); + free (pattern_mb); + return ret; +} + +static int +mb_tests (const char *pattern, int cflags, const char *string, int eflags, + char *expect, const char *matches) +{ + int ret = 0; + int i; + char letters[9], fail[20]; + + /* The tests aren't supposed to work with xdigit, since a-dA-D are + hex digits while \'a \'A \v{c}\v{C}\v{d}\v{D}\'e \'E are not. 
*/ + if (strstr (pattern, "[:xdigit:]")) + return 0; + + /* XXX: regex ATM handles only single byte equivalence classes. */ + if (strstr (pattern, "[[=b=]]")) + return 0; + + for (i = 1; i < 16; ++i) + { + char *p = letters; + if (i & 1) + { + if (!strchr (pattern, 'a') && !strchr (string, 'a') + && !strchr (pattern, 'A') && !strchr (string, 'A')) + continue; + *p++ = 'a', *p++ = 'A'; + } + if (i & 2) + { + if (!strchr (pattern, 'b') && !strchr (string, 'b') + && !strchr (pattern, 'B') && !strchr (string, 'B')) + continue; + *p++ = 'b', *p++ = 'B'; + } + if (i & 4) + { + if (!strchr (pattern, 'c') && !strchr (string, 'c') + && !strchr (pattern, 'C') && !strchr (string, 'C')) + continue; + *p++ = 'c', *p++ = 'C'; + } + if (i & 8) + { + if (!strchr (pattern, 'd') && !strchr (string, 'd') + && !strchr (pattern, 'D') && !strchr (string, 'D')) + continue; + *p++ = 'd', *p++ = 'D'; + } + *p++ = '\0'; + sprintf (fail, "UTF-8 %s FAIL", letters); + ret |= mb_test (pattern, cflags, string, eflags, expect, matches, + letters, fail); + } + return ret; +} + +int +main (int argc, char **argv) +{ + int ret = 0; + char *line = NULL; + size_t line_len = 0; + ssize_t len; + FILE *f; + static int test_utf8 = 0; + static const struct option options[] = + { + {"utf8", no_argument, &test_utf8, 1}, + {NULL, 0, NULL, 0 } + }; + +#ifdef HAVE_MCHECK_H + mtrace (); +#endif + + while (getopt_long (argc, argv, "", options, NULL) >= 0); + + if (optind + 1 != argc) + { + fprintf (stderr, "Missing test filename\n"); + return 1; + } + + f = fopen (argv[optind], "r"); + if (f == NULL) + { + fprintf (stderr, "Couldn't open %s\n", argv[optind]); + return 1; + } + + while ((len = getline (&line, &line_len, f)) > 0) + { + char *pattern, *flagstr, *string, *expect, *matches, *p; + int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0; + + if (line[len - 1] == '\n') + line[len - 1] = '\0'; + + /* Skip comments and empty lines. 
*/ + if (*line == '#' || *line == '\0') + continue; + + puts (line); + fflush (stdout); + + pattern = strtok (line, "\t"); + if (pattern == NULL) + continue; + + if (strcmp (pattern, "\"\"") == 0) + pattern += 2; + + flagstr = strtok (NULL, "\t"); + if (flagstr == NULL) + continue; + + string = strtok (NULL, "\t"); + if (string == NULL) + continue; + + if (strcmp (string, "\"\"") == 0) + string += 2; + + for (p = flagstr; *p; ++p) + switch (*p) + { + case '-': + break; + case 'b': + cflags &= ~REG_EXTENDED; + break; + case '&': + try_bre_ere = 1; + break; + case 'C': + eflags = -1; + break; + case 'i': + cflags |= REG_ICASE; + break; + case 's': + cflags |= REG_NOSUB; + break; + case 'n': + cflags |= REG_NEWLINE; + break; + case '^': + eflags |= REG_NOTBOL; + break; + case '$': + eflags |= REG_NOTEOL; + break; + case 'm': + case 'p': + case '#': + /* Not supported. */ + flagstr = NULL; + break; + } + + if (flagstr == NULL) + continue; + + replace_special_chars (pattern); + glibc_re_syntax (pattern); + if (eflags != -1) + replace_special_chars (string); + + expect = strtok (NULL, "\t"); + matches = NULL; + if (expect != NULL) + { + replace_special_chars (expect); + matches = strtok (NULL, "\t"); + if (matches != NULL) + replace_special_chars (matches); + } + + if (setlocale (LC_ALL, "C") == NULL) + { + puts ("setlocale C failed"); + ret = 1; + } + if (test (pattern, cflags, string, eflags, expect, matches, "FAIL") + || (try_bre_ere + && test (pattern, cflags & ~REG_EXTENDED, string, eflags, + expect, matches, "FAIL"))) + ret = 1; + else if (test_utf8) + { + if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL) + { + puts ("setlocale cs_CZ.UTF-8 failed"); + ret = 1; + } + else if (test (pattern, cflags, string, eflags, expect, matches, + "UTF-8 FAIL") + || (try_bre_ere + && test (pattern, cflags & ~REG_EXTENDED, string, + eflags, expect, matches, "UTF-8 FAIL"))) + ret = 1; + else if (mb_tests (pattern, cflags, string, eflags, expect, matches) + || (try_bre_ere + && mb_tests 
(pattern, cflags & ~REG_EXTENDED, string, + eflags, expect, matches))) + ret = 1; + } + } + + free (line); + fclose (f); + return ret; +} diff --git a/testsuite/uniq.good b/testsuite/uniq.good new file mode 100644 index 0000000..2941bec --- /dev/null +++ b/testsuite/uniq.good @@ -0,0 +1,874 @@ + +#define DPRINTF(p) /*nothing */ +#define DPRINTF(p) printf p +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define class pcre_class +#define match_condassert 0x01 /* Called to check a condition assertion */ +#define match_isgroup 0x02 /* Set if start of bracketed group */ +#else +#endif +#ifdef DEBUG /* Sigh. Some compilers never learn. */ +#ifdef DEBUG +#ifdef __cplusplus +#include "internal.h" +&& length - re->max_match_size > start_offset) +((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word)) +((md->ctypes[*eptr] & ctype_word) != 0); +((md->ctypes[eptr[-1]] & ctype_word) != 0); +(eptr == md->end_subject - 1 && *eptr != '\n')) +(i.e. keep it out of the loop). 
Also we can test that there are at least +(md->ctypes[*eptr++] & ctype_digit) != 0) +(md->ctypes[*eptr++] & ctype_digit) == 0) +(md->ctypes[*eptr++] & ctype_space) != 0) +(md->ctypes[*eptr++] & ctype_space) == 0) +(md->ctypes[*eptr++] & ctype_word) != 0) +(md->ctypes[*eptr++] & ctype_word) == 0) +(offsetcount - 2) * sizeof (int)); +(offsets == NULL && offsetcount > 0)) +(pcre_free) (match_block.offset_vector); +(pcre_free) (save); +(re->tables + fcc_offset)[req_char] : req_char; +* Match a back-reference * +* Execute a Regular Expression * +* Match from current position * +* Debugging function to print chars * +* Perl-Compatible Regular Expressions * +* Macros and tables for character handling * +*************************************************/ +*/ +*iptr = -1; +*iptr++ = -1; +*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || +*prev == OP_ONCE) +----------------------------------------------------------------------------- +-1 => failed to match +/* +/* "Once" brackets are like assertion brackets except that after a match, +/* ... else fall through */ +/* Advance to a possible match for an initial string after study */ +/* Allow compilation as C++ source code, should anybody want to do that. */ +/* Always fail if not enough characters left */ +/* An alternation is the end of a branch; scan along to find the end of the +/* Assert before internal newline if multiline, or before a terminating +/* Assertion brackets. Check the alternative branches in turn - the +/* At the start of a bracketed group, add the current subject pointer to the +/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating +/* Caseful comparisons */ +/* Change option settings */ +/* Common code for all repeated single character type matches */ +/* Common code for all repeated single-character matches. We can give +/* Compute the minimum number of offsets that we need to reset each time. 
Doing +/* Conditional group: compilation checked that there are no more than +/* Continue as from after the assertion, updating the offsets high water +/* Continue from after the assertion, updating the offsets high water +/* Control never gets here */ +/* Control never reaches here */ +/* Copy the offset information from temporary store if necessary */ +/* Do a single test if no case difference is set up */ +/* Do not stick any code in here without much thought; it is assumed +/* End of a group, repeated or non-repeating. If we are at the end of +/* End of subject assertion (\z) */ +/* End of subject or ending \n assertion (\Z) */ +/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched +/* First, ensure the minimum number of matches are present. */ +/* First, ensure the minimum number of matches are present. Use inline +/* First, ensure the minimum number of matches are present. We get back +/* Flag bits for the match() function */ +/* For a non-repeating ket, just continue at this level. This also +/* For anchored or unanchored matches, there may be a "last known required +/* For extended extraction brackets (large number), we have to fish out +/* For extended extraction brackets (large number), we have to fish out the +/* For matches anchored to the end of the pattern, we can often avoid +/* If a back reference hasn't been set, the length that is passed is greater +/* If checking an assertion for a condition, return TRUE. */ +/* If hit the end of the group (which could be repeated), fail */ +/* If max == min we can continue with the main loop without the +/* If maximizing it is worth using inline code for speed, doing the type +/* If maximizing, find the longest possible run, then work backwards. */ +/* If maximizing, find the longest string and work backwards */ +/* If min = max, continue at the same level without recursing */ +/* If min = max, continue at the same level without recursion. 
+/* If minimizing, keep testing the rest of the expression and advancing +/* If minimizing, keep trying and advancing the pointer */ +/* If minimizing, we have to test the rest of the pattern before each +/* If req_char is set, we know that that character must appear in the subject +/* If the expression has got more back references than the offsets supplied can +/* If the length of the reference is zero, just continue with the +/* If the reference is unset, set the length to be longer than the amount +/* If we can't find the required character, break the matching loop */ +/* If we have found the required character, save the point where we +/* In all other cases except a conditional group we have to check the +/* In case the recursion has set more capturing values, save the final +/* Include the internals header, which itself includes Standard C headers plus +/* Insufficient room for saving captured contents */ +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +/* Match a back reference, possibly repeatedly. Look past the end of the +/* Match a character class, possibly repeatedly. Look past the end of the +/* Match a negated single character */ +/* Match a negated single character repeatedly. This is almost a repeat of +/* Match a run of characters */ +/* Match a single character repeatedly; different opcodes share code. */ +/* Match a single character type repeatedly; several different opcodes +/* Match a single character type; inline for speed */ +/* Min and max values for the common repeats; for the maxima, 0 => infinity */ +/* Move the subject pointer back. This occurs only at the start of +/* Negative assertion: all branches must fail to match */ +/* Now start processing the operations. */ +/* OP_KETRMAX */ +/* On entry ecode points to the first opcode, and eptr to the first character +/* Opening capturing bracket. 
If there is space in the offset vector, save +/* Or to a non-unique first char after study */ +/* Or to a unique first char if possible */ +/* Or to just after \n for a multiline match if possible */ +/* Other types of node can be handled by a switch */ +/* Otherwise test for either case */ +/* Print a sequence of chars in printable format, stopping at the end of the +/* Recursion matches the current regex, nested. If there are any capturing +/* Reset the maximum number of extractions we might see. */ +/* Reset the value of the ims flags, in case they got changed during +/* Reset the working variable associated with each extraction. These should +/* Separate the caselesss case for speed */ +/* Set up for repetition, or handle the non-repeated case */ +/* Set up the first character to match, if available. The first_char value is +/* Skip over conditional reference data or large extraction number data if +/* Start of subject assertion */ +/* Start of subject unless notbol, or after internal newline if multiline */ +/* Structure for building a chain of data that actually lives on the +/* The code is duplicated for the caseless and caseful cases, for speed, +/* The condition is an assertion. Call match() to evaluate it - setting +/* The ims options can vary during the matching as a result of the presence +/* The repeating kets try the rest of the pattern or restart from the +/* There's been some horrible disaster. */ +/* This "while" is the end of the "do" above */ +/* This function applies a compiled re to a subject string and picks out +/* Use a macro for debugging printing, 'cause that limits the use of #ifdef +/* We don't need to repeat the search if we haven't yet reached the +/* When a match occurs, substrings will be set for all internal extractions; +/* Word boundary assertions */ +/************************************************* +1. This software is distributed in the hope that it will be useful, +2. 
The origin of this software must not be misrepresented, either by +3. Altered versions must be plainly marked as such, and must not be +4. If PCRE is embedded in any software that is released under the GNU +5.005. If there is an options reset, it will get obeyed in the normal +6 : 3 + (ecode[1] << 8) + ecode[2]), +< -1 => some kind of unexpected problem += 0 => success, but offsets is not big enough +Arguments: +BOOL anchored; +BOOL cur_is_word = (eptr < md->end_subject) && +BOOL is_subject; +BOOL minimize = FALSE; +BOOL prev_is_word = (eptr != md->start_subject) && +BOOL rc; +BOOL startline; +BOOL using_temporary_offsets = FALSE; +Copyright (c) 1997-2000 University of Cambridge +DPRINTF ((">>>> returning %d\n", match_block.errorcode)); +DPRINTF ((">>>> returning %d\n", rc)); +DPRINTF (("Copied offsets from temporary memory\n")); +DPRINTF (("Freeing temporary memory\n")); +DPRINTF (("Got memory to hold back references\n")); +DPRINTF (("Unknown opcode %d\n", *ecode)); +DPRINTF (("bracket %d failed\n", number)); +DPRINTF (("bracket 0 failed\n")); +DPRINTF (("ims reset to %02lx\n", ims)); +DPRINTF (("ims set to %02lx at group repeat\n", ims)); +DPRINTF (("ims set to %02lx\n", ims)); +DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max, +DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, +DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); +DPRINTF (("start bracket 0\n")); +GETCHAR (c, eptr) /* Get character */ +GETCHARINC (c, eptr) /* Get character; increment eptr */ +General Purpose Licence (GPL), then the terms of that licence shall +However, if the referenced string is the empty string, always treat +If the bracket fails to match, we need to restore this value and also the +If there isn't enough space in the offset vector, treat this as if it were a +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+Otherwise, we can use the vector supplied, rounding down its size to a multiple +Permission is granted to anyone to use this software for any purpose on any +REPEATCHAR: +REPEATNOTCHAR: +REPEATTYPE: +Returns: > 0 => success; value is the number of elements filled in +Returns: TRUE if matched +Returns: TRUE if matched +Returns: nothing +They are not both allowed to be zero. */ +This is a library of functions to support regular expressions whose syntax +This is the forcible breaking of infinite loops as implemented in Perl +Writing separate code makes it go faster, as does using an autoincrement and +Written by: Philip Hazel <ph10@cam.ac.uk> +a move back into the brackets. Check the alternative branches in turn - the +address of eptr, so that eptr can be a register variable. */ +an assertion "group", stop matching and return TRUE, but record the +an empty string - recursion will then try other alternatives, if any. */ +an error. Save the top 15 values on the stack, and accept that the rest +an unanchored pattern, of course. If there's no first char and the pattern was +analyzing most of the pattern. length > re->max_match_size is +anchored = ((re->options | options) & PCRE_ANCHORED) != 0; +and advance one byte in the pattern code. */ +and reinstate them after the recursion. However, we don't know how many +and semantics are as close as possible to those of the Perl 5 language. See +and the required character in fact is caseful. */ +at run time, so we have to test for anchoring. The first char may be unset for +avoid duplicate testing (which takes significant time). This covers the vast +backing off on a match. */ +bmtable = extra->data.bmtable; +both cases of the character. Otherwise set the two values the same, which will +bracketed group and go to there. 
*/ +brackets - for testing for empty matches +brackets started but not finished, we have to save their starting points +break; +but WITHOUT ANY WARRANTY; without even the implied warranty of +c != md->lcc[*eptr++]) +c = *ecode++ - OP_CRSTAR; +c = *ecode++ - OP_NOTSTAR; +c = *ecode++ - OP_STAR; +c = *ecode++ - OP_TYPESTAR; +c = *ecode++; +c = *eptr++; +c = 15; +c = max - min; +c = md->end_subject - eptr; +c = md->lcc[c]; +c = md->offset_max; +c == md->lcc[*eptr++]) +can't just fail here, because of the possibility of quantifiers with zero +case OP_ALT: +case OP_ANY: +case OP_ASSERT: +case OP_ASSERTBACK: +case OP_ASSERTBACK_NOT: +case OP_ASSERT_NOT: +case OP_BEG_WORD: +case OP_BRA: /* Non-capturing bracket: optimized */ +case OP_BRAMINZERO: +case OP_BRANUMBER: +case OP_BRAZERO: +case OP_CHARS: +case OP_CIRC: +case OP_CLASS: +case OP_COND: +case OP_CREF: +case OP_CRMINPLUS: +case OP_CRMINQUERY: +case OP_CRMINRANGE: +case OP_CRMINSTAR: +case OP_CRPLUS: +case OP_CRQUERY: +case OP_CRRANGE: +case OP_CRSTAR: +case OP_DIGIT: +case OP_DOLL: +case OP_END: +case OP_END_WORD: +case OP_EOD: +case OP_EODN: +case OP_EXACT: +case OP_KET: +case OP_KETRMAX: +case OP_KETRMIN: +case OP_MINPLUS: +case OP_MINQUERY: +case OP_MINSTAR: +case OP_MINUPTO: +case OP_NOT: +case OP_NOTEXACT: +case OP_NOTMINPLUS: +case OP_NOTMINQUERY: +case OP_NOTMINSTAR: +case OP_NOTMINUPTO: +case OP_NOTPLUS: +case OP_NOTQUERY: +case OP_NOTSTAR: +case OP_NOTUPTO: +case OP_NOT_DIGIT: +case OP_NOT_WHITESPACE: +case OP_NOT_WORDCHAR: +case OP_NOT_WORD_BOUNDARY: +case OP_ONCE: +case OP_OPT: +case OP_PLUS: +case OP_QUERY: +case OP_RECURSE: +case OP_REF: +case OP_REVERSE: +case OP_SOD: +case OP_STAR: +case OP_TYPEEXACT: +case OP_TYPEMINPLUS: +case OP_TYPEMINQUERY: +case OP_TYPEMINSTAR: +case OP_TYPEMINUPTO: +case OP_TYPEPLUS: +case OP_TYPEQUERY: +case OP_TYPESTAR: +case OP_TYPEUPTO: +case OP_UPTO: +case OP_WHITESPACE: +case OP_WORDCHAR: +case OP_WORD_BOUNDARY: +case matching may be when this character is hit, so test 
for it in both its +caselessly, or if there are any changes of this flag within the regex, set up +cases if necessary. However, the different cased versions will not be set up +character" set. If the PCRE_CASELESS is set, implying that the match starts +characters and work backwards. */ +code for maximizing the speed, and do the type test once at the start +code to character type repeats - written out again for speed. */ +commoning these up that doesn't require a test of the positive/negative +computer system, and to redistribute it freely, subject to the following +const char *subject; +const pcre *re; +const pcre_extra *extra; +const uschar *bmtable = NULL; +const uschar *data = ecode + 1; /* Save for matching */ +const uschar *end_subject; +const uschar *next = ecode + 1; +const uschar *p = md->start_subject + md->offset_vector[offset]; +const uschar *p; +const uschar *pp = eptr; +const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; +const uschar *prev = ecode; +const uschar *req_char_ptr = start_match - 1; +const uschar *saved_eptr = eptr; +const uschar *saved_eptr = eptrb->saved_eptr; +const uschar *saved_eptr; +const uschar *start_bits = NULL; +const uschar *start_match = (const uschar *) subject + start_offset; +continue; /* With the main loop */ +continue; +course of events. */ +ctype = *ecode++; /* Code for the character type */ +cur_is_word == prev_is_word : cur_is_word != prev_is_word) +current high water mark for use by positive assertions. Do this also +default: /* No repeat follows */ +default: +do +each branch of a lookbehind assertion. If we are too close to the start to +each substring: the offsets to the start and end of the substring. +ecode position in code +ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ? 
+ecode += (ecode[1] << 8) + ecode[2]; +ecode += 2; +ecode += 3 + (ecode[4] << 8) + ecode[5]; +ecode += 33; /* Advance past the item */ +ecode += 3; /* Advance past the item */ +ecode += 3; +ecode += 5; +ecode = next + 3; +ecode++; +else +else if ((extra->options & PCRE_STUDY_BM) != 0) +else if (first_char >= 0) +else if (start_bits != NULL) +else if (startline) +encountered */ +end_subject = match_block.end_subject; +eptr pointer in subject +eptr points into the subject +eptr += c; +eptr += length; +eptr += min; +eptr -= (ecode[1] << 8) + ecode[2]; +eptr -= length; +eptr = md->end_match_ptr; +eptr++; +eptrb pointer to chain of blocks containing eptr at start of +eptrb = &newptrb; +eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ +eptrblock *eptrb; +eptrblock newptrb; +eptrblock; +exactly what going to the ket would do. */ +explicit claim or by omission. +external_extra points to "hints" from pcre_study() or is NULL +external_re points to the compiled expression +extraction by setting the offsets and bumping the high water mark. */ +first_char = match_block.lcc[first_char]; +first_char = re->first_char; +flags can contain +for (;;) +for (i = 1; i <= c; i++) +for (i = 1; i <= min; i++) +for (i = min; i < max; i++) +for (i = min;; i++) +for the "once" (not-backup up) groups. */ +for the match to succeed. If the first character is set, req_char must be +found it, so that we don't search again next time round the loop if +from a previous iteration of this group, and be referred to by a reference +goto REPEATCHAR; +goto REPEATNOTCHAR; +goto REPEATTYPE; +group number back at the start and if necessary complete handling an +happens for a repeating ket if no characters were matched in the group. +here; that is handled in the code for KET. */ +hold, we get a temporary bit of working store to use during the matching. +i.e. it could be ()* or ()? in the pattern. 
Brackets with fixed upper +if (!anchored) +if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup)) +if (!match_ref (offset, eptr, length, md, ims)) +if (!md->endonly) +if (!rc) +if (!startline && extra != NULL) +if ((*ecode++ == OP_WORD_BOUNDARY) ? +if ((data[c / 8] & (1 << (c & 7))) != 0) +if ((data[c / 8] & (1 << (c & 7))) == 0) +if ((extra->options & PCRE_STUDY_MAPPED) != 0) +if ((flags & match_condassert) != 0) +if ((flags & match_isgroup) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_DOTALL) == 0 && c == '\n') +if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') +if ((ims & PCRE_DOTALL) == 0) +if ((ims & PCRE_MULTILINE) != 0) +if ((md->ctypes[*eptr++] & ctype_digit) != 0) +if ((md->ctypes[*eptr++] & ctype_digit) == 0) +if ((md->ctypes[*eptr++] & ctype_space) != 0) +if ((md->ctypes[*eptr++] & ctype_space) == 0) +if ((md->ctypes[*eptr++] & ctype_word) != 0) +if ((md->ctypes[*eptr++] & ctype_word) == 0) +if ((md->ctypes[c] & ctype_digit) != 0) +if ((md->ctypes[c] & ctype_digit) == 0) +if ((md->ctypes[c] & ctype_space) != 0) +if ((md->ctypes[c] & ctype_space) == 0) +if ((md->ctypes[c] & ctype_word) != 0) +if ((md->ctypes[c] & ctype_word) == 0) +if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) +if ((re->options & PCRE_FIRSTSET) != 0) +if ((re->options & PCRE_REQCHSET) != 0) +if ((start_bits[c / 8] & (1 << (c & 7))) == 0) +if (*ecode != OP_ONCE && *ecode != OP_ALT) +if (*ecode == OP_KET || eptr == saved_eptr) +if (*ecode == OP_KET) +if (*ecode == OP_KETRMIN) +if (*ecode++ != *eptr++) +if (*ecode++ == *eptr++) +if (*eptr != '\n') +if (*eptr++ == '\n') +if (*p++ != *eptr++) +if (*p++ == req_char) +if (*prev != OP_COND) +if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || +if (bmtable != NULL) +if (bmtable[*start_match]) +if (c != *eptr++) +if (c != md->lcc[*eptr++]) +if (c < 16) +if (c == *eptr++) +if (c == md->lcc[*eptr++]) +if (c > md->end_subject - eptr) +if (cur_is_word == prev_is_word || +if (ecode[3] == OP_CREF) /* 
Condition is extraction test */ +if (ecode[3] == OP_OPT) +if (eptr != md->start_subject && eptr[-1] != '\n') +if (eptr != md->start_subject) +if (eptr < md->end_subject - 1 || +if (eptr < md->end_subject) +if (eptr < md->start_subject) +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) +if (eptr >= md->end_subject || *eptr == '\n') +if (eptr >= md->end_subject || c != *eptr) +if (eptr >= md->end_subject || c != md->lcc[*eptr]) +if (eptr >= md->end_subject || c == *eptr) +if (eptr >= md->end_subject || c == md->lcc[*eptr]) +if (eptr >= md->end_subject) +if (eptr++ >= md->end_subject) +if (i >= max || !match_ref (offset, eptr, length, md, ims)) +if (i >= max || eptr >= md->end_subject || +if (i >= max || eptr >= md->end_subject || c != *eptr++) +if (i >= max || eptr >= md->end_subject || c == *eptr++) +if (i >= max || eptr >= md->end_subject) +if (is_subject && length > md->end_subject - p) +if (isprint (c = *(p++))) +if (length == 0) +if (length > md->end_subject - eptr) +if (match (eptr, ecode + 3, offset_top, md, ims, NULL, +if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup)) +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) || +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if 
(match_block.end_offset_top > offsetcount) +if (match_block.offset_vector != NULL) +if (match_block.offset_vector == NULL) +if (max == 0) +if (md->lcc[*ecode++] != md->lcc[*eptr++]) +if (md->lcc[*ecode++] == md->lcc[*eptr++]) +if (md->lcc[*p++] != md->lcc[*eptr++]) +if (md->notbol && eptr == md->start_subject) +if (md->notempty && eptr == md->start_match) +if (md->noteol) +if (min == max) +if (min > 0) +if (min > md->end_subject - eptr) +if (minimize) +if (number > 0) +if (number > EXTRACT_BASIC_MAX) +if (offset < md->offset_max) +if (offset >= md->offset_max) +if (offset_top <= offset) +if (offsetcount < 2) +if (offsetcount >= 4) +if (op > OP_BRA) +if (p > req_char_ptr) +if (p >= end_subject) +if (pp == req_char || pp == req_char2) +if (re == NULL || subject == NULL || +if (re->magic_number != MAGIC_NUMBER) +if (re->max_match_size >= 0 +if (re->top_backref > 0 && re->top_backref >= ocount / 3) +if (req_char == req_char2) +if (req_char >= 0) +if (resetcount > offsetcount) +if (save != stacksave) +if (save == NULL) +if (skipped_chars) +if (start_match + bmtable[256] > end_subject) +if (start_match > match_block.start_subject + start_offset) +if (using_temporary_offsets) +if certain parts of the pattern were not used. */ +if the malloc fails ... there is no way of returning to the top level with +implied in the second condition, because start_offset > 0. */ +ims current /i, /m, and /s options +ims the ims flags +ims = (ims & ~PCRE_IMS) | ecode[4]; +ims = ecode[1]; +ims = original_ims; +ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL); +in the pattern. */ +in the subject string, while eptrb holds the value of eptr at the start of the +initialize them to avoid reading uninitialized locations. */ +inline, and there are *still* stupid compilers about that don't like indented +inside the group. 
+int +int *offsets; +int *save; +int c; +int first_char = -1; +int flags; +int length; +int min, max, ctype; +int number = *prev - OP_BRA; +int number = op - OP_BRA; +int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */ +int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */ +int offset; +int offset_top; +int offsetcount; +int op = (int) *ecode; +int options; +int rc; +int req_char = -1; +int req_char2 = -1; +int resetcount, ocount; +int save_offset1 = md->offset_vector[offset]; +int save_offset2 = md->offset_vector[offset + 1]; +int save_offset3 = md->offset_vector[md->offset_end - number]; +int skipped_chars = 0; +int stacksave[15]; +int start_offset; +is a bit large to put on the stack, but using malloc for small numbers +is_subject TRUE if printing from within md->start_subject +it as matched, any number of times (otherwise there could be infinite +item to see if there is repeat information following. The code is similar +item to see if there is repeat information following. Then obey similar +last bracketed group - used for breaking infinite loops matching zero-length +later in the subject; otherwise the test starts at the match point. This +length length of subject string (may contain binary zeros) +length length to be matched +length number to print +length = (offset >= offset_top || md->offset_vector[offset] < 0) ? +length = md->end_subject - p; +level without recursing. Otherwise, if minimizing, keep trying the rest of +loop. */ +loops). */ +main loop. */ +majority of cases. It will be suboptimal when the case flag changes in a regex +mark, since extracts may have been taken during the assertion. */ +mark, since extracts may have been taken. 
*/ +match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0)) +match (eptr, ecode, offset_top, md, ims, eptrb, flags) +match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) +match_block.ctypes = re->tables + ctypes_offset; +match_block.end_subject = match_block.start_subject + length; +match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; +match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ +match_block.errorcode == PCRE_ERROR_NOMATCH && +match_block.lcc = re->tables + lcc_offset; +match_block.lcc[*start_match] != first_char) +match_block.notbol = (options & PCRE_NOTBOL) != 0; +match_block.notempty = (options & PCRE_NOTEMPTY) != 0; +match_block.noteol = (options & PCRE_NOTEOL) != 0; +match_block.offset_end = ocount; +match_block.offset_max = (2 * ocount) / 3; +match_block.offset_overflow = FALSE; +match_block.offset_overflow = TRUE; +match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int)); +match_block.offset_vector = offsets; +match_block.start_match = start_match; +match_block.start_pattern = re->code; +match_block.start_subject = (const uschar *) subject; +match_condassert - this is an assertion condition +match_condassert | match_isgroup)) +match_data *md; +match_data match_block; +match_isgroup - this is the start of a bracketed group +match_isgroup); +match_ref (offset, eptr, length, md, ims) +matches, we carry on as at the end of a normal bracket, leaving the subject +matching won't pass the KET for an assertion. If any one branch matches, +matching won't pass the KET for this kind of subpattern. If any one branch +max = (ecode[1] << 8) + ecode[2]; +max = (ecode[3] << 8) + ecode[4]; +max = INT_MAX; +max = rep_max[c]; /* zero for max => infinity */ +max, eptr)); +maximum. Alternatively, if maximizing, find the maximum number of +may be wrong. 
*/ +md pointer to "static" info for the match +md pointer to matching data block, if is_subject is TRUE +md points to match data block +md->end_match_ptr = eptr; /* For ONCE */ +md->end_match_ptr = eptr; /* Record where we ended */ +md->end_offset_top = offset_top; /* and how many extracts were taken */ +md->end_offset_top = offset_top; +md->end_subject - eptr + 1 : +md->errorcode = PCRE_ERROR_UNKNOWN_NODE; +md->offset_overflow = TRUE; +md->offset_vector[md->offset_end - i] = save[i]; +md->offset_vector[md->offset_end - number] = eptr - md->start_subject; +md->offset_vector[md->offset_end - number] = save_offset3; +md->offset_vector[md->offset_end - number]; +md->offset_vector[offset + 1] - md->offset_vector[offset]; +md->offset_vector[offset + 1] = eptr - md->start_subject; +md->offset_vector[offset + 1] = save_offset2; +md->offset_vector[offset] = +md->offset_vector[offset] = save_offset1; +memcpy (offsets + 2, match_block.offset_vector + 2, +min = (ecode[1] << 8) + ecode[2]; +min = 0; +min = max = (ecode[1] << 8) + ecode[2]; +min = max = 1; +min = rep_min[c]; /* Pick up values from tables; */ +minima. */ +minimize = (*ecode == OP_CRMINRANGE); +minimize = (c & 1) != 0; +minimize = *ecode == OP_MINUPTO; +minimize = *ecode == OP_NOTMINUPTO; +minimize = *ecode == OP_TYPEMINUPTO; +minimize = TRUE; +minimum number of matches are present. If min = max, continue at the same +misrepresented as being the original software. +move back, this match function fails. */ +mustn't change the current values of the data slot, because they may be set +need to recurse. */ +never be used unless previously set, but they get saved and restored, and so we +never set for an anchored regular expression, but the anchoring may be forced +newline unless endonly is set, else end of subject unless noteol is set. */ +newptrb.prev = eptrb; +newptrb.saved_eptr = eptr; +next += (next[1] << 8) + next[2]; +non-capturing bracket. 
Don't worry about setting the flag for the error case +number = (ecode[4] << 8) | ecode[5]; +number = (prev[4] << 8) | prev[5]; +number from a dummy opcode at the start. */ +number, then move along the subject till after the recursive match, +ocount = offsetcount - (offsetcount % 3); +ocount = re->top_backref * 3 + 3; +of (?ims) items in the pattern. They are kept in a local variable so that +of 3. */ +of subject left; this ensures that every attempt at a match fails. We +offset index into the offset vector +offset = number << 1; +offset_top current top pointer +offset_top = md->end_offset_top; +offset_top = offset + 2; +offset_top, md, ims, eptrb, match_isgroup); +offsetcount the number of elements in the vector +offsets points to a vector of ints to be filled in with offsets +offsets[0] = start_match - match_block.start_subject; +offsets[1] = match_block.end_match_ptr - match_block.start_subject; +op = OP_BRA; +opcode. */ +optimization can save a huge amount of backtracking in patterns with nested +option for each character match. Maybe that wouldn't add very much to the +options option bits +p points to characters +p--; +past the end if there is only one branch, but that's OK because that is +pchars (ecode, length, FALSE, md); +pchars (eptr, 16, TRUE, md); +pchars (eptr, length, TRUE, md); +pchars (p, length, FALSE, md); +pchars (p, length, is_subject, md) +pchars (start_match, end_subject - start_match, TRUE, &match_block); +pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount) +place we found it at last time. */ +pointer. */ +portions of the string if it matches. Two elements in the vector are set for +pre-processor statements. I suppose it's only been 10 years... */ +preceded by BRAZERO or BRAMINZERO. */ +preceding bracket, in the appropriate order. */ +preceding bracket, in the appropriate order. 
We need to reset any options +printf (" against backref "); +printf (" against pattern "); +printf ("%c", c); +printf (">>>> Match against: "); +printf (">>>>> Skipped %d chars to reach first character\n", +printf ("\\x%02x", c); +printf ("\n"); +printf ("end bracket %d", number); +printf ("matching subject "); +printf ("matching subject <null> against pattern "); +printf ("matching subject <null>"); +printf ("start bracket %d subject=", number); +rc = 0; +rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb, +rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2; +register const uschar *ecode; +register const uschar *eptr; +register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0); +register int *iend = iptr + resetcount; +register int *iend = iptr - resetcount / 2 + 1; +register int *iptr = match_block.offset_vector + ocount; +register int *iptr = match_block.offset_vector; +register int c = *start_match; +register int c; +register int i; +register int length = ecode[1]; +register int pp = *p++; +repeat it in the interests of efficiency. */ +repeat limits are compiled as a number of copies, with the optional ones +req_char = re->req_char; +req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ? +req_char_ptr = p; +resetcount = 2 + re->top_bracket * 2; +resetcount = ocount; +restoring at the exit of a group is easy. */ +restrictions: +return FALSE; +return PCRE_ERROR_BADMAGIC; +return PCRE_ERROR_BADOPTION; +return PCRE_ERROR_NOMATCH; +return PCRE_ERROR_NOMEMORY; +return PCRE_ERROR_NULL; +return TRUE; +return match (eptr, +return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup); +return match_block.errorcode; +return rc; +save = (int *) (pcre_malloc) ((c + 1) * sizeof (int)); +save = stacksave; +save[i] = md->offset_vector[md->offset_end - i]; +seems expensive. As a compromise, the stack is used when there are fewer +share code. 
This is very similar to the code for single characters, but we +similar code to character type repeats - written out again for speed. +since matching characters is likely to be quite common. First, ensure the +skipped_chars += bmtable[*start_match], +skipped_chars += bmtable[256] - 1; +skipped_chars -= bmtable[256] - 1; +skipped_chars); +skipped_chars++, +stack of such pointers, to be re-instated at the end of the group when we hit +stack, for holding the values of the subject pointer at the start of each +start of each branch to move the current point backwards, so the code at +start_bits = extra->data.start_bits; +start_match += bmtable[*start_match]; +start_match += bmtable[256] - 1; +start_match -= bmtable[256] - 1; +start_match = (const uschar *) subject + length - re->max_match_size; +start_match++ < end_subject); +start_match++; +start_offset where to start in the subject string +startline = (re->options & PCRE_STARTLINE) != 0; +static BOOL +static const char rep_max[] = +static const char rep_min[] = +static void +strings. +struct eptrblock *prev; +studied, there may be a bitmap of possible first characters. */ +subject points to the subject string +subject if the requested. +subpattern - to break infinite loops. */ +subpattern, so as to detect when an empty string has been matched by a +subsequent match. */ +such there are (offset_top records the completed total) so we just have +supersede any condition above with which it is incompatible. +switch (*ecode) +switch (ctype) +switch (op) +test once at the start (i.e. keep it out of the loop). */ +than 16 values to store; otherwise malloc is used. A problem is what to do +than the number of characters left in the string, so the match fails. +that "continue" in the code above comes out to here to repeat the main +that changed within the bracket before re-running it, so check the next +that it may occur zero times. It may repeat infinitely, or not at all - +the assertion is true. 
Lookbehind assertions have an OP_REVERSE item at the +the closing ket. When match() is called in other circumstances, we don't add to +the code for a repeated single character, but I haven't found a nice way of +the current subject position in the working slot at the top of the vector. We +the expression and advancing one matching character if failing, up to the +the external pcre header. */ +the file Tech.Notes for some information on the internals. +the final argument TRUE causes it to stop at the end of an assertion. */ +the group. */ +the length of the reference string explicitly rather than passing the +the loop runs just once. */ +the minimum number of bytes before we start. */ +the number from a dummy opcode at the start. */ +the point in the subject string is not moved back. Thus there can never be +the pointer while it matches the class. */ +the same bracket. +the stack. */ +the start hasn't passed this character yet. */ +the subject. */ +there were too many extractions, set the return code to zero. In the case +this level is identical to the lookahead case. */ +this makes a huge difference to execution time when there aren't many brackets +those back references that we can. In this case there need not be overflow +time taken, but character matching *is* what this is all about... */ +to save all the potential data. There may be up to 99 such values, which +to that for character classes, but repeated for efficiency. Then obey +two branches. If the condition is false, skipping the first branch takes us +typedef struct eptrblock +unless PCRE_CASELESS was given or the casing state changes within the regex. +unlimited repeats that aren't going to match. 
We don't know what the state of +unsigned long int ims = 0; +unsigned long int ims; +unsigned long int original_ims = ims; /* Save for resetting on ')' */ +up quickly if there are fewer than the minimum number of characters left in +using_temporary_offsets = TRUE; +values of the final offsets, in case they were set by a previous iteration of +we just need to set up the whole thing as substring 0 before returning. If +where we had to get some local store to hold offsets for backreferences, copy +while (!anchored && +while (*ecode == OP_ALT) +while (*ecode == OP_ALT); +while (*next == OP_ALT); +while (--iptr >= iend) +while (eptr >= pp) +while (iptr < iend) +while (length-- > 0) +while (p < end_subject) +while (start_match < end_subject && +while (start_match < end_subject && *start_match != first_char) +while (start_match < end_subject && start_match[-1] != '\n') +while (start_match < end_subject) +{ +{0, 0, 0, 0, 1, 1}; +{0, 0, 1, 1, 0, 0}; +} /* End of main loop */ +} diff --git a/testsuite/uniq.inp b/testsuite/uniq.inp new file mode 100644 index 0000000..b1eddf3 --- /dev/null +++ b/testsuite/uniq.inp @@ -0,0 +1,2058 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#define DPRINTF(p) /*nothing */ +#define DPRINTF(p) printf p +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define class pcre_class +#define match_condassert 0x01 /* 
Called to check a condition assertion */ +#define match_isgroup 0x02 /* Set if start of bracketed group */ +#else +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#ifdef DEBUG /* Sigh. Some compilers never learn. */ +#ifdef DEBUG /* Sigh. Some compilers never learn. */ +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef DEBUG +#ifdef __cplusplus +#include "internal.h" +&& length - re->max_match_size > start_offset) +((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word)) +((md->ctypes[*eptr] & ctype_word) != 0); +((md->ctypes[*eptr] & ctype_word) != 0); +((md->ctypes[eptr[-1]] & ctype_word) != 0); +((md->ctypes[eptr[-1]] & ctype_word) != 0); +(eptr == md->end_subject - 1 && *eptr != '\n')) +(eptr == md->end_subject - 1 && *eptr != '\n')) +(i.e. keep it out of the loop). Also we can test that there are at least +(md->ctypes[*eptr++] & ctype_digit) != 0) +(md->ctypes[*eptr++] & ctype_digit) == 0) +(md->ctypes[*eptr++] & ctype_space) != 0) +(md->ctypes[*eptr++] & ctype_space) == 0) +(md->ctypes[*eptr++] & ctype_word) != 0) +(md->ctypes[*eptr++] & ctype_word) == 0) +(offsetcount - 2) * sizeof (int)); +(offsets == NULL && offsetcount > 0)) +(pcre_free) (match_block.offset_vector); +(pcre_free) (match_block.offset_vector); +(pcre_free) (save); +(re->tables + fcc_offset)[req_char] : req_char; +* Match a back-reference * +* Execute a Regular Expression * +* Match from current position * +* Debugging function to print chars * +* Perl-Compatible Regular Expressions * +* Macros and tables for character handling * +*************************************************/ +*************************************************/ +*************************************************/ +*************************************************/ +*************************************************/ 
+*************************************************/ +*/ +*/ +*/ +*/ +*/ +*iptr = -1; +*iptr++ = -1; +*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || +*prev == OP_ONCE) +----------------------------------------------------------------------------- +----------------------------------------------------------------------------- +-1 => failed to match +/* +/* "Once" brackets are like assertion brackets except that after a match, +/* ... else fall through */ +/* ... else fall through */ +/* Advance to a possible match for an initial string after study */ +/* Allow compilation as C++ source code, should anybody want to do that. */ +/* Always fail if not enough characters left */ +/* An alternation is the end of a branch; scan along to find the end of the +/* Assert before internal newline if multiline, or before a terminating +/* Assertion brackets. Check the alternative branches in turn - the +/* At the start of a bracketed group, add the current subject pointer to the +/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating +/* Caseful comparisons */ +/* Caseful comparisons */ +/* Change option settings */ +/* Common code for all repeated single character type matches */ +/* Common code for all repeated single-character matches. We can give +/* Common code for all repeated single-character matches. We can give +/* Compute the minimum number of offsets that we need to reset each time. 
Doing +/* Conditional group: compilation checked that there are no more than +/* Continue as from after the assertion, updating the offsets high water +/* Continue from after the assertion, updating the offsets high water +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never gets here */ +/* Control never reaches here */ +/* Control never reaches here */ +/* Copy the offset information from temporary store if necessary */ +/* Do a single test if no case difference is set up */ +/* Do not stick any code in here without much thought; it is assumed +/* End of a group, repeated or non-repeating. If we are at the end of +/* End of subject assertion (\z) */ +/* End of subject or ending \n assertion (\Z) */ +/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched +/* First, ensure the minimum number of matches are present. */ +/* First, ensure the minimum number of matches are present. Use inline +/* First, ensure the minimum number of matches are present. We get back +/* Flag bits for the match() function */ +/* For a non-repeating ket, just continue at this level. This also +/* For a non-repeating ket, just continue at this level. This also +/* For anchored or unanchored matches, there may be a "last known required +/* For extended extraction brackets (large number), we have to fish out +/* For extended extraction brackets (large number), we have to fish out the +/* For matches anchored to the end of the pattern, we can often avoid +/* If a back reference hasn't been set, the length that is passed is greater +/* If checking an assertion for a condition, return TRUE. 
*/ +/* If hit the end of the group (which could be repeated), fail */ +/* If max == min we can continue with the main loop without the +/* If maximizing it is worth using inline code for speed, doing the type +/* If maximizing, find the longest possible run, then work backwards. */ +/* If maximizing, find the longest string and work backwards */ +/* If min = max, continue at the same level without recursing */ +/* If min = max, continue at the same level without recursion. +/* If minimizing, keep testing the rest of the expression and advancing +/* If minimizing, keep trying and advancing the pointer */ +/* If minimizing, we have to test the rest of the pattern before each +/* If req_char is set, we know that that character must appear in the subject +/* If the expression has got more back references than the offsets supplied can +/* If the length of the reference is zero, just continue with the +/* If the reference is unset, set the length to be longer than the amount +/* If we can't find the required character, break the matching loop */ +/* If we have found the required character, save the point where we +/* In all other cases except a conditional group we have to check the +/* In case the recursion has set more capturing values, save the final +/* Include the internals header, which itself includes Standard C headers plus +/* Insufficient room for saving captured contents */ +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +/* Match a back reference, possibly repeatedly. Look past the end of the +/* Match a character class, possibly repeatedly. Look past the end of the +/* Match a negated single character */ +/* Match a negated single character repeatedly. This is almost a repeat of +/* Match a run of characters */ +/* Match a single character repeatedly; different opcodes share code. 
*/ +/* Match a single character type repeatedly; several different opcodes +/* Match a single character type; inline for speed */ +/* Min and max values for the common repeats; for the maxima, 0 => infinity */ +/* Move the subject pointer back. This occurs only at the start of +/* Negative assertion: all branches must fail to match */ +/* Now start processing the operations. */ +/* OP_KETRMAX */ +/* OP_KETRMAX */ +/* On entry ecode points to the first opcode, and eptr to the first character +/* Opening capturing bracket. If there is space in the offset vector, save +/* Or to a non-unique first char after study */ +/* Or to a unique first char if possible */ +/* Or to just after \n for a multiline match if possible */ +/* Other types of node can be handled by a switch */ +/* Otherwise test for either case */ +/* Print a sequence of chars in printable format, stopping at the end of the +/* Recursion matches the current regex, nested. If there are any capturing +/* Reset the maximum number of extractions we might see. */ +/* Reset the value of the ims flags, in case they got changed during +/* Reset the working variable associated with each extraction. These should +/* Separate the caselesss case for speed */ +/* Set up for repetition, or handle the non-repeated case */ +/* Set up the first character to match, if available. The first_char value is +/* Skip over conditional reference data or large extraction number data if +/* Start of subject assertion */ +/* Start of subject unless notbol, or after internal newline if multiline */ +/* Structure for building a chain of data that actually lives on the +/* The code is duplicated for the caseless and caseful cases, for speed, +/* The code is duplicated for the caseless and caseful cases, for speed, +/* The condition is an assertion. 
Call match() to evaluate it - setting +/* The ims options can vary during the matching as a result of the presence +/* The repeating kets try the rest of the pattern or restart from the +/* The repeating kets try the rest of the pattern or restart from the +/* There's been some horrible disaster. */ +/* This "while" is the end of the "do" above */ +/* This function applies a compiled re to a subject string and picks out +/* Use a macro for debugging printing, 'cause that limits the use of #ifdef +/* We don't need to repeat the search if we haven't yet reached the +/* When a match occurs, substrings will be set for all internal extractions; +/* Word boundary assertions */ +/************************************************* +/************************************************* +/************************************************* +/************************************************* +/************************************************* +/************************************************* +1. This software is distributed in the hope that it will be useful, +2. The origin of this software must not be misrepresented, either by +3. Altered versions must be plainly marked as such, and must not be +4. If PCRE is embedded in any software that is released under the GNU +5.005. If there is an options reset, it will get obeyed in the normal +5.005. 
If there is an options reset, it will get obeyed in the normal +6 : 3 + (ecode[1] << 8) + ecode[2]), +< -1 => some kind of unexpected problem += 0 => success, but offsets is not big enough +Arguments: +Arguments: +Arguments: +Arguments: +BOOL anchored; +BOOL cur_is_word = (eptr < md->end_subject) && +BOOL cur_is_word = (eptr < md->end_subject) && +BOOL is_subject; +BOOL minimize = FALSE; +BOOL prev_is_word = (eptr != md->start_subject) && +BOOL prev_is_word = (eptr != md->start_subject) && +BOOL rc; +BOOL startline; +BOOL using_temporary_offsets = FALSE; +Copyright (c) 1997-2000 University of Cambridge +DPRINTF ((">>>> returning %d\n", match_block.errorcode)); +DPRINTF ((">>>> returning %d\n", rc)); +DPRINTF (("Copied offsets from temporary memory\n")); +DPRINTF (("Freeing temporary memory\n")); +DPRINTF (("Freeing temporary memory\n")); +DPRINTF (("Got memory to hold back references\n")); +DPRINTF (("Unknown opcode %d\n", *ecode)); +DPRINTF (("bracket %d failed\n", number)); +DPRINTF (("bracket 0 failed\n")); +DPRINTF (("ims reset to %02lx\n", ims)); +DPRINTF (("ims set to %02lx at group repeat\n", ims)); +DPRINTF (("ims set to %02lx\n", ims)); +DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max, +DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, +DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); +DPRINTF (("start bracket 0\n")); +GETCHAR (c, eptr) /* Get character */ +GETCHARINC (c, eptr) /* Get character; increment eptr */ +GETCHARINC (c, eptr) /* Get character; increment eptr */ +General Purpose Licence (GPL), then the terms of that licence shall +However, if the referenced string is the empty string, always treat +If the bracket fails to match, we need to restore this value and also the +If there isn't enough space in the offset vector, treat this as if it were a +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+Otherwise, we can use the vector supplied, rounding down its size to a multiple +Permission is granted to anyone to use this software for any purpose on any +REPEATCHAR: +REPEATNOTCHAR: +REPEATTYPE: +Returns: > 0 => success; value is the number of elements filled in +Returns: TRUE if matched +Returns: TRUE if matched +Returns: nothing +They are not both allowed to be zero. */ +This is a library of functions to support regular expressions whose syntax +This is the forcible breaking of infinite loops as implemented in Perl +This is the forcible breaking of infinite loops as implemented in Perl +Writing separate code makes it go faster, as does using an autoincrement and +Written by: Philip Hazel <ph10@cam.ac.uk> +a move back into the brackets. Check the alternative branches in turn - the +address of eptr, so that eptr can be a register variable. */ +an assertion "group", stop matching and return TRUE, but record the +an empty string - recursion will then try other alternatives, if any. */ +an error. Save the top 15 values on the stack, and accept that the rest +an unanchored pattern, of course. If there's no first char and the pattern was +analyzing most of the pattern. length > re->max_match_size is +anchored = ((re->options | options) & PCRE_ANCHORED) != 0; +and advance one byte in the pattern code. */ +and reinstate them after the recursion. However, we don't know how many +and semantics are as close as possible to those of the Perl 5 language. See +and the required character in fact is caseful. */ +at run time, so we have to test for anchoring. The first char may be unset for +avoid duplicate testing (which takes significant time). This covers the vast +backing off on a match. */ +bmtable = extra->data.bmtable; +both cases of the character. Otherwise set the two values the same, which will +bracketed group and go to there. 
*/ +brackets - for testing for empty matches +brackets started but not finished, we have to save their starting points +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +break; +but WITHOUT ANY WARRANTY; without even the implied warranty of +c != md->lcc[*eptr++]) +c = *ecode++ - OP_CRSTAR; +c = *ecode++ - OP_CRSTAR; +c = *ecode++ - OP_NOTSTAR; +c = *ecode++ - OP_STAR; +c = *ecode++ - OP_TYPESTAR; +c = *ecode++; +c = *ecode++; +c = *eptr++; +c = 15; +c = max - min; +c = md->end_subject - eptr; +c = md->lcc[c]; +c = md->lcc[c]; +c = md->offset_max; +c == md->lcc[*eptr++]) +can't just fail here, because of the possibility of quantifiers with zero +case OP_ALT: +case OP_ANY: +case OP_ANY: +case OP_ANY: +case OP_ANY: +case OP_ASSERT: +case OP_ASSERTBACK: +case OP_ASSERTBACK_NOT: +case OP_ASSERT_NOT: +case OP_BEG_WORD: +case OP_BRA: /* Non-capturing bracket: optimized */ +case OP_BRAMINZERO: +case OP_BRANUMBER: +case OP_BRAZERO: +case OP_CHARS: +case OP_CIRC: +case OP_CLASS: +case OP_COND: +case OP_CREF: +case OP_CRMINPLUS: +case OP_CRMINPLUS: +case OP_CRMINQUERY: +case OP_CRMINQUERY: +case OP_CRMINRANGE: +case OP_CRMINRANGE: +case OP_CRMINSTAR: +case OP_CRMINSTAR: +case OP_CRPLUS: +case OP_CRPLUS: +case OP_CRQUERY: +case OP_CRQUERY: +case OP_CRRANGE: +case OP_CRRANGE: +case OP_CRSTAR: +case OP_CRSTAR: +case OP_DIGIT: +case OP_DIGIT: +case OP_DIGIT: +case OP_DIGIT: +case OP_DOLL: +case OP_END: +case OP_END_WORD: +case OP_EOD: +case OP_EODN: +case 
OP_EXACT: +case OP_KET: +case OP_KETRMAX: +case OP_KETRMIN: +case OP_MINPLUS: +case OP_MINQUERY: +case OP_MINSTAR: +case OP_MINUPTO: +case OP_NOT: +case OP_NOTEXACT: +case OP_NOTMINPLUS: +case OP_NOTMINQUERY: +case OP_NOTMINSTAR: +case OP_NOTMINUPTO: +case OP_NOTPLUS: +case OP_NOTQUERY: +case OP_NOTSTAR: +case OP_NOTUPTO: +case OP_NOT_DIGIT: +case OP_NOT_DIGIT: +case OP_NOT_DIGIT: +case OP_NOT_DIGIT: +case OP_NOT_WHITESPACE: +case OP_NOT_WHITESPACE: +case OP_NOT_WHITESPACE: +case OP_NOT_WHITESPACE: +case OP_NOT_WORDCHAR: +case OP_NOT_WORDCHAR: +case OP_NOT_WORDCHAR: +case OP_NOT_WORDCHAR: +case OP_NOT_WORD_BOUNDARY: +case OP_ONCE: +case OP_OPT: +case OP_PLUS: +case OP_QUERY: +case OP_RECURSE: +case OP_REF: +case OP_REVERSE: +case OP_SOD: +case OP_STAR: +case OP_TYPEEXACT: +case OP_TYPEMINPLUS: +case OP_TYPEMINQUERY: +case OP_TYPEMINSTAR: +case OP_TYPEMINUPTO: +case OP_TYPEPLUS: +case OP_TYPEQUERY: +case OP_TYPESTAR: +case OP_TYPEUPTO: +case OP_UPTO: +case OP_WHITESPACE: +case OP_WHITESPACE: +case OP_WHITESPACE: +case OP_WHITESPACE: +case OP_WORDCHAR: +case OP_WORDCHAR: +case OP_WORDCHAR: +case OP_WORDCHAR: +case OP_WORD_BOUNDARY: +case matching may be when this character is hit, so test for it in both its +caselessly, or if there are any changes of this flag within the regex, set up +cases if necessary. However, the different cased versions will not be set up +character" set. If the PCRE_CASELESS is set, implying that the match starts +characters and work backwards. */ +characters and work backwards. */ +code for maximizing the speed, and do the type test once at the start +code to character type repeats - written out again for speed. 
*/ +commoning these up that doesn't require a test of the positive/negative +computer system, and to redistribute it freely, subject to the following +const char *subject; +const pcre *re; +const pcre_extra *extra; +const uschar *bmtable = NULL; +const uschar *data = ecode + 1; /* Save for matching */ +const uschar *end_subject; +const uschar *next = ecode + 1; +const uschar *next = ecode + 1; +const uschar *p = md->start_subject + md->offset_vector[offset]; +const uschar *p; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *pp = eptr; +const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; +const uschar *prev = ecode; +const uschar *req_char_ptr = start_match - 1; +const uschar *saved_eptr = eptr; +const uschar *saved_eptr = eptrb->saved_eptr; +const uschar *saved_eptr; +const uschar *start_bits = NULL; +const uschar *start_match = (const uschar *) subject + start_offset; +continue; /* With the main loop */ +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +continue; +course of events. */ +course of events. */ +ctype = *ecode++; /* Code for the character type */ +cur_is_word == prev_is_word : cur_is_word != prev_is_word) +current high water mark for use by positive assertions. Do this also +default: /* No repeat follows */ +default: /* No repeat follows */ +default: +do +do +do +do +do +do +do +do +do +do +do +each branch of a lookbehind assertion. If we are too close to the start to +each substring: the offsets to the start and end of the substring. +ecode position in code +ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ? 
+ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += (ecode[1] << 8) + ecode[2]; +ecode += 2; +ecode += 2; +ecode += 3 + (ecode[4] << 8) + ecode[5]; +ecode += 33; /* Advance past the item */ +ecode += 3; /* Advance past the item */ +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 3; +ecode += 5; +ecode += 5; +ecode = next + 3; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +ecode++; +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else +else if ((extra->options & PCRE_STUDY_BM) != 0) +else if (first_char >= 0) +else if (start_bits != NULL) +else if (startline) +encountered */ +end_subject = match_block.end_subject; +eptr pointer in subject +eptr points into the subject +eptr += c; +eptr += length; +eptr += length; +eptr += length; +eptr += length; +eptr += min; +eptr -= (ecode[1] << 8) + ecode[2]; +eptr -= length; +eptr = md->end_match_ptr; +eptr = md->end_match_ptr; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptr++; +eptrb pointer to chain of blocks containing eptr at start of +eptrb = &newptrb; +eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ +eptrblock *eptrb; +eptrblock newptrb; +eptrblock; +exactly what going to the ket would do. */ +explicit claim or by omission. 
+external_extra points to "hints" from pcre_study() or is NULL +external_re points to the compiled expression +extraction by setting the offsets and bumping the high water mark. */ +first_char = match_block.lcc[first_char]; +first_char = re->first_char; +flags can contain +for (;;) +for (i = 1; i <= c; i++) +for (i = 1; i <= c; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = 1; i <= min; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min; i < max; i++) +for (i = min;; i++) +for (i = min;; i++) +for (i = min;; i++) +for (i = min;; i++) +for (i = min;; i++) +for (i = min;; i++) +for (i = min;; i++) +for the "once" (not-backup up) groups. */ +for the match to succeed. If the first character is set, req_char must be +found it, so that we don't search again next time round the loop if +from a previous iteration of this group, and be referred to by a reference +goto REPEATCHAR; +goto REPEATCHAR; +goto REPEATNOTCHAR; +goto REPEATNOTCHAR; +goto REPEATTYPE; +goto REPEATTYPE; +group number back at the start and if necessary complete handling an +happens for a repeating ket if no characters were matched in the group. +happens for a repeating ket if no characters were matched in the group. +here; that is handled in the code for KET. */ +hold, we get a temporary bit of working store to use during the matching. +i.e. it could be ()* or ()? in the pattern. 
Brackets with fixed upper +if (!anchored) +if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup)) +if (!match_ref (offset, eptr, length, md, ims)) +if (!match_ref (offset, eptr, length, md, ims)) +if (!match_ref (offset, eptr, length, md, ims)) +if (!md->endonly) +if (!rc) +if (!startline && extra != NULL) +if ((*ecode++ == OP_WORD_BOUNDARY) ? +if ((data[c / 8] & (1 << (c & 7))) != 0) +if ((data[c / 8] & (1 << (c & 7))) != 0) +if ((data[c / 8] & (1 << (c & 7))) == 0) +if ((extra->options & PCRE_STUDY_MAPPED) != 0) +if ((flags & match_condassert) != 0) +if ((flags & match_condassert) != 0) +if ((flags & match_isgroup) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_CASELESS) != 0) +if ((ims & PCRE_DOTALL) == 0 && c == '\n') +if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') +if ((ims & PCRE_DOTALL) == 0) +if ((ims & PCRE_DOTALL) == 0) +if ((ims & PCRE_MULTILINE) != 0) +if ((ims & PCRE_MULTILINE) != 0) +if ((md->ctypes[*eptr++] & ctype_digit) != 0) +if ((md->ctypes[*eptr++] & ctype_digit) == 0) +if ((md->ctypes[*eptr++] & ctype_space) != 0) +if ((md->ctypes[*eptr++] & ctype_space) == 0) +if ((md->ctypes[*eptr++] & ctype_word) != 0) +if ((md->ctypes[*eptr++] & ctype_word) == 0) +if ((md->ctypes[c] & ctype_digit) != 0) +if ((md->ctypes[c] & ctype_digit) == 0) +if ((md->ctypes[c] & ctype_space) != 0) +if ((md->ctypes[c] & ctype_space) == 0) +if ((md->ctypes[c] & ctype_word) != 0) +if ((md->ctypes[c] & ctype_word) == 0) +if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) +if ((re->options & PCRE_FIRSTSET) != 0) +if ((re->options & PCRE_REQCHSET) != 0) +if ((start_bits[c / 8] & (1 << (c & 7))) == 0) +if (*ecode != OP_ONCE && *ecode != OP_ALT) +if (*ecode == OP_KET || eptr == saved_eptr) +if (*ecode == OP_KET || eptr == saved_eptr) +if (*ecode == OP_KET) +if 
(*ecode == OP_KETRMIN) +if (*ecode == OP_KETRMIN) +if (*ecode++ != *eptr++) +if (*ecode++ == *eptr++) +if (*eptr != '\n') +if (*eptr++ == '\n') +if (*p++ != *eptr++) +if (*p++ == req_char) +if (*prev != OP_COND) +if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || +if (bmtable != NULL) +if (bmtable[*start_match]) +if (c != *eptr++) +if (c != md->lcc[*eptr++]) +if (c < 16) +if (c == *eptr++) +if (c == md->lcc[*eptr++]) +if (c > md->end_subject - eptr) +if (cur_is_word == prev_is_word || +if (ecode[3] == OP_CREF) /* Condition is extraction test */ +if (ecode[3] == OP_OPT) +if (eptr != md->start_subject && eptr[-1] != '\n') +if (eptr != md->start_subject) +if (eptr < md->end_subject - 1 || +if (eptr < md->end_subject - 1 || +if (eptr < md->end_subject) +if (eptr < md->end_subject) +if (eptr < md->start_subject) +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) +if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) +if (eptr >= md->end_subject || *eptr == '\n') +if (eptr >= md->end_subject || c != *eptr) +if (eptr >= md->end_subject || c != md->lcc[*eptr]) +if (eptr >= md->end_subject || c == *eptr) +if (eptr >= md->end_subject || c == md->lcc[*eptr]) +if (eptr >= md->end_subject) +if (eptr >= md->end_subject) +if (eptr >= md->end_subject) +if (eptr >= md->end_subject) +if (eptr >= md->end_subject) +if (eptr++ >= md->end_subject) +if (i >= max || !match_ref (offset, eptr, length, md, ims)) +if (i >= max || eptr >= md->end_subject || +if (i >= max || eptr >= 
md->end_subject || +if (i >= max || eptr >= md->end_subject || c != *eptr++) +if (i >= max || eptr >= md->end_subject || c == *eptr++) +if (i >= max || eptr >= md->end_subject) +if (i >= max || eptr >= md->end_subject) +if (is_subject && length > md->end_subject - p) +if (isprint (c = *(p++))) +if (length == 0) +if (length > md->end_subject - eptr) +if (length > md->end_subject - eptr) +if (match (eptr, ecode + 3, offset_top, md, ims, NULL, +if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup)) +if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup)) +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) || +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) || +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup)) +if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || +if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0)) +if (match (eptr--, ecode, 
offset_top, md, ims, eptrb, 0)) +if (match_block.end_offset_top > offsetcount) +if (match_block.offset_vector != NULL) +if (match_block.offset_vector == NULL) +if (max == 0) +if (max == 0) +if (max == 0) +if (max == 0) +if (max == 0) +if (max == 0) +if (max == 0) +if (md->lcc[*ecode++] != md->lcc[*eptr++]) +if (md->lcc[*ecode++] == md->lcc[*eptr++]) +if (md->lcc[*p++] != md->lcc[*eptr++]) +if (md->notbol && eptr == md->start_subject) +if (md->notempty && eptr == md->start_match) +if (md->noteol) +if (md->noteol) +if (min == max) +if (min == max) +if (min == max) +if (min == max) +if (min == max) +if (min == max) +if (min == max) +if (min > 0) +if (min > md->end_subject - eptr) +if (min > md->end_subject - eptr) +if (min > md->end_subject - eptr) +if (minimize) +if (minimize) +if (minimize) +if (minimize) +if (minimize) +if (minimize) +if (minimize) +if (number > 0) +if (number > EXTRACT_BASIC_MAX) +if (number > EXTRACT_BASIC_MAX) +if (offset < md->offset_max) +if (offset >= md->offset_max) +if (offset_top <= offset) +if (offsetcount < 2) +if (offsetcount >= 4) +if (op > OP_BRA) +if (p > req_char_ptr) +if (p >= end_subject) +if (pp == req_char || pp == req_char2) +if (re == NULL || subject == NULL || +if (re->magic_number != MAGIC_NUMBER) +if (re->max_match_size >= 0 +if (re->top_backref > 0 && re->top_backref >= ocount / 3) +if (req_char == req_char2) +if (req_char >= 0) +if (resetcount > offsetcount) +if (save != stacksave) +if (save == NULL) +if (skipped_chars) +if (start_match + bmtable[256] > end_subject) +if (start_match > match_block.start_subject + start_offset) +if (using_temporary_offsets) +if (using_temporary_offsets) +if certain parts of the pattern were not used. */ +if the malloc fails ... there is no way of returning to the top level with +implied in the second condition, because start_offset > 0. 
*/ +ims current /i, /m, and /s options +ims the ims flags +ims = (ims & ~PCRE_IMS) | ecode[4]; +ims = ecode[1]; +ims = original_ims; +ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL); +in the pattern. */ +in the subject string, while eptrb holds the value of eptr at the start of the +initialize them to avoid reading uninitialized locations. */ +inline, and there are *still* stupid compilers about that don't like indented +inside the group. +int +int *offsets; +int *save; +int c; +int first_char = -1; +int flags; +int length; +int length; +int length; +int length; +int min, max, ctype; +int number = *prev - OP_BRA; +int number = op - OP_BRA; +int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */ +int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */ +int offset; +int offset; +int offset; +int offset_top; +int offsetcount; +int op = (int) *ecode; +int options; +int rc; +int req_char = -1; +int req_char2 = -1; +int resetcount, ocount; +int save_offset1 = md->offset_vector[offset]; +int save_offset2 = md->offset_vector[offset + 1]; +int save_offset3 = md->offset_vector[md->offset_end - number]; +int skipped_chars = 0; +int stacksave[15]; +int start_offset; +is a bit large to put on the stack, but using malloc for small numbers +is_subject TRUE if printing from within md->start_subject +it as matched, any number of times (otherwise there could be infinite +item to see if there is repeat information following. The code is similar +item to see if there is repeat information following. Then obey similar +last bracketed group - used for breaking infinite loops matching zero-length +later in the subject; otherwise the test starts at the match point. This +length length of subject string (may contain binary zeros) +length length to be matched +length number to print +length = (offset >= offset_top || md->offset_vector[offset] < 0) ? +length = md->end_subject - p; +level without recursing. 
Otherwise, if minimizing, keep trying the rest of +level without recursing. Otherwise, if minimizing, keep trying the rest of +loop. */ +loops). */ +main loop. */ +majority of cases. It will be suboptimal when the case flag changes in a regex +mark, since extracts may have been taken during the assertion. */ +mark, since extracts may have been taken. */ +match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0)) +match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0)) +match (eptr, ecode, offset_top, md, ims, eptrb, flags) +match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) +match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) +match_block.ctypes = re->tables + ctypes_offset; +match_block.end_subject = match_block.start_subject + length; +match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; +match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ +match_block.errorcode == PCRE_ERROR_NOMATCH && +match_block.lcc = re->tables + lcc_offset; +match_block.lcc[*start_match] != first_char) +match_block.notbol = (options & PCRE_NOTBOL) != 0; +match_block.notempty = (options & PCRE_NOTEMPTY) != 0; +match_block.noteol = (options & PCRE_NOTEOL) != 0; +match_block.offset_end = ocount; +match_block.offset_max = (2 * ocount) / 3; +match_block.offset_overflow = FALSE; +match_block.offset_overflow = TRUE; +match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int)); +match_block.offset_vector = offsets; +match_block.start_match = start_match; +match_block.start_pattern = re->code; +match_block.start_subject = (const uschar *) subject; +match_condassert - this is an assertion condition +match_condassert | match_isgroup)) +match_data *md; +match_data *md; +match_data *md; +match_data match_block; +match_isgroup - this is the start of a bracketed group +match_isgroup); +match_ref (offset, eptr, length, md, ims) +matches, we carry on as at the end of a normal bracket, leaving the subject +matching won't pass the KET for an assertion. 
If any one branch matches, +matching won't pass the KET for this kind of subpattern. If any one branch +max = (ecode[1] << 8) + ecode[2]; +max = (ecode[1] << 8) + ecode[2]; +max = (ecode[1] << 8) + ecode[2]; +max = (ecode[3] << 8) + ecode[4]; +max = (ecode[3] << 8) + ecode[4]; +max = INT_MAX; +max = INT_MAX; +max = INT_MAX; +max = INT_MAX; +max = INT_MAX; +max = INT_MAX; +max = INT_MAX; +max = rep_max[c]; /* zero for max => infinity */ +max = rep_max[c]; /* zero for max => infinity */ +max = rep_max[c]; /* zero for max => infinity */ +max = rep_max[c]; /* zero for max => infinity */ +max = rep_max[c]; /* zero for max => infinity */ +max, eptr)); +max, eptr)); +maximum. Alternatively, if maximizing, find the maximum number of +maximum. Alternatively, if maximizing, find the maximum number of +may be wrong. */ +md pointer to "static" info for the match +md pointer to matching data block, if is_subject is TRUE +md points to match data block +md->end_match_ptr = eptr; /* For ONCE */ +md->end_match_ptr = eptr; /* Record where we ended */ +md->end_offset_top = offset_top; /* and how many extracts were taken */ +md->end_offset_top = offset_top; +md->end_subject - eptr + 1 : +md->errorcode = PCRE_ERROR_UNKNOWN_NODE; +md->offset_overflow = TRUE; +md->offset_vector[md->offset_end - i] = save[i]; +md->offset_vector[md->offset_end - number] = eptr - md->start_subject; +md->offset_vector[md->offset_end - number] = save_offset3; +md->offset_vector[md->offset_end - number]; +md->offset_vector[offset + 1] - md->offset_vector[offset]; +md->offset_vector[offset + 1] = eptr - md->start_subject; +md->offset_vector[offset + 1] = save_offset2; +md->offset_vector[offset] = +md->offset_vector[offset] = save_offset1; +memcpy (offsets + 2, match_block.offset_vector + 2, +min = (ecode[1] << 8) + ecode[2]; +min = (ecode[1] << 8) + ecode[2]; +min = 0; +min = 0; +min = 0; +min = max = (ecode[1] << 8) + ecode[2]; +min = max = (ecode[1] << 8) + ecode[2]; +min = max = (ecode[1] << 8) + ecode[2]; 
+min = max = 1; +min = rep_min[c]; /* Pick up values from tables; */ +min = rep_min[c]; /* Pick up values from tables; */ +min = rep_min[c]; /* Pick up values from tables; */ +min = rep_min[c]; /* Pick up values from tables; */ +min = rep_min[c]; /* Pick up values from tables; */ +minima. */ +minimize = (*ecode == OP_CRMINRANGE); +minimize = (*ecode == OP_CRMINRANGE); +minimize = (c & 1) != 0; +minimize = (c & 1) != 0; +minimize = (c & 1) != 0; +minimize = (c & 1) != 0; +minimize = (c & 1) != 0; +minimize = *ecode == OP_MINUPTO; +minimize = *ecode == OP_NOTMINUPTO; +minimize = *ecode == OP_TYPEMINUPTO; +minimize = TRUE; +minimum number of matches are present. If min = max, continue at the same +minimum number of matches are present. If min = max, continue at the same +misrepresented as being the original software. +move back, this match function fails. */ +mustn't change the current values of the data slot, because they may be set +need to recurse. */ +never be used unless previously set, but they get saved and restored, and so we +never set for an anchored regular expression, but the anchoring may be forced +newline unless endonly is set, else end of subject unless noteol is set. */ +newptrb.prev = eptrb; +newptrb.saved_eptr = eptr; +next += (next[1] << 8) + next[2]; +next += (next[1] << 8) + next[2]; +non-capturing bracket. Don't worry about setting the flag for the error case +number = (ecode[4] << 8) | ecode[5]; +number = (prev[4] << 8) | prev[5]; +number from a dummy opcode at the start. */ +number, then move along the subject till after the recursive match, +ocount = offsetcount - (offsetcount % 3); +ocount = re->top_backref * 3 + 3; +of (?ims) items in the pattern. They are kept in a local variable so that +of 3. */ +of subject left; this ensures that every attempt at a match fails. 
We +offset index into the offset vector +offset = number << 1; +offset = number << 1; +offset_top current top pointer +offset_top = md->end_offset_top; +offset_top = md->end_offset_top; +offset_top = md->end_offset_top; +offset_top = offset + 2; +offset_top, md, ims, eptrb, match_isgroup); +offsetcount the number of elements in the vector +offsets points to a vector of ints to be filled in with offsets +offsets[0] = start_match - match_block.start_subject; +offsets[1] = match_block.end_match_ptr - match_block.start_subject; +op = OP_BRA; +opcode. */ +optimization can save a huge amount of backtracking in patterns with nested +option for each character match. Maybe that wouldn't add very much to the +options option bits +p points to characters +p--; +p--; +past the end if there is only one branch, but that's OK because that is +pchars (ecode, length, FALSE, md); +pchars (eptr, 16, TRUE, md); +pchars (eptr, length, TRUE, md); +pchars (eptr, length, TRUE, md); +pchars (p, length, FALSE, md); +pchars (p, length, is_subject, md) +pchars (start_match, end_subject - start_match, TRUE, &match_block); +pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount) +place we found it at last time. */ +pointer. */ +portions of the string if it matches. Two elements in the vector are set for +pre-processor statements. I suppose it's only been 10 years... */ +preceded by BRAZERO or BRAMINZERO. */ +preceding bracket, in the appropriate order. */ +preceding bracket, in the appropriate order. 
We need to reset any options +printf (" against backref "); +printf (" against pattern "); +printf ("%c", c); +printf (">>>> Match against: "); +printf (">>>>> Skipped %d chars to reach first character\n", +printf ("\\x%02x", c); +printf ("\n"); +printf ("\n"); +printf ("\n"); +printf ("\n"); +printf ("\n"); +printf ("end bracket %d", number); +printf ("matching subject "); +printf ("matching subject "); +printf ("matching subject <null> against pattern "); +printf ("matching subject <null>"); +printf ("start bracket %d subject=", number); +rc = 0; +rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb, +rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2; +register const uschar *ecode; +register const uschar *eptr; +register const uschar *eptr; +register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0); +register int *iend = iptr + resetcount; +register int *iend = iptr - resetcount / 2 + 1; +register int *iptr = match_block.offset_vector + ocount; +register int *iptr = match_block.offset_vector; +register int c = *start_match; +register int c; +register int i; +register int length = ecode[1]; +register int pp = *p++; +repeat it in the interests of efficiency. */ +repeat limits are compiled as a number of copies, with the optional ones +req_char = re->req_char; +req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ? +req_char_ptr = p; +resetcount = 2 + re->top_bracket * 2; +resetcount = ocount; +restoring at the exit of a group is easy. 
*/ +restrictions: +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return FALSE; +return PCRE_ERROR_BADMAGIC; +return PCRE_ERROR_BADOPTION; +return PCRE_ERROR_NOMATCH; +return PCRE_ERROR_NOMEMORY; +return PCRE_ERROR_NULL; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return TRUE; +return match (eptr, +return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup); +return match_block.errorcode; +return rc; +save = (int *) (pcre_malloc) ((c + 1) * sizeof (int)); +save = stacksave; +save = stacksave; +save[i] = md->offset_vector[md->offset_end 
- i]; +seems expensive. As a compromise, the stack is used when there are fewer +share code. This is very similar to the code for single characters, but we +similar code to character type repeats - written out again for speed. +since matching characters is likely to be quite common. First, ensure the +since matching characters is likely to be quite common. First, ensure the +skipped_chars += bmtable[*start_match], +skipped_chars += bmtable[256] - 1; +skipped_chars -= bmtable[256] - 1; +skipped_chars); +skipped_chars++, +skipped_chars++, +skipped_chars++, +skipped_chars++, +stack of such pointers, to be re-instated at the end of the group when we hit +stack, for holding the values of the subject pointer at the start of each +start of each branch to move the current point backwards, so the code at +start_bits = extra->data.start_bits; +start_match += bmtable[*start_match]; +start_match += bmtable[256] - 1; +start_match -= bmtable[256] - 1; +start_match = (const uschar *) subject + length - re->max_match_size; +start_match++ < end_subject); +start_match++; +start_match++; +start_match++; +start_match++; +start_offset where to start in the subject string +startline = (re->options & PCRE_STARTLINE) != 0; +static BOOL +static BOOL +static const char rep_max[] = +static const char rep_min[] = +static void +strings. +struct eptrblock *prev; +studied, there may be a bitmap of possible first characters. */ +subject points to the subject string +subject if the requested. +subpattern - to break infinite loops. */ +subpattern, so as to detect when an empty string has been matched by a +subsequent match. */ +such there are (offset_top records the completed total) so we just have +supersede any condition above with which it is incompatible. +switch (*ecode) +switch (*ecode) +switch (ctype) +switch (ctype) +switch (ctype) +switch (op) +test once at the start (i.e. keep it out of the loop). */ +than 16 values to store; otherwise malloc is used. 
A problem is what to do +than the number of characters left in the string, so the match fails. +that "continue" in the code above comes out to here to repeat the main +that changed within the bracket before re-running it, so check the next +that it may occur zero times. It may repeat infinitely, or not at all - +the assertion is true. Lookbehind assertions have an OP_REVERSE item at the +the closing ket. When match() is called in other circumstances, we don't add to +the code for a repeated single character, but I haven't found a nice way of +the current subject position in the working slot at the top of the vector. We +the expression and advancing one matching character if failing, up to the +the expression and advancing one matching character if failing, up to the +the external pcre header. */ +the file Tech.Notes for some information on the internals. +the final argument TRUE causes it to stop at the end of an assertion. */ +the group. */ +the length of the reference string explicitly rather than passing the +the loop runs just once. */ +the minimum number of bytes before we start. */ +the number from a dummy opcode at the start. */ +the point in the subject string is not moved back. Thus there can never be +the pointer while it matches the class. */ +the same bracket. +the stack. */ +the start hasn't passed this character yet. */ +the subject. */ +the subject. */ +there were too many extractions, set the return code to zero. In the case +this level is identical to the lookahead case. */ +this makes a huge difference to execution time when there aren't many brackets +those back references that we can. In this case there need not be overflow +time taken, but character matching *is* what this is all about... */ +to save all the potential data. There may be up to 99 such values, which +to that for character classes, but repeated for efficiency. Then obey +two branches. 
If the condition is false, skipping the first branch takes us +typedef struct eptrblock +unless PCRE_CASELESS was given or the casing state changes within the regex. +unlimited repeats that aren't going to match. We don't know what the state of +unsigned long int ims = 0; +unsigned long int ims; +unsigned long int ims; +unsigned long int original_ims = ims; /* Save for resetting on ')' */ +up quickly if there are fewer than the minimum number of characters left in +up quickly if there are fewer than the minimum number of characters left in +using_temporary_offsets = TRUE; +values of the final offsets, in case they were set by a previous iteration of +we just need to set up the whole thing as substring 0 before returning. If +where we had to get some local store to hold offsets for backreferences, copy +while (!anchored && +while (*ecode == OP_ALT) +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*ecode == OP_ALT); +while (*next == OP_ALT); +while (*next == OP_ALT); +while (--iptr >= iend) +while (eptr >= pp) +while (eptr >= pp) +while (eptr >= pp) +while (eptr >= pp) +while (eptr >= pp) +while (eptr >= pp) +while (eptr >= pp) +while (iptr < iend) +while (length-- > 0) +while (length-- > 0) +while (length-- > 0) +while (length-- > 0) +while (length-- > 0) +while (p < end_subject) +while (p < end_subject) +while (start_match < end_subject && +while (start_match < end_subject && *start_match != first_char) +while (start_match < end_subject && start_match[-1] != '\n') +while (start_match < end_subject) +while (start_match < end_subject) +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ 
+{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{ +{0, 0, 0, 0, 1, 1}; +{0, 0, 1, 1, 0, 0}; +} /* End of main loop */ +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} +} diff --git a/testsuite/uniq.sed b/testsuite/uniq.sed new file mode 100644 index 0000000..7ec66c4 --- /dev/null +++ b/testsuite/uniq.sed @@ -0,0 +1,20 @@ +h + +:b +# On the last line, print and exit +$b +N +/^\(.*\)\n\1$/ { + # The two lines are identical. Undo the effect of + # the n command. + g + bb +} + +# If the @code{N} command had added the last line, print and exit +$b + +# The lines are different; print the first and go +# back working on the second. +P +D diff --git a/testsuite/version.gin b/testsuite/version.gin new file mode 100644 index 0000000..2ff9735 --- /dev/null +++ b/testsuite/version.gin @@ -0,0 +1,5 @@ +GNU sed version @VERSION@ +Copyright (C) 2003 Free Software Foundation, Inc. +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, +to the extent permitted by law. 
diff --git a/testsuite/writeout.inp b/testsuite/writeout.inp new file mode 100644 index 0000000..1cfceaf --- /dev/null +++ b/testsuite/writeout.inp @@ -0,0 +1,4 @@ +Facts are simple and facts are straight +Facts are lazy and facts are late +Facts all come with points of view +Facts don't do what I want them to diff --git a/testsuite/writeout.sed b/testsuite/writeout.sed new file mode 100644 index 0000000..f925a4d --- /dev/null +++ b/testsuite/writeout.sed @@ -0,0 +1 @@ +/^Facts ar/w writeout.wout diff --git a/testsuite/wrtout1.good b/testsuite/wrtout1.good new file mode 100644 index 0000000..1cfceaf --- /dev/null +++ b/testsuite/wrtout1.good @@ -0,0 +1,4 @@ +Facts are simple and facts are straight +Facts are lazy and facts are late +Facts all come with points of view +Facts don't do what I want them to diff --git a/testsuite/wrtout2.good b/testsuite/wrtout2.good new file mode 100644 index 0000000..2ef3f50 --- /dev/null +++ b/testsuite/wrtout2.good @@ -0,0 +1,2 @@ +Facts are simple and facts are straight +Facts are lazy and facts are late diff --git a/testsuite/xabcx.good b/testsuite/xabcx.good new file mode 100644 index 0000000..3f8bc81 --- /dev/null +++ b/testsuite/xabcx.good @@ -0,0 +1,4 @@ +roses are red +violets are blue +my feet are cold +your feet are too diff --git a/testsuite/xabcx.inp b/testsuite/xabcx.inp new file mode 100644 index 0000000..f2e2b38 --- /dev/null +++ b/testsuite/xabcx.inp @@ -0,0 +1,4 @@ +roses are red +violets are blue +my feet are cold +your feet are blue diff --git a/testsuite/xabcx.sed b/testsuite/xabcx.sed new file mode 100644 index 0000000..2a872fb --- /dev/null +++ b/testsuite/xabcx.sed @@ -0,0 +1,2 @@ +# from the ChangeLog (Fri May 21 1993) +\xfeetxs/blue/too/ diff --git a/testsuite/xbxcx.good b/testsuite/xbxcx.good new file mode 100644 index 0000000..9eadcd0 --- /dev/null +++ b/testsuite/xbxcx.good @@ -0,0 +1,7 @@ +x +xbx +xbxcx +xbxcx +xbxcx +xbxcx +xbxcx
\ No newline at end of file diff --git a/testsuite/xbxcx.inp b/testsuite/xbxcx.inp new file mode 100644 index 0000000..792d120 --- /dev/null +++ b/testsuite/xbxcx.inp @@ -0,0 +1,7 @@ + +b +bc +bac +baac +baaac +baaaac
\ No newline at end of file diff --git a/testsuite/xbxcx.sed b/testsuite/xbxcx.sed new file mode 100644 index 0000000..e6a9c3d --- /dev/null +++ b/testsuite/xbxcx.sed @@ -0,0 +1,2 @@ +# from the ChangeLog (Wed Sep 5 2001) +s/a*/x/g diff --git a/testsuite/xbxcx3.good b/testsuite/xbxcx3.good new file mode 100644 index 0000000..072a680 --- /dev/null +++ b/testsuite/xbxcx3.good @@ -0,0 +1,7 @@ + +b +bcx +bacx +baacx +baaacx +baaaacx diff --git a/testsuite/xbxcx3.inp b/testsuite/xbxcx3.inp new file mode 100644 index 0000000..cac4334 --- /dev/null +++ b/testsuite/xbxcx3.inp @@ -0,0 +1,7 @@ + +b +bc +bac +baac +baaac +baaaac diff --git a/testsuite/xbxcx3.sed b/testsuite/xbxcx3.sed new file mode 100644 index 0000000..759483c --- /dev/null +++ b/testsuite/xbxcx3.sed @@ -0,0 +1 @@ +s/a*/x/3 diff --git a/testsuite/xemacs.good b/testsuite/xemacs.good new file mode 100644 index 0000000..9fce4f1 --- /dev/null +++ b/testsuite/xemacs.good @@ -0,0 +1,67 @@ +#Makefile.in generated automatically by automake 1.5 from Makefile.am. + +#Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +#Free Software Foundation, Inc. +#This Makefile.in is free software; the Free Software Foundation +#gives unlimited permission to copy and/or distribute it, +#with or without modifications, as long as this notice is preserved. + +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY, to the extent permitted by law; without +#even the implied warranty of MERCHANTABILITY or FITNESS FOR A +#PARTICULAR PURPOSE. + +"@SET_MAKE@" + +#Automake requirements + +"SHELL = @SHELL@" + +"PACKAGE = sed" + +"EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \\" +" m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \\" +" m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \\" +" m4/obstack.m4" + +"subdir = ." 
+"ACLOCAL_M4 = $(top_srcdir)/aclocal.m4" +"mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs" +"CONFIG_HEADER = config.h" +"CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile" +"DIST_SOURCES =" +"DATA = $(noinst_DATA)" + +"HEADERS = $(noinst_HEADERS)" + + +"RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \\" +" uninstall-info-recursive all-recursive install-data-recursive \\" +" install-exec-recursive installdirs-recursive install-recursive \\" +" uninstall-recursive check-recursive installcheck-recursive" +"DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \\" +" COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \\" +" TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \\" +" config.sub config_h.in configure configure.ac depcomp \\" +" install-sh missing mkinstalldirs" +"DIST_SUBDIRS = $(SUBDIRS)" +"all: config.h" +" $(MAKE) $(AM_MAKEFLAGS) all-recursive" + +".SUFFIXES:" +"$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)" +" cd $(top_srcdir) && \\" +" $(AUTOMAKE) --gnu Makefile" +"Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status" +" cd $(top_builddir) && \\" +" CONFIG_HEADERS= CONFIG_LINKS= \\" +" CONFIG_FILES=$@ $(SHELL) ./config.status" + +"$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)" +" $(SHELL) ./config.status --recheck" +"$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)" +" cd $(srcdir) && $(AUTOCONF)" + +"$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4" +" cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)" +"config.h: stamp-h"
\ No newline at end of file diff --git a/testsuite/xemacs.inp b/testsuite/xemacs.inp new file mode 100644 index 0000000..0fc0414 --- /dev/null +++ b/testsuite/xemacs.inp @@ -0,0 +1,67 @@ +# Makefile.in generated automatically by automake 1.5 from Makefile.am. + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Automake requirements + +SHELL = @SHELL@ + +PACKAGE = sed + +EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \ + m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \ + m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \ + m4/obstack.m4 + +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile +DIST_SOURCES = +DATA = $(noinst_DATA) + +HEADERS = $(noinst_HEADERS) + + +RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \ + uninstall-info-recursive all-recursive install-data-recursive \ + install-exec-recursive installdirs-recursive install-recursive \ + uninstall-recursive check-recursive installcheck-recursive +DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \ + COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \ + TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \ + config.sub config_h.in configure configure.ac depcomp \ + install-sh missing mkinstalldirs +DIST_SUBDIRS = $(SUBDIRS) +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && \ + CONFIG_HEADERS= CONFIG_LINKS= \ + CONFIG_FILES=$@ $(SHELL) ./config.status + +$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck +$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) + cd $(srcdir) && $(AUTOCONF) + +$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4 + cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +config.h: stamp-h
\ No newline at end of file diff --git a/testsuite/xemacs.sed b/testsuite/xemacs.sed new file mode 100644 index 0000000..ee2f744 --- /dev/null +++ b/testsuite/xemacs.sed @@ -0,0 +1,16 @@ +# Inspired by xemacs' config.status script +# submitted by John Fremlin (john@fremlin.de) + +/^# Generated/d +s%/\*\*/#.*%% +s/^ *# */#/ +/^##/d +/^#/ { + p + d +} +/./ { + s/\([\"]\)/\\\1/g + s/^/"/ + s/$/"/ +} diff --git a/testsuite/y-bracket.good b/testsuite/y-bracket.good new file mode 100644 index 0000000..278fee9 --- /dev/null +++ b/testsuite/y-bracket.good @@ -0,0 +1 @@ +Are you sure (y/n)? y] diff --git a/testsuite/y-bracket.inp b/testsuite/y-bracket.inp new file mode 100644 index 0000000..fe6124f --- /dev/null +++ b/testsuite/y-bracket.inp @@ -0,0 +1 @@ +Are you sure (y/n)? [y] diff --git a/testsuite/y-bracket.sed b/testsuite/y-bracket.sed new file mode 100644 index 0000000..79f3b61 --- /dev/null +++ b/testsuite/y-bracket.sed @@ -0,0 +1 @@ +y/[/ / diff --git a/testsuite/y-newline.good b/testsuite/y-newline.good new file mode 100644 index 0000000..b0f2bfe --- /dev/null +++ b/testsuite/y-newline.good @@ -0,0 +1 @@ +Are Sou Yure (S/n)? [S] $$Are Sou Yure (S/n)? [S] diff --git a/testsuite/y-newline.inp b/testsuite/y-newline.inp new file mode 100644 index 0000000..fe6124f --- /dev/null +++ b/testsuite/y-newline.inp @@ -0,0 +1 @@ +Are you sure (y/n)? [y] diff --git a/testsuite/y-newline.sed b/testsuite/y-newline.sed new file mode 100644 index 0000000..3e1dbea --- /dev/null +++ b/testsuite/y-newline.sed @@ -0,0 +1,3 @@ +H +G +y/Ss\nYy/yY$sS/ |