summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2022-07-25 11:23:12 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2022-07-25 11:23:12 +0900
commitc094078e1fdae1cdcba2c753aeec9a9a86c275f9 (patch)
tree8d60a86c245cd2f4e44f2022e781cfa74867dbd5
parent4ff923a70a224625a0b826e6c6f8171550025645 (diff)
downloadperl-XML-Twig-c094078e1fdae1cdcba2c753aeec9a9a86c275f9.tar.gz
perl-XML-Twig-c094078e1fdae1cdcba2c753aeec9a9a86c275f9.tar.bz2
perl-XML-Twig-c094078e1fdae1cdcba2c753aeec9a9a86c275f9.zip
Imported Upstream version 3.47upstream/3.47
-rw-r--r--Changes14
-rw-r--r--MANIFEST1
-rw-r--r--META.json2
-rw-r--r--META.yml2
-rw-r--r--Twig.pm25
-rwxr-xr-xTwig_pm.slow25
-rwxr-xr-x[-rw-r--r--]t/test_3_41.t0
-rwxr-xr-x[-rw-r--r--]t/test_3_42.t0
-rwxr-xr-x[-rw-r--r--]t/test_3_44.t0
-rwxr-xr-x[-rw-r--r--]t/test_3_45.t0
-rwxr-xr-xt/test_3_47.t45
-rwxr-xr-x[-rw-r--r--]t/test_changes.t2
-rwxr-xr-x[-rw-r--r--]t/test_meta_json.t0
-rwxr-xr-xt/test_xml_split.t2
-rwxr-xr-x[-rw-r--r--]t/xmlxpath_31vars.t0
-rwxr-xr-xtools/xml_grep/xml_grep4
16 files changed, 99 insertions, 23 deletions
diff --git a/Changes b/Changes
index bdc0646..dd9ff5f 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,19 @@
CHANGES
+3.47 - 2014-03-27 - minor maintenance release
+
+- fixed: missing entities when parsing HTML
+ RT #93604 https://rt.cpan.org/Public/Bug/Display.html?id=93604
+
+- fixed: tests failed when using a version of HTML::TreeBuilder with a non-numeric version
+
+- fixed in twig_handlers, '=' in regexps on attributes are turned into 'eq'
+ RT #94295 https://rt.cpan.org/Public/Bug/Display.html?id=94295
+
+3.46 - 2014-03-05 - minor maintenance release
+
+- fixed: test failed on Windows
+
3.45 - 2014-02-27 - minor maintenance release
- fixed: link to idented_a format description
diff --git a/MANIFEST b/MANIFEST
index a26704e..d42fe58 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -128,6 +128,7 @@ t/test_3_41.t
t/test_3_42.t
t/test_3_44.t
t/test_3_45.t
+t/test_3_47.t
t/test_changes.t
t/test_memory.t
t/test_wrapped.t
diff --git a/META.json b/META.json
index b98daaf..6bbe04b 100644
--- a/META.json
+++ b/META.json
@@ -42,5 +42,5 @@
"url" : "http://github.com/mirod/xmltwig"
}
},
- "version" : "3.46"
+ "version" : "3.47"
}
diff --git a/META.yml b/META.yml
index b18e745..f1de465 100644
--- a/META.yml
+++ b/META.yml
@@ -21,4 +21,4 @@ requires:
XML::Parser: 2.23
resources:
repository: http://github.com/mirod/xmltwig
-version: 3.46
+version: 3.47
diff --git a/Twig.pm b/Twig.pm
index 5f15078..7b9d66b 100644
--- a/Twig.pm
+++ b/Twig.pm
@@ -144,7 +144,7 @@ my $SEP= qr/\s*(?:$|\|)/;
BEGIN
{
-$VERSION = '3.46';
+$VERSION = '3.47';
use XML::Parser;
my $needVersion = '2.23';
@@ -154,7 +154,7 @@ croak "need at least XML::Parser version $needVersion" unless $parser_version >=
($perl_version= $])=~ s{_\d+}{};
if( $perl_version >= 5.008)
- { eval "use Encode qw( :all)";
+ { eval "use Encode qw( :all)"; ## no critic ProhibitStringyEval
$FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF;
$FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF;
}
@@ -996,6 +996,7 @@ sub _html2xml
$xml.= _as_XML( $tree);
+
_fix_xml( $tree, \$xml);
if( $options->{indent}) { _indent_xhtml( \$xml); }
@@ -1142,7 +1143,7 @@ sub _as_XML {
if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; }
}
- if ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || []} )
+ if ( $empty_element_map->{$tag} && (!@{ $node->{'_content'} || []}) )
{ $xml.= $node->starttag_XML( undef, 1 ); }
else
{ $xml.= $node->starttag_XML(undef); }
@@ -1175,7 +1176,9 @@ sub _xml_escape
)
)
}
- {&amp;}gx; # Needs to be escaped to amp
+ {&amp;}gx if 0; # Needs to be escaped to amp
+
+ $html=~ s{&}{&amp;}g;
# in old versions of HTML::TreeBuilder &amp; can come out as &Amp;
if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&amp;}g; }
@@ -1299,7 +1302,7 @@ sub add_stylesheet
$version ||= 0;
if( $disallowed{$module}) { return 0; }
if( $used{$module}) { return 1; }
- if( eval "require $module") { import $module; $used{$module}= 1;
+ if( eval "require $module") { import $module; $used{$module}= 1; # no critic ProhibitStringyEval
if( $version)
{
## no critic (TestingAndDebugging::ProhibitNoStrict);
@@ -1316,7 +1319,7 @@ sub add_stylesheet
# used to solve the [n] predicates while avoiding getting the entire list
# needs a prototype to accept passing bare blocks
-sub _first_n(&$@) ## nocritic (Subroutines::ProhibitSubroutinePrototypes);
+sub _first_n(&$@) ## no critic (Subroutines::ProhibitSubroutinePrototypes);
{ my $coderef= shift;
my $n= shift;
my $i=0;
@@ -1762,7 +1765,8 @@ sub _tag_cond
sub _parse_predicate_in_handler
{ my( $flag, $score)= @_[1..2];
$_[0]=~ s{( ($REG_STRING) # strings
- |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
+ |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP) # @att and regexp
+ |\@($REG_TAG_NAME)(?=\s*(?:[><=!])) # @att followed by a comparison operator
|\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
|=~|!~ # matching operators
|([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
@@ -1777,8 +1781,8 @@ sub _parse_predicate_in_handler
|($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings)
)}
- { my( $token, $str, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag)
- = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+ { my( $token, $str, $att_re_name, $att_re_regexp, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag)
+ = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14);
$score->{predicates}++;
@@ -1790,6 +1794,9 @@ sub _parse_predicate_in_handler
elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})}
: qq{\$elt->{'$att'}}
}
+ elsif( $att_re_name) { $att_re_name=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att_re_name'}$att_re_regexp)}
+ : qq{\$elt->{'$att_re_name'}$att_re_regexp}
+ }
# for some reason Devel::Cover flags the following lines as not tested. They are though.
elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))}
: qq{defined( \$elt->{'$bare_att'})}
diff --git a/Twig_pm.slow b/Twig_pm.slow
index cefaa0d..5612e80 100755
--- a/Twig_pm.slow
+++ b/Twig_pm.slow
@@ -144,7 +144,7 @@ my $SEP= qr/\s*(?:$|\|)/;
BEGIN
{
-$VERSION = '3.46';
+$VERSION = '3.47';
use XML::Parser;
my $needVersion = '2.23';
@@ -154,7 +154,7 @@ croak "need at least XML::Parser version $needVersion" unless $parser_version >=
($perl_version= $])=~ s{_\d+}{};
if( $perl_version >= 5.008)
- { eval "use Encode qw( :all)";
+ { eval "use Encode qw( :all)"; ## no critic ProhibitStringyEval
$FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF;
$FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF;
}
@@ -996,6 +996,7 @@ sub _html2xml
$xml.= _as_XML( $tree);
+
_fix_xml( $tree, \$xml);
if( $options->{indent}) { _indent_xhtml( \$xml); }
@@ -1142,7 +1143,7 @@ sub _as_XML {
if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; }
}
- if ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || []} )
+ if ( $empty_element_map->{$tag} && (!@{ $node->{'_content'} || []}) )
{ $xml.= $node->starttag_XML( undef, 1 ); }
else
{ $xml.= $node->starttag_XML(undef); }
@@ -1175,7 +1176,9 @@ sub _xml_escape
)
)
}
- {&amp;}gx; # Needs to be escaped to amp
+ {&amp;}gx if 0; # Needs to be escaped to amp
+
+ $html=~ s{&}{&amp;}g;
# in old versions of HTML::TreeBuilder &amp; can come out as &Amp;
if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&amp;}g; }
@@ -1299,7 +1302,7 @@ sub add_stylesheet
$version ||= 0;
if( $disallowed{$module}) { return 0; }
if( $used{$module}) { return 1; }
- if( eval "require $module") { import $module; $used{$module}= 1;
+ if( eval "require $module") { import $module; $used{$module}= 1; # no critic ProhibitStringyEval
if( $version)
{
## no critic (TestingAndDebugging::ProhibitNoStrict);
@@ -1316,7 +1319,7 @@ sub add_stylesheet
# used to solve the [n] predicates while avoiding getting the entire list
# needs a prototype to accept passing bare blocks
-sub _first_n(&$@) ## nocritic (Subroutines::ProhibitSubroutinePrototypes);
+sub _first_n(&$@) ## no critic (Subroutines::ProhibitSubroutinePrototypes);
{ my $coderef= shift;
my $n= shift;
my $i=0;
@@ -1762,7 +1765,8 @@ sub _tag_cond
sub _parse_predicate_in_handler
{ my( $flag, $score)= @_[1..2];
$_[0]=~ s{( ($REG_STRING) # strings
- |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
+ |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP) # @att and regexp
+ |\@($REG_TAG_NAME)(?=\s*(?:[><=!])) # @att followed by a comparison operator
|\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
|=~|!~ # matching operators
|([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
@@ -1777,8 +1781,8 @@ sub _parse_predicate_in_handler
|($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings)
)}
- { my( $token, $str, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag)
- = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+ { my( $token, $str, $att_re_name, $att_re_regexp, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag)
+ = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14);
$score->{predicates}++;
@@ -1790,6 +1794,9 @@ sub _parse_predicate_in_handler
elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})}
: qq{\$elt->{'$att'}}
}
+ elsif( $att_re_name) { $att_re_name=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att_re_name'}$att_re_regexp)}
+ : qq{\$elt->{'$att_re_name'}$att_re_regexp}
+ }
# for some reason Devel::Cover flags the following lines as not tested. They are though.
elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))}
: qq{defined( \$elt->{'$bare_att'})}
diff --git a/t/test_3_41.t b/t/test_3_41.t
index b8e08d9..b8e08d9 100644..100755
--- a/t/test_3_41.t
+++ b/t/test_3_41.t
diff --git a/t/test_3_42.t b/t/test_3_42.t
index f210c24..f210c24 100644..100755
--- a/t/test_3_42.t
+++ b/t/test_3_42.t
diff --git a/t/test_3_44.t b/t/test_3_44.t
index 6090b53..6090b53 100644..100755
--- a/t/test_3_44.t
+++ b/t/test_3_44.t
diff --git a/t/test_3_45.t b/t/test_3_45.t
index e3d5d30..e3d5d30 100644..100755
--- a/t/test_3_45.t
+++ b/t/test_3_45.t
diff --git a/t/test_3_47.t b/t/test_3_47.t
new file mode 100755
index 0000000..11fd61a
--- /dev/null
+++ b/t/test_3_47.t
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use XML::Twig;
+use Test::More tests => 3;
+
+use utf8;
+
+# test CDATA sections in HTML escaping https://rt.cpan.org/Ticket/Display.html?id=86773
+
+ # module => XML::Twig->new options
+my %html_conv= ( 'HTML::TreeBuilder' => {},
+ 'HTML::Tidy' => { use_tidy => 1 },
+ );
+foreach my $module ( sort keys %html_conv)
+ { SKIP:
+ { eval "use $module";
+ skip "$module not available", 3 if $@ ;
+
+ my $in = q{<h1>Here&amp;there v&amp;r;</h1><p>marco&amp;company; and marco&amp;company &pound; &#163; &#xA3; £</p>};
+ my $expected= q{<h1>Here&amp;there v&amp;r;</h1><p>marco&amp;company; and marco&amp;company £ £ £ £</p>};
+
+ my $parser= XML::Twig->new( %{$html_conv{$module}});
+ my $t = $parser->safe_parse_html($in);
+ print $@ if $@;
+
+ like $t->sprint, qr{\Q$expected\E}, "In and out are the same ($module)";
+
+ }
+ }
+
+{ # test RT #94295 https://rt.cpan.org/Public/Bug/Display.html?id=94295
+ # '=' in regexps on attributes are turned into 'eq'
+ my $xml= '<doc><e dn="foo=1 host=0">e1</e><e dn="foo=1 host=2">e2</e></doc>';
+ my $r;
+ my $t= XML::Twig->new( twig_handlers => { 'e[@dn =~ /host=0/]' => sub { $r.= $_->text } })
+ ->parse( $xml);
+ is( $r, 'e1', 'regexp on attribute, including an = sign');
+}
+exit;
+
+
+
diff --git a/t/test_changes.t b/t/test_changes.t
index ccdb079..a80abcc 100644..100755
--- a/t/test_changes.t
+++ b/t/test_changes.t
@@ -3,5 +3,5 @@
use Test::More;
eval 'use Test::CPAN::Changes';
plan skip_all => 'Test::CPAN::Changes required for this test' if $@;
-plan skip_all => 'uthor test. Set $ENV{TEST_AUTHOR} to a true value to run.' if ! $ENV{TEST_AUTHOR};
+plan skip_all => 'Author test. Set $ENV{TEST_AUTHOR} to a true value to run.' if ! $ENV{TEST_AUTHOR};
changes_ok();
diff --git a/t/test_meta_json.t b/t/test_meta_json.t
index 3873897..3873897 100644..100755
--- a/t/test_meta_json.t
+++ b/t/test_meta_json.t
diff --git a/t/test_xml_split.t b/t/test_xml_split.t
index 9c5db09..87e96d1 100755
--- a/t/test_xml_split.t
+++ b/t/test_xml_split.t
@@ -91,7 +91,7 @@ else
sub test_error
{ my( $command, $options, $expected)= @_;
my( $stdout, $stderr, $success, $exit_code) = IO::CaptureOutput::capture_exec( "$perl $command $options test_xml_split.xml");
- matches( $stderr, qr/^$expected/, "$command $options");
+ matches( $stderr, qr/$expected/, "$command $options");
}
sub test_out
diff --git a/t/xmlxpath_31vars.t b/t/xmlxpath_31vars.t
index c04c07f..c04c07f 100644..100755
--- a/t/xmlxpath_31vars.t
+++ b/t/xmlxpath_31vars.t
diff --git a/tools/xml_grep/xml_grep b/tools/xml_grep/xml_grep
index 6b0cde0..cfb82a1 100755
--- a/tools/xml_grep/xml_grep
+++ b/tools/xml_grep/xml_grep
@@ -12,7 +12,8 @@ my( $help, $man, @roots, @paths, $files,
$count, $nb_results, $nb_results_per_file,
$encoding, @exclude,
$wrap, $nowrap, $descr, $group, $pretty_print, $version, $text_only, $date,
- $html, $tidy,
+ $html, $tidy,
+ $add_ns,
$verbose, $strict
);
@@ -46,6 +47,7 @@ GetOptions( 'help' => \$help,
'strict' => \$strict,
'html' => \$html,
'tidy' => \$tidy,
+ 'add_ns' => \$add_ns,
'verbose' => \$verbose,
) or pod2usage(2);