1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
#!/usr/bin/env -S perl -I .
use strict;
use warnings;
use HTTPTest;
# This test checks that Wget parses "nofollow" when it appears in <meta
# name="robots"> tags, regardless of where in a list of comma-separated
# values it appears, and regardless of letter case.
#
# Four different files contain links to the file "bombshell.html", each
# with "nofollow" set in a <meta name="robots"> tag: at the start, in the
# middle, or at the end of a list of values (with varying amounts of
# separating whitespace), or as the only value. If bombshell.html is
# downloaded, the test has failed.
###############################################################################
# Base names of the pages handed to Wget on the command line, in order.
my @page_names = qw(start mid end solo);

# Body served for each page.  Every body has a robots <meta> tag whose
# content includes "nofollow" (the position, letter case and surrounding
# whitespace differ per page) followed by a link to /bombshell.html.
# The heredocs are single-quoted: the bodies contain nothing to interpolate.
my %body_for = (
    start => <<'EOF',
<meta name="roBoTS" content="noFolLow , foo, bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF
    mid => <<'EOF',
<meta name="rObOts" content=" foo , NOfOllow , bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF
    end => <<'EOF',
<meta name="RoBotS" content="foo,BAr, nofOLLOw ">
<a href="/bombshell.html">Don't follow me!</a>
EOF
    solo => <<'EOF',
<meta name="robots" content="nofollow">
<a href="/bombshell.html">Don't follow me!</a>
EOF
);

# Build the response description shared by every URL in this test:
# status "200" / "Ok", a text/html Content-type header, and the given body.
my $html_ok = sub {
    my ($body) = @_;
    return {
        code    => "200",
        msg     => "Ok",
        headers => { "Content-type" => "text/html" },
        content => $body,
    };
};

# URL path => response description passed to HTTPTest as its input.
my %urls = map { ( "/$_.html" => $html_ok->( $body_for{$_} ) ) } @page_names;

# The link target named in every page body above.
$urls{'/bombshell.html'} = $html_ok->('Hello');

# Recursive (-r), no-directories (-nd) fetch of all four pages.
my @page_urls;
push @page_urls, "http://localhost:{{port}}/$_.html" for @page_names;
my $cmdline = $WgetTest::WGETPATH . " -r -nd " . join( ' ', @page_urls );

my $expected_error_code = 0;

# Only the four requested pages are listed as expected output;
# bombshell.html is deliberately absent.
my %expected_downloaded_files =
    map { ( "$_.html" => { content => $body_for{$_} } ) } @page_names;

###############################################################################

my $the_test = HTTPTest->new(
    input   => \%urls,
    cmdline => $cmdline,
    errcode => $expected_error_code,
    output  => \%expected_downloaded_files,
);

exit $the_test->run();
# vim: et ts=4 sw=4
|