summaryrefslogtreecommitdiff
path: root/tests/Test-meta-robots.px
blob: f7e5f91166836c58416470778dc9403fb8f37733 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env -S perl -I .

use strict;
use warnings;

use HTTPTest;

# This test checks that Wget recognizes "nofollow" when it appears in a
# <meta name="robots"> tag, regardless of where in a list of comma-separated
# values it appears, and regardless of letter case.
#
# Four different files contain links to the file "bombshell.html", each
# with "nofollow" set: at the start, in the middle, or at the end of a list
# of values for a <meta name="robots"> tag (with varying amounts of
# separating whitespace), or as the only value. If bombshell.html is
# downloaded, the test has failed.

###############################################################################

my $nofollow_start = <<EOF;
<meta name="roBoTS" content="noFolLow ,  foo, bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

my $nofollow_mid = <<EOF;
<meta name="rObOts" content=" foo  ,  NOfOllow ,  bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

my $nofollow_end = <<EOF;
<meta name="RoBotS" content="foo,BAr,   nofOLLOw    ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

my $nofollow_solo = <<EOF;
<meta name="robots" content="nofollow">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# code, msg, headers, content
my %urls = (
    '/start.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_start,
    },
    '/mid.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_mid,
    },
    '/end.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_end,
    },
    '/solo.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_solo,
    },
    '/bombshell.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => 'Hello',
    },
);

my $cmdline = $WgetTest::WGETPATH . " -r -nd "
    . join(' ',(map "http://localhost:{{port}}/$_.html",
                qw(start mid end solo)));

my $expected_error_code = 0;

my %expected_downloaded_files = (
    'start.html' => {
        content => $nofollow_start,
    },
    'mid.html' => {
        content => $nofollow_mid,
    },
    'end.html' => {
        content => $nofollow_end,
    },
    'solo.html' => {
        content => $nofollow_solo,
    }
);

###############################################################################

my $the_test = HTTPTest->new (input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
exit $the_test->run();

# vim: et ts=4 sw=4