blob: 61795889350f459aff67464007d008ba7bd51aac (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
#!/usr/bin/perl
# This file can find requirements of html and jhtml files (cgi, gif,
# java dependencies). It is a bit of a hack but it turns out to work
# well. We track only dependencies between relative URLs; absolute
# URLs are assumed to be external to the RPM system. We do not
# parse the HTML but look through the set of strings (text surrounded
# by quotes) for something which looks like a reference. This avoids
# writing a full HTML parser and tends to work really well. In this
# manner we can track dependencies for: href, src, action and other
# HTML tags which have not been invented yet.
# The reference:
#
# href="http://www.perl.org/images/arrow.gif"
#
# does not create a dependency but the reference
#
# href="images/arrow.gif"
#
# will create a dependency.
# Additionally this program will find the requirements for Sun jhtml
# (html with embedded java). Since jhtml is deprecated, so is this part
# of the code.
use File::Basename;
# Pattern of file extensions which, when they end a quoted string, mark
# that string as a requirement. It stays a package global because
# process_file() reads it by name.
$DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))$';

# Test the argument COUNT, not the stringified list: "@ARGV" is false
# when the only argument is a file named "0", which would silently
# switch the script over to reading STDIN.
if (@ARGV) {
    foreach my $file (@ARGV) {
        process_file($file);
    }
}
else {
    # Notice we are passed a list of filenames on STDIN, NOT (as is
    # common in unix) the contents of a file. Read one name at a time
    # into a named variable so that nothing aliases $_.
    while ( defined( my $file = <STDIN> ) ) {
        chomp $file;

        # A blank line is not a file name; skip it instead of dying
        # on an attempt to open ''.
        next unless length $file;
        process_file($file);
    }
}

# process_file() records every requirement as a key of %seen, so the
# keys are already unique; print them in sorted order, one per line.
foreach my $key ( sort keys %seen ) {
    print "$key\n";
}
# process_file($file)
#
# Scan one html/jhtml file and record every requirement found in it as
# a key of the package hash %seen: "http(NAME)" for relative references
# whose name matches $DEPS_PAT, and "java(CLASS)" for jhtml imports.
#
#   $file - path of the file to scan. A trailing newline is removed, so
#           a line read from a list of file names can be passed as is.
#
# Returns nothing. Dies if the file cannot be opened or closed.
sub process_file {
    my ($file) = @_;
    chomp $file;

    # Three-argument open with a lexical handle: characters in the file
    # name can never be interpreted as an open mode.
    open( my $fh, '<', $file )
        or die("$0: Could not open file: '$file' : $!\n");

    # We have to suck in the whole file at once because too many people
    # split lines around <java></java> tags. A lexical buffer is used
    # rather than $_, which may be aliased to the caller's loop element.
    my $content = do { local $/; <$fh> };
    $content = '' unless defined $content;

    # close() takes the handle only.
    close($fh)
        or die("$0: Could not close file: '$file' : $!\n");

    # Ignore line based comments ( careful although it has two slashes
    # 'http://www.yahoo.com' is not a comment! )
    $content =~ s!^\s*//.*$!!mg;
    $content =~ s!//\s.*$!!mg;
    $content =~ s!\s//.*$!!mg;

    # Ignore multi-line comments (non greedy). The /s modifier lets '.'
    # cross newlines; without it a comment that actually spans several
    # lines is never removed.
    $content =~ s!/\*.*?\*/!!gs;
    $content =~ s/<!--.*?-->//gs;

    # html references other html documents inside strings. Ignore non
    # relative references since these dependencies can not be met (ie,
    # no package you install will ever provide 'http://www.yahoo.com').
    # I use basename since I have seen too many http references which
    # begin with '../' this would just kill the dependency tracking
    # mechanism.
    while ( $content =~ m{\"([^\"]+)\"}g ) {
        my $string = $1;
        chomp $string;

        # Any "scheme://" marks an absolute URL (http, https, ftp, ...).
        next if $string =~ m{[a-z][a-z0-9+.-]*://}i;
        next unless $string =~ m!$DEPS_PAT!;

        $string = basename($string);
        $string =~ s!\s+!!g;
        $seen{"http(${string})"} = 1;
    }

    # This section is only for use with (Sun) jhtml dependencies, and
    # since jhtml is deprecated so is this code.
    # java imports in jhtml (may have stars for leaf class)
    # these may span several lines
    while ( $content =~ m!<java type=(?:import|extends)>\s*([^<]+)\s*<!g ) {
        my $java_list = $1;
        $java_list =~ s/;/ /g;

        # split ' ' breaks on any run of whitespace (newlines included)
        # and drops leading empty fields, so no empty class is recorded.
        foreach my $java_class ( split ' ', $java_list ) {
            $seen{"java(${java_class})"} = 1;
        }
    }

    return;
}
|