summaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
authorGraydon, Tracy <tracy.graydon@intel.com>2012-08-31 11:34:41 -0700
committerGraydon, Tracy <tracy.graydon@intel.com>2012-08-31 11:34:41 -0700
commitd31c43e62fd5483de3604ef825522501eece7feb (patch)
tree176c351869a67eaf8347faba2e1f3a60fa9caeaf /bin
downloaddocbook-style-dsssl-d31c43e62fd5483de3604ef825522501eece7feb.tar.gz
docbook-style-dsssl-d31c43e62fd5483de3604ef825522501eece7feb.tar.bz2
docbook-style-dsssl-d31c43e62fd5483de3604ef825522501eece7feb.zip
Diffstat (limited to 'bin')
-rw-r--r--bin/ChangeLog58
-rw-r--r--bin/collateindex.pl737
-rw-r--r--bin/collateindex.pl.1214
3 files changed, 1009 insertions, 0 deletions
diff --git a/bin/ChangeLog b/bin/ChangeLog
new file mode 100644
index 0000000..8c75e0d
--- /dev/null
+++ b/bin/ChangeLog
@@ -0,0 +1,58 @@
+2004-10-24 <petere78@users.sourceforge.net>
+
+ * collateindex.pl: Only mention the program's base name in the error messages.
+
+2004-10-23 <petere78@users.sourceforge.net>
+
+ * Makefile: Don't put a "v" before the version number here.
+
+ * collateindex.pl: Put the options list in some kind of order, and make the error messages
+ have a consistent format.
+
+2003-03-21 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * Makefile: clean target cleans backup files and CVS junk
+
+2003-03-16 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * Makefile: fix the POD release variable
+
+2003-01-19 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * collateindex.pl: fix from Tim Waugh for when the same indexterm has two different
+ seealso's, where currently the script produces invalid SGML/XML; fixes bug #551318
+
+2002-12-16 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * .cvsignore: New file.
+
+2002-08-08 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * Makefile: New file.
+
+2002-01-07 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * collateindex.pl: emit a warning if a duplicated index entry is found and removed
+
+2001-12-01 Norman Walsh <nwalsh@users.sourceforge.net>
+
+ * collateindex.pl: Patch #468645: fix for indexterm zones in print
+
+2001-04-20 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * collateindex.pl: add POD documentation; remove the usage message documentation in the name of reducing redundancy
+
+2001-04-18 Adam Di Carlo <adicarlo@users.sourceforge.net>
+
+ * collateindex.pl: add -q (quiet) and -V (report version only) arguments
+
+2001-04-03 Norman Walsh <nwalsh@users.sourceforge.net>
+
+ * collateindex.pl: Whitespace changes
+
+ * collateindex.pl: Fix bug 412898, produce links in the index, even in the presence of seealso
+
+2001-04-02 Norman Walsh <nwalsh@users.sourceforge.net>
+
+ * collateindex.pl: New file.
+
diff --git a/bin/collateindex.pl b/bin/collateindex.pl
new file mode 100644
index 0000000..e384711
--- /dev/null
+++ b/bin/collateindex.pl
@@ -0,0 +1,737 @@
+#!/usr/bin/perl -- # -*- Perl -*-
+#
+# $Id: collateindex.pl,v 1.10 2004/10/24 17:05:41 petere78 Exp $
+
+=head1 NAME
+
+collateindex.pl - generate DocBook index files
+
+=head1 SYNOPSIS
+
+B<collateindex.pl> [B<-f>] [B<-g>] [B<-i> I<id>] [B<-I> I<scope>] [B<-N>]
+ [B<-o> F<file>] [B<-p>] [B<-P> F<file>] [B<-q>] [B<-s> I<name>]
+ [B<-S> I<scope>] [B<-t> I<name>] [B<-x>] F<file>
+
+=head1 DESCRIPTION
+
+B<collateindex.pl> creates index data for DocBook XML or SGML files.
+
+=cut
+
+use File::Basename;
+use Getopt::Std;
+
+# Base name of the running script; used as the prefix of diagnostics.
+$me = basename($0);
+
+# Short usage text, printed when the command line cannot be parsed or
+# the input file argument is missing.
+$usage = "Usage: $0 [options] file\n"
+       . "Try \"perldoc $me\" for documentation.\n";
+
+# Reduce the revision keyword string to the bare revision number.
+# (The [R] character class presumably keeps the version control system
+# from expanding the keyword inside the pattern itself.)
+$version = '$Revision: 1.10 $';
+$version =~ s/^\$[R]evision:\s*([^ ]*)\s*\$$/$1/;
+
+=head1 OPTIONS
+
+=over 5
+
+=item B<-f>
+
+Force the output file to be written, even if it appears to have been
+edited by hand.
+
+=item B<-g>
+
+Group terms with IndexDiv based on the first letter of the term (or
+its SortAs attribute). (This might not handle all language environments.)
+
+=item B<-i> I<id>
+
+The ID to use for the E<lt>indexE<gt> tag.
+
+=item B<-I> I<scope>
+
+The implied scope, must be C<all>, C<local>, or C<global>. IndexTerms
+which do not specify a scope will have the implied scope. If
+unspecified, C<all> is assumed.
+
+=item B<-N>
+
+New index (generates an empty index file).
+
+=item B<-o> F<file>
+
+Output to F<file>. Defaults to F<stdout>.
+
+=item B<-p>
+
+Link to points in the document. The default is to link to the closest
+containing section.
+
+=item B<-P> F<file>
+
+Read a preamble from F<file>. The contents of F<file> will be
+inserted before the E<lt>indexE<gt> tag.
+
+=item B<-q>
+
+Run quietly.
+
+=item B<-s> I<name>
+
+Name the IndexDiv that contains symbols. The default is C<Symbols>.
+Meaningless if B<-g> is not used.
+
+=item B<-S> I<scope>
+
+Scope of the index, must be C<all>, C<local>, or C<global>. If
+unspecified, C<all> is assumed.
+
+=item B<-t> I<name>
+
+Title for the index.
+
+=item B<-x>
+
+Make a SetIndex.
+
+=item B<-V>
+
+Print version number and exit.
+
+=item F<file>
+
+The file containing index data generated with the DocBook DSSSL
+HTML stylesheet (usually called F<HTML.index>).
+
+=back
+
+=cut
+
+
+# Parse the command line.  Getopt::Std stores each option in $opt_<letter>;
+# options followed by ':' in the spec take an argument.
+getopts('Dfgi:NpP:s:o:S:I:t:xqV') || die $usage;
+
+# Boolean switches
+$linkpoints   = $opt_p;
+$lettergroups = $opt_g;
+$setindex     = $opt_x;
+$forceoutput  = $opt_f;
+$newindex     = $opt_N;
+$debug        = $opt_D;
+$quiet        = $opt_q;
+
+# Options that carry a value, with their defaults
+$title        = $opt_t;
+$preamble     = $opt_P;
+$indexid      = $opt_i;
+$symbolsname  = $opt_s || "Symbols";
+$outfile      = $opt_o || '-';
+$scope        = uc($opt_S) || 'ALL';
+$impliedscope = uc($opt_I) || 'ALL';
+
+# -V: report the version and stop.
+if ($opt_V) {
+    print "collateindex.pl $version\n";
+    exit 0;
+}
+
+# -x selects <setindex> instead of <index> as the root element.
+$indextag = 'index';
+$indextag = 'setindex' if $setindex;
+
+# -N: write an empty index (no input file is needed) and stop.
+if ($newindex) {
+    my($open_tag) = "<$indextag";
+    $open_tag .= " id='$indexid'" if $indexid;
+
+    safe_open(*OUT, $outfile);
+    print OUT "$open_tag>\n\n";
+    print OUT "<!-- This file was produced by collateindex.pl. -->\n",
+              "<!-- Remove this comment if you edit this file by hand! -->\n";
+    print OUT "</$indextag>\n";
+    exit 0;
+}
+
+# The remaining argument is the index data file; it must be a plain file.
+$dat = shift(@ARGV) || die $usage;
+unless (-f $dat) {
+    die "$me: file \"$dat\" does not exist\n";
+}
+
+# Both scopes must be one of the three known values.
+%legal_scopes = map { ($_ => 1) } ('ALL', 'LOCAL', 'GLOBAL');
+die "$me: invalid scope: $scope\n"
+    if $scope && !$legal_scopes{$scope};
+die "$me: invalid implied scope: $impliedscope\n"
+    if $impliedscope && !$legal_scopes{$impliedscope};
+
+@term = ();
+%id = ();
+
+$termcount = 0;
+
+$quiet || print STDERR "Processing $dat...\n";
+
+# Read the index file, creating an array of objects.  Each object
+# represents an indexterm and has fields for the content of the
+# indexterm.
+#
+# The input is line oriented: "indexterm HREF" opens a record, the
+# following "tag value" lines fill in its fields, and "/indexterm"
+# closes the record and appends it to @term.  primary, secondary and
+# tertiary may carry a sort key, written "tag[sortas] value".  A
+# "zone HREF" line must be followed immediately by the "title ..." line
+# belonging to that zone.
+#
+# title, see and seealso are stored raw because the code that writes
+# them to the index passes them through escape(); escaping them here as
+# well would turn "&" into "&amp;amp;" in the output.  The other text
+# fields are written as-is at output time, so they are escaped here.
+
+open(F, '<', $dat) || die "$me: could not open file \"$dat\": $!\n";
+while (<F>) {
+    # chomp only removes a line terminator, so a final line without a
+    # trailing newline keeps its last character.  Also accept CRLF input.
+    chomp;
+    s/\r$//;
+
+    if (/^\/indexterm/i) {
+        push(@term, $idx);
+        next;
+    }
+
+    if (/^indexterm (.*)$/i) {
+        $termcount++;
+        $idx = {
+            'zone'  => {},
+            'href'  => $1,
+            'count' => $termcount,      # document order, used as a sort tie-break
+            'scope' => $impliedscope,   # may be overridden by a "scope" line
+        };
+        next;
+    }
+
+    if (/^indexpoint (.*)$/i) {
+        $idx->{'hrefpoint'} = $1;
+        next;
+    }
+
+    # Fields stored exactly as read.
+    if (/^(title|startref|see|seealso) (.*)$/i) {
+        $idx->{lc($1)} = $2;
+        next;
+    }
+
+    # primary/secondary/tertiary, optionally with a "[sortas]" prefix.
+    # Without a well-formed "[sortas] " prefix the whole remainder of the
+    # line is used both as the text and as the sort key.
+    if (/^(primary|secondary|tertiary)([\[ ])(.*)$/i) {
+        my($field, $delim, $rest) = (lc($1), $2, $3);
+        my($sortfield) = substr($field, 0, 1) . "sortas";
+        my($sortas, $text) = ($rest, $rest);
+        if (($delim eq '[') && ($rest =~ /^(.*?)\] (.*)$/)) {
+            ($sortas, $text) = ($1, $2);
+        }
+        $idx->{$sortfield} = &escape($sortas);
+        $idx->{$field} = &escape($text);
+        next;
+    }
+
+    if (/^(significance|class) (.*)$/i) {
+        my($field, $value) = (lc($1), $2);
+        $idx->{$field} = &escape($value);
+        next;
+    }
+
+    if (/^scope (.*)$/i) {
+        $idx->{'scope'} = &escape(uc($1));
+        next;
+    }
+
+    if (/^id (.*)$/i) {
+        # Remember the record by ID so that startref can find it later.
+        $idx->{'id'} = $1;
+        $id{$1} = $idx;
+        next;
+    }
+
+    if (/^zone (.*)$/i) {
+        my($href) = $1;
+        $_ = <F>;
+        die "$me: invalid zone: missing title for zone \"$href\"\n"
+            if !defined($_);
+        chomp;
+        s/\r$//;
+        die "$me: invalid zone: $_\n" if !/^title (.*)$/i;
+        $idx->{'zone'}->{$href} = $1;
+        next;
+    }
+
+    die "$me: unrecognized tag in input: $_\n";
+}
+close (F);
+
+$quiet || print STDERR "$termcount entries loaded...\n";
+
+# Resolve STARTREF references.
+# In DocBook, STARTREF is a #CONREF attribute: an indexterm that carries
+# it takes its content from the indexterm whose ID it names.  Emulate
+# that by copying every content field from the referenced record (looked
+# up in %id) into the referring record.
+foreach my $entry (@term) {
+    next unless $entry->{'startref'};
+
+    my($source) = $id{$entry->{'startref'}};
+    foreach my $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
+                       'psortas', 'ssortas', 'tsortas', 'significance',
+                       'class', 'scope') {
+        $entry->{$field} = $source->{$field};
+    }
+}
+
+# Put the terms into collation order, then move all of the
+# non-alphabetic entries to the front of the index.
+@term = sort termsort @term;
+@term = sortsymbols(@term);
+
+safe_open(*OUT, $outfile);
+
+# Start the index: root element (with the optional ID), the marker
+# comments that safe_open looks for, and an explanatory comment.
+{
+    my($open_tag) = "<$indextag";
+    $open_tag .= " id='$indexid'" if $indexid;
+    print OUT "$open_tag>\n\n";
+}
+
+print OUT "<!-- This file was produced by collateindex.pl. -->\n",
+          "<!-- Remove this comment if you edit this file by hand! -->\n";
+
+print OUT "<!-- ULINK is abused here.
+
+ The URL attribute holds the URL that points from the index entry
+ back to the appropriate place in the output produced by the HTML
+ stylesheet. (It's much easier to calculate this URL in the first
+ pass.)
+
+ The Role attribute holds the ID (either real or manufactured) of
+ the corresponding INDEXTERM. This is used by the print backends
+ to produce page numbers.
+
+ The entries below are sorted and collated into the correct order.
+ Duplicates may be removed in the HTML backend, but in the print
+ backends, it is impossible to suppress duplicate pages or coalesce
+ sequences of pages into a range.
+-->\n\n";
+
+if ($title) {
+    print OUT "<title>$title</title>\n\n";
+}
+
+# Main collation loop: walk the sorted terms and write one <indexentry>
+# per distinct primary term, with nested secondary/tertiary entries.
+#
+# State shared with end_entry() and print_term() through globals:
+#   $lastout  - name of the *ie element whose end tag is still pending
+#   @seealsos - "see also" texts collected for the entry being written
+$last = {}; # the last indexterm we processed
+$first = 1; # this is the first one
+$group = ""; # we're not in a group yet
+$lastout = ""; # we've not put anything out yet
+@seealsos = (); # See also stack.
+
+foreach $idx (@term) {
+ next if $idx->{'startref'}; # no way to represent spans...
+ next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
+ next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
+ next if &same($idx, $last); # suppress duplicates
+
+ # Count down once per term that is actually written; what remains in
+ # $termcount after the loop is the number of terms that were skipped.
+ $termcount--;
+
+ # If primary changes, output a whole new index term, otherwise just
+ # output another secondary or tertiary, as appropriate. We know from
+ # sorting that the terms will always be in the right order.
+ if (!&tsame($last, $idx, 'primary')) {
+ print "DIFF PRIM\n" if $debug;
+ &end_entry() if not $first;
+
+ if ($lettergroups) {
+ # If we're grouping, make the right indexdivs
+ $letter = $idx->{'psortas'};
+ $letter = $idx->{'primary'} if !$letter;
+ $letter = uc(substr($letter, 0, 1));
+
+ # symbols are a special case
+ if (($letter lt 'A') || ($letter gt 'Z')) {
+ # Open the symbols division only once: when nothing is open yet
+ # or when the current division is a letter division.
+ if (($group eq '')
+ || (($group ge 'A') && ($group le 'Z'))) {
+ print OUT "</indexdiv>\n" if !$first;
+ print OUT "<indexdiv><title>$symbolsname</title>\n\n";
+ $group = $letter;
+ }
+ } elsif (($group eq '') || ($group ne $letter)) {
+ print OUT "</indexdiv>\n" if !$first;
+ print OUT "<indexdiv><title>$letter</title>\n\n";
+ $group = $letter;
+ }
+ }
+
+ $first = 0; # there can only be one first ;-)
+
+ # The end tag of the innermost element is deliberately left pending
+ # (tracked in $lastout) so that print_term can append the links.
+ print OUT "<indexentry>\n";
+ print OUT " <primaryie>", $idx->{'primary'};
+ $lastout = "primaryie";
+
+ if ($idx->{'secondary'}) {
+ print OUT "\n </primaryie>\n";
+ print OUT " <secondaryie>", $idx->{'secondary'};
+ $lastout = "secondaryie";
+ };
+
+ if ($idx->{'tertiary'}) {
+ print OUT "\n </secondaryie>\n";
+ print OUT " <tertiaryie>", $idx->{'tertiary'};
+ $lastout = "tertiaryie";
+ }
+ } elsif (!&tsame($last, $idx, 'secondary')) {
+ print "DIFF SEC\n" if $debug;
+
+ print OUT "\n </$lastout>\n" if $lastout;
+
+ # NOTE(review): $indent is not assigned anywhere at file scope in the
+ # visible code (print_term declares its own lexical), so it appears
+ # to contribute nothing to the output here - confirm.
+ foreach (@seealsos) {
+ # it'd be nice to make this a link...
+ print OUT $indent, " <seealsoie>", &escape($_), "</seealsoie>\n";
+ }
+ @seealsos = ();
+
+ print OUT " <secondaryie>", $idx->{'secondary'};
+ $lastout = "secondaryie";
+ if ($idx->{'tertiary'}) {
+ print OUT "\n </secondaryie>\n";
+ print OUT " <tertiaryie>", $idx->{'tertiary'};
+ $lastout = "tertiaryie";
+ }
+ } elsif (!&tsame($last, $idx, 'tertiary')) {
+ print "DIFF TERT\n" if $debug;
+
+ print OUT "\n </$lastout>\n" if $lastout;
+
+ foreach (@seealsos) {
+ # it'd be nice to make this a link...
+ print OUT $indent, " <seealsoie>", &escape($_), "</seealsoie>\n";
+ }
+ @seealsos = ();
+
+ if ($idx->{'tertiary'}) {
+ print OUT " <tertiaryie>", $idx->{'tertiary'};
+ $lastout = "tertiaryie";
+ }
+ }
+
+ # Append the link(s) for this occurrence to the element left open above.
+ &print_term($idx);
+
+ $last = $idx;
+}
+
+# Termcount is > 0 iff some entries were skipped.
+$quiet || print STDERR "$termcount entries ignored...\n";
+
+# Close whatever the collation loop left open.  $first is still true
+# when the loop wrote no <indexentry> at all (empty input, or every term
+# filtered out); in that case neither an <indexentry> nor an <indexdiv>
+# was opened, so writing their end tags would produce malformed output.
+if (!$first) {
+    &end_entry();
+    print OUT "</indexdiv>\n" if $lettergroups;
+}
+print OUT "</$indextag>\n";
+
+close (OUT);
+
+$quiet || print STDERR "Done.\n";
+
+sub same {
+    # Decide whether two indexterm records are duplicates of each other.
+    #
+    # Two index terms are the same if:
+    #   1. the primary, secondary, and tertiary entries are the same
+    #      (or have the same SORTAS), ignoring case and surrounding
+    #      whitespace
+    #   AND
+    #   2. they occur in the same titled section
+    #   AND
+    #   3. they point to the same place
+    #
+    # Notes: Scope is used to suppress some entries, but can't be used
+    #        for comparing duplicates.
+    #        Interpretation of "the same place" depends on whether or
+    #        not $linkpoints is true.
+    #
+    # Emits a warning (prefixed with $me) when a duplicate is found and
+    # returns a true value for duplicates, a false value otherwise.
+    my($left, $right) = @_;
+
+    # Comparison key for one level of one record: SORTAS if present,
+    # otherwise the text itself; trimmed and upper-cased.
+    my $key_of = sub {
+        my($entry, $level) = @_;
+        my($text) = $entry->{substr($level, 0, 1) . "sortas"}
+                    || $entry->{$level};
+        $text =~ s/^\s*//;
+        $text =~ s/\s*$//;
+        return uc($text);
+    };
+
+    my(@levels) = ('primary', 'secondary', 'tertiary');
+    my(@lkey) = map { $key_of->($left, $_) } @levels;
+    my(@rkey) = map { $key_of->($right, $_) } @levels;
+
+    my($same) = (($lkey[0] eq $rkey[0])
+                 && ($lkey[1] eq $rkey[1])
+                 && ($lkey[2] eq $rkey[2])
+                 && ($left->{'title'} eq $right->{'title'})
+                 && ($left->{'href'} eq $right->{'href'}));
+
+    # If we're linking to points, they're only the same if they link
+    # to exactly the same spot.
+    if ($linkpoints) {
+        $same = $same && ($left->{'hrefpoint'} eq $right->{'hrefpoint'});
+    }
+
+    warn "$me: duplicated index entry found: $lkey[0] $lkey[1] $lkey[2]\n"
+        if $same;
+
+    $same;
+}
+
+sub tsame {
+    # Unlike same(), tsame only compares a single level of two records.
+    # Arguments: the two records and the level name ('primary',
+    # 'secondary' or 'tertiary').  For each record the level's SORTAS
+    # field (psortas/ssortas/tsortas) is used when it is set, otherwise
+    # the text of the level itself.  Values are compared after trimming
+    # surrounding whitespace and upper-casing.
+    my($left, $right, $level) = @_;
+    my($sortkey) = substr($level, 0, 1) . "sortas";
+    my(@norm);
+
+    foreach my $entry ($left, $right) {
+        my($text) = $entry->{$sortkey} || $entry->{$level};
+        $text =~ s/^\s*//;
+        $text =~ s/\s*$//;
+        push(@norm, uc($text));
+    }
+
+    return $norm[0] eq $norm[1];
+}
+
+sub end_entry {
+    # Finish the <indexentry> that is currently being written: emit the
+    # end tag of the element still pending in $lastout (if any), flush
+    # the collected "see also" texts from @seealsos, and close the entry.
+    # Leaves @seealsos empty and $lastout cleared.
+    if ($lastout) {
+        print OUT "\n </$lastout>\n";
+    }
+
+    while (@seealsos) {
+        my($text) = shift(@seealsos);
+        # it'd be nice to make this a link...
+        print OUT $indent, " <seealsoie>", &escape($text), "</seealsoie>\n";
+    }
+
+    print OUT "</indexentry>\n\n";
+    $lastout = "";
+}
+
+sub print_term {
+    # Print out the links for an indexterm. There can be more than
+    # one if the term has a ZONE that points to more than one place.
+    # (do we do the right thing in that case?)
+    #
+    # A term with a 'see' field produces only a <seeie> element.  For
+    # any other term one <ulink> per target is written, and its
+    # 'seealso' text (if any) is queued in @seealsos for later output.
+    my($term) = @_;
+    my($pad) = " ";
+
+    if ($term->{'see'}) {
+        # it'd be nice to make this a link...
+        # Close the element that is still open before writing <seeie>.
+        if ($lastout) {
+            print OUT "\n </$lastout>\n";
+            $lastout = "";
+        }
+        print OUT $pad, "<seeie>", &escape($term->{'see'}), "</seeie>\n";
+        return;
+    }
+
+    # Map each target URL to its link text (%title_of) and to the value
+    # used for point links and for the role attribute (%point_of).
+    my(%title_of, %point_of);
+    my(@zones) = keys %{$term->{'zone'}};
+    if (@zones) {
+        foreach my $zone (@zones) {
+            $title_of{$zone} = $term->{'zone'}->{$zone};
+            $point_of{$zone} = $zone;
+        }
+    } else {
+        $title_of{$term->{'href'}} = $term->{'title'};
+        $point_of{$term->{'href'}} = $term->{'hrefpoint'};
+    }
+
+    # We can't use <LINK> because we don't know the ID of the term in the
+    # original source (and, in fact, it might not have one).
+    print OUT ",\n";
+
+    my($preferred) = ($term->{'significance'} eq 'PREFERRED');
+    foreach my $target (keys %title_of) {
+        my($linkend) = $linkpoints ? $point_of{$target} : $target;
+
+        # role: the part after '#', then with everything from the last
+        # '.' onwards removed.
+        my($role) = $point_of{$target};
+        $role = $1 if $role =~ /\#(.*)$/;
+        $role = $1 if $role =~ /(.*)\./;
+
+        print OUT $pad, "<ulink url=\"$linkend\" role=\"$role\">";
+        print OUT "<emphasis>" if $preferred;
+        print OUT &escape($title_of{$target});
+        print OUT "</emphasis>" if $preferred;
+        print OUT "</ulink>";
+    }
+
+    push(@seealsos, $term->{'seealso'}) if $term->{'seealso'};
+}
+
+sub termsort {
+    # sort() comparator for indexterm records (uses sort's $a and $b).
+    # Records are ordered by primary, then secondary, then tertiary key,
+    # where each key is the level's SORTAS value if set, otherwise its
+    # text, trimmed and upper-cased.
+    my(@akey, @bkey);
+
+    foreach my $level ('primary', 'secondary', 'tertiary') {
+        my($sortas) = substr($level, 0, 1) . "sortas";
+        foreach my $pair ([$a, \@akey], [$b, \@bkey]) {
+            my($entry, $keys) = @$pair;
+            my($text) = $entry->{$sortas} || $entry->{$level};
+            $text =~ s/^\s*//;
+            $text =~ s/\s*$//;
+            push(@$keys, uc($text));
+        }
+    }
+
+    # The first level at which the keys differ decides the order.
+    foreach my $i (0 .. 2) {
+        return $akey[$i] cmp $bkey[$i] if $akey[$i] ne $bkey[$i];
+    }
+
+    # make sure seealso's always sort to the bottom
+    return 1 if $a->{'seealso'};
+    return -1 if $b->{'seealso'};
+
+    # if everything else is the same, keep these elements
+    # in document order (so the index links are in the right
+    # order)
+    return $a->{'count'} <=> $b->{'count'};
+}
+
+sub sortsymbols {
+    # Move the non-letter things to the front. Should digits be their
+    # own group? Maybe...
+    #
+    # Takes the list of indexterm records and returns it with every
+    # record whose first sort character (from psortas, or primary when
+    # psortas is not set) is outside A-Z placed before all the others.
+    # The relative order inside each of the two groups is unchanged.
+    my(@symbols, @letters);
+
+    foreach my $entry (@_) {
+        my($initial) = uc(substr($entry->{'psortas'} || $entry->{'primary'},
+                                 0, 1));
+        if (($initial ge 'A') && ($initial le 'Z')) {
+            push(@letters, $entry);
+        } else {
+            push(@symbols, $entry);
+        }
+    }
+
+    return (@symbols, @letters);
+}
+
+sub safe_open {
+ # Open the output file on the filehandle glob passed as the first
+ # argument, refusing to overwrite a file that looks hand-edited, and
+ # copy the preamble file (if any) to it.
+ #
+ # Only the first argument is read; the file name comes from the global
+ # $outfile, and $forceoutput, $preamble and $me are globals as well.
+ # Exits with status 1 if the existing file lacks the marker comment
+ # and -f was not given; dies if the file cannot be opened for writing.
+ # A preamble that cannot be opened only produces a warning.
+
+ # Localize the OUT glob and assign the caller's glob to it, so that
+ # the opens below operate on the handle the caller passed in.
+ local(*OUT) = shift;
+ # Keep the F handle and $_ used below from disturbing the caller's.
+ local(*F, $_);
+
+ if (($outfile ne '-') && (!$forceoutput)) {
+ # Assume hand-edited until the marker comment is found.
+ my($handedit) = 1;
+ if (open (OUT, $outfile)) {
+ while (<OUT>) {
+ if (/<!-- Remove this comment if you edit this file by hand! -->/){
+ $handedit = 0;
+ last;
+ }
+ }
+ close (OUT);
+ } else {
+ # The file cannot be read (e.g. it does not exist yet), so there is
+ # nothing to protect.
+ $handedit = 0;
+ }
+
+ if ($handedit) {
+ print STDERR "$me: file \"$outfile\" appears to have been edited by hand\n";
+ print STDERR "Use the -f option or specify a different output file name.\n";
+ exit 1;
+ }
+ }
+
+ # Two-argument open: when $outfile is '-' this opens ">-", which Perl
+ # treats as standard output.
+ open (OUT, ">$outfile") || die "$me: could not open file \"$outfile\": $!\n";
+
+ if ($preamble) {
+ # Copy the preamble
+ if (open(F, $preamble)) {
+ while (<F>) {
+ print OUT $_;
+ }
+ close(F);
+ } else {
+ warn "$me: could not open preamble file \"$preamble\": $!\n";
+ }
+ }
+}
+
+sub escape {
+    # make sure & and < don't show up in the index: return a copy of the
+    # argument with &, < and > replaced by the corresponding entity
+    # references.  Each input character is replaced exactly once, so the
+    # ampersands of the inserted entities are not escaped again.
+    my($text) = shift;
+    my(%entity) = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
+
+    $text =~ s/([&<>])/$entity{$1}/g;
+
+    return $text;
+}
+
+
+
+=head1 EXAMPLE
+
+B<collateindex.pl> B<-o> F<index.sgml> F<HTML.index>
+
+=head1 EXIT STATUS
+
+=over 5
+
+=item B<0>
+
+Success
+
+=item B<1>
+
+Failure
+
+=back
+
+=head1 AUTHOR
+
+Norm Walsh E<lt>ndw@nwalsh.comE<gt>
+
+Minor updates by Adam Di Carlo E<lt>adam@onshore.comE<gt> and Peter Eisentraut E<lt>peter_e@gmx.netE<gt>
+
+=cut
+
diff --git a/bin/collateindex.pl.1 b/bin/collateindex.pl.1
new file mode 100644
index 0000000..a1b7020
--- /dev/null
+++ b/bin/collateindex.pl.1
@@ -0,0 +1,214 @@
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.14
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sh \" Subsection heading
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. | will give a
+.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
+.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
+.\" expand to `' in nroff, nothing in troff, for use with C<>.
+.tr \(*W-|\(bv\*(Tr
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. nr % 0
+. rr F
+.\}
+.\"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.hy 0
+.if n .na
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "COLLATEINDEX 1"
+.TH COLLATEINDEX 1 "2004-11-04" "docbook-dsssl 1.79" "DocBook DSSSL"
+.SH "NAME"
+collateindex.pl \- generate DocBook index files
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+\&\fBcollateindex.pl\fR [\fB\-f\fR] [\fB\-g\fR] [\fB\-i\fR \fIid\fR] [\fB\-I\fR \fIscope\fR] [\fB\-N\fR]
+ [\fB\-o\fR \fIfile\fR] [\fB\-p\fR] [\fB\-P\fR \fIfile\fR] [\fB\-q\fR] [\fB\-s\fR \fIname\fR]
+ [\fB\-S\fR \fIscope\fR] [\fB\-t\fR \fIname\fR] [\fB\-x\fR] \fIfile\fR
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+\&\fBcollateindex.pl\fR creates index data for DocBook \s-1XML\s0 or \s-1SGML\s0 files.
+.SH "OPTIONS"
+.IX Header "OPTIONS"
+.IP "\fB\-f\fR" 5
+.IX Item "-f"
+Force the output file to be written, even if it appears to have been
+edited by hand.
+.IP "\fB\-g\fR" 5
+.IX Item "-g"
+Group terms with IndexDiv based on the first letter of the term (or
+its SortAs attribute). (This might not handle all language environments.)
+.IP "\fB\-i\fR \fIid\fR" 5
+.IX Item "-i id"
+The \s-1ID\s0 to use for the <index> tag.
+.IP "\fB\-I\fR \fIscope\fR" 5
+.IX Item "-I scope"
+The implied scope, must be \f(CW\*(C`all\*(C'\fR, \f(CW\*(C`local\*(C'\fR, or \f(CW\*(C`global\*(C'\fR. IndexTerms
+which do not specify a scope will have the implied scope. If
+unspecified, \f(CW\*(C`all\*(C'\fR is assumed.
+.IP "\fB\-N\fR" 5
+.IX Item "-N"
+New index (generates an empty index file).
+.IP "\fB\-o\fR \fIfile\fR" 5
+.IX Item "-o file"
+Output to \fIfile\fR. Defaults to \fIstdout\fR.
+.IP "\fB\-p\fR" 5
+.IX Item "-p"
+Link to points in the document. The default is to link to the closest
+containing section.
+.IP "\fB\-P\fR \fIfile\fR" 5
+.IX Item "-P file"
+Read a preamble from \fIfile\fR. The contents of \fIfile\fR will be
+inserted before the <index> tag.
+.IP "\fB\-q\fR" 5
+.IX Item "-q"
+Run quietly.
+.IP "\fB\-s\fR \fIname\fR" 5
+.IX Item "-s name"
+Name the IndexDiv that contains symbols. The default is \f(CW\*(C`Symbols\*(C'\fR.
+Meaningless if \fB\-g\fR is not used.
+.IP "\fB\-S\fR \fIscope\fR" 5
+.IX Item "-S scope"
+Scope of the index, must be \f(CW\*(C`all\*(C'\fR, \f(CW\*(C`local\*(C'\fR, or \f(CW\*(C`global\*(C'\fR. If
+unspecified, \f(CW\*(C`all\*(C'\fR is assumed.
+.IP "\fB\-t\fR \fIname\fR" 5
+.IX Item "-t name"
+Title for the index.
+.IP "\fB\-x\fR" 5
+.IX Item "-x"
+Make a SetIndex.
+.IP "\fB\-V\fR" 5
+.IX Item "-V"
+Print version number and exit.
+.IP "\fIfile\fR" 5
+.IX Item "file"
+The file containing index data generated with the DocBook \s-1DSSSL\s0
+\&\s-1HTML\s0 stylesheet (usually called \fI\s-1HTML\s0.index\fR).
+.SH "EXAMPLE"
+.IX Header "EXAMPLE"
+\&\fBcollateindex.pl\fR \fB\-o\fR \fIindex.sgml\fR \fI\s-1HTML\s0.index\fR
+.SH "EXIT STATUS"
+.IX Header "EXIT STATUS"
+.IP "\fB0\fR" 5
+.IX Item "0"
+Success
+.IP "\fB1\fR" 5
+.IX Item "1"
+Failure
+.SH "AUTHOR"
+.IX Header "AUTHOR"
+Norm Walsh <ndw@nwalsh.com>
+.PP
+Minor updates by Adam Di Carlo <adam@onshore.com> and Peter Eisentraut <peter_e@gmx.net>