#!/usr/bin/perl

# Read the source-form of the NASM manual and generate the various
# output forms.

# TODO:
#
# Ellipsis support would be nice.

# Source-form features:
# ---------------------
#
# Bullet \b
#   Bullets the paragraph. Rest of paragraph is indented to cope. In
#   HTML, consecutive groups of bulleted paragraphs become unordered
#   lists.
#
# Emphasis \e{foobar}
#   produces `_foobar_' in text and italics in HTML, PS, RTF
#
# Inline code \c{foobar}
#   produces ``foobar'' in text, and fixed-pitch font in HTML, PS, RTF
#
# Display code
# \c line one
# \c line two
#   produces fixed-pitch font where appropriate, and doesn't break
#   pages except sufficiently far into the middle of a display.
#
# Chapter, header and subheader
# \C{intro} Introduction
# \H{whatsnasm} What is NASM?
# \S{free} NASM Is Free
#   dealt with as appropriate. Chapters begin on new sides, possibly
#   even new _pages_. (Sub)?headers are good places to begin new
#   pages. Just _after_ a (sub)?header isn't.
#   The keywords can be substituted with \K and \k.
#
# Keyword \K{cintro} \k{cintro}
#   Expands to `Chapter 1', `Section 1.1', `Section 1.1.1'. \K has an
#   initial capital whereas \k doesn't. In HTML, will produce
#   hyperlinks.
#
# Web link \W{http://foobar/}{text} or \W{mailto:me@here}\c{me@here}
#   the \W prefix is ignored except in HTML; in HTML the last part
#   becomes a hyperlink to the first part.
#
# Literals \{ \} \\
#   In case it's necessary, they expand to the real versions.
#
# Nonbreaking hyphen \-
#   Need more be said?
#
# Source comment \#
#   Causes everything after it on the line to be ignored by the
#   source-form processor.
#
# Indexable word \i{foobar} (or \i\e{foobar} or \i\c{foobar}, equally)
#   makes word appear in index, referenced to that point
#   \i\c comes up in code style even in the index; \i\e doesn't come
#   up in emphasised style.
#
# Indexable non-displayed word \I{foobar} or \I\c{foobar}
#   just as \i{foobar} except that nothing is displayed for it
#
# Index rewrite
# \IR{foobar} \c{foobar} operator, uses of
#   tidies up the appearance in the index of something the \i or \I
#   operator was applied to
#
# Index alias
# \IA{foobar}{bazquux}
#   aliases one index tag (as might be supplied to \i or \I) to
#   another, so that \I{foobar} has the effect of \I{bazquux}, and
#   \i{foobar} has the effect of \I{bazquux}foobar
#
# Metadata
# \M{key}{something}
#   defines document metadata, such as authorship, title and copyright;
#   different output formats use this differently.
#

# A leading "-d" argument switches on the index-diagnostic output and is
# removed from @ARGV, so that the remaining arguments are read by <> below.
$diag = 1, shift @ARGV if $ARGV[0] eq "-d";

# Unbuffered output, so the progress messages show up immediately.
$| = 1;

# Seed the document tree with a "Top" node at level 0.
$tstruct_previtem = $node = "Top";
# NOTE(review): this assigns the scalar $nodes, while indexdiag iterates
# the array @nodes; "@nodes = ($node)" may have been intended. Left as
# found -- confirm before changing, since it would add "Top" to @nodes.
$nodes = ($node);
$tstruct_level{$tstruct_previtem} = 0;
$tstruct_last[$tstruct_level{$tstruct_previtem}] = $tstruct_previtem;

$MAXLEVEL = 10;  # really 3, but play safe ;-)

# Read the file; pass a paragraph at a time to the paragraph processor.
print "Reading input...";
$pname = "para000000";
@pnames = @pflags = ();
$para = undef;
while (<>) {
  chomp;
  if (!/\S/ || /^\\(IA|IR|M)/) { # special case: \IA \IR \M imply new-paragraph
    &got_para($para);
    $para = undef;
  }
  if (/\S/) {
    s/\\#.*$//; # strip comments
    $para .= " " . $_;
  }
}
&got_para($para);
print "done.\n";

# Now we've read in the entire document and we know what all the
# heading keywords refer to. Go through and fix up the \k references.
print "Fixing up cross-references...";
&fixup_xrefs;
print "done.\n";

# Sort the index tags, according to the slightly odd order I've decided on.
print "Sorting index tags...";
&indexsort;
print "done.\n";

if ($diag) {
  print "Writing index-diagnostic file...";
  &indexdiag;
  print "done.\n";
}

# OK. Write out the various output files.
# Produce each output format in turn. write_texi, write_hlp and write_dip
# are not defined in this part of the file; they are expected elsewhere.
print "Producing text output: ";
&write_txt;
print "done.\n";
print "Producing HTML output: ";
&write_html;
print "done.\n";
print "Producing Texinfo output: ";
&write_texi;
print "done.\n";
print "Producing WinHelp output: ";
&write_hlp;
print "done.\n";
print "Producing Documentation Intermediate Paragraphs: ";
&write_dip;
print "done.\n";

# got_para(PARAGRAPH)
#
# Process one source paragraph, passed as a single string. A paragraph
# with no non-space characters is ignored. The paragraph is classified
# (code, chapter, appendix, heading, subheading, index rewrite, index
# alias, metadata, bullet or normal) and its words are pushed, with
# two-character type codes, onto the array named by the global $pname.
#
# Side effects: headings update $cnum/$hnum/$snum, %refs, %xrefs,
# %xrefnodes, %nodexrefs and $node, and call add_item; \IA fills
# %idxalias and \M fills %metadata (both return early). Unless the
# paragraph was an \IR index rewrite, $pname is appended to @pnames, the
# flags to @pflags, and $pname is advanced to the next name.
#
# Dies on a malformed heading or markup sequence.
sub got_para {
  local ($_) = @_;
  # Parenthesised so that every scratch variable really is lexical;
  # "my $a, $b" would only declare the first one.
  my ($i, $w, $l, $t);
  my $pflags = "";
  return if !/\S/;

  @$pname = ();

  # Strip off _leading_ spaces, then determine type of paragraph.
  s/^\s*//;
  $irewrite = undef;
  if (/^\\c[^{]/) {
    # A code paragraph. The paragraph-array will contain the simple
    # strings which form each line of the paragraph.
    $pflags = "code";
    while (/^\\c (([^\\]|\\[^c])*)(.*)$/) {
      $l = $1;
      $_ = $3;
      $l =~ s/\\\{/{/g;
      $l =~ s/\\\}/}/g;
      $l =~ s/\\\\/\\/g;
      push @$pname, $l;
    }
    $_ = ''; # suppress word-by-word code
  } elsif (/^\\C/) {
    # A chapter heading. Define the keyword and allocate a chapter
    # number.
    $cnum++;
    $hnum = 0;
    $snum = 0;
    $xref = "chapter-$cnum";
    $pflags = "chap $cnum :$xref";
    die "badly formatted chapter heading: $_\n" if !/^\\C\{([^}]*)\}\s*(.*)$/;
    $refs{$1} = "chapter $cnum";
    $node = "Chapter $cnum";
    &add_item($node, 1);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\A/) {
    # An appendix heading. Define the keyword and allocate an appendix
    # letter.
    $cnum++;
    $cnum = 'A' if $cnum =~ /[0-9]+/;
    $hnum = 0;
    $snum = 0;
    $xref = "appendix-$cnum";
    $pflags = "appn $cnum :$xref";
    die "badly formatted appendix heading: $_\n" if !/^\\A\{([^}]*)\}\s*(.*)$/;
    $refs{$1} = "appendix $cnum";
    $node = "Appendix $cnum";
    &add_item($node, 1);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\H/) {
    # A major heading. Define the keyword and allocate a section number.
    $hnum++;
    $snum = 0;
    $xref = "section-$cnum.$hnum";
    $pflags = "head $cnum.$hnum :$xref";
    die "badly formatted heading: $_\n" if !/^\\[HP]\{([^}]*)\}\s*(.*)$/;
    $refs{$1} = "section $cnum.$hnum";
    $node = "Section $cnum.$hnum";
    &add_item($node, 2);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\S/) {
    # A sub-heading. Define the keyword and allocate a section number.
    $snum++;
    $xref = "section-$cnum.$hnum.$snum";
    $pflags = "subh $cnum.$hnum.$snum :$xref";
    die "badly formatted subheading: $_\n" if !/^\\S\{([^}]*)\}\s*(.*)$/;
    $refs{$1} = "section $cnum.$hnum.$snum";
    $node = "Section $cnum.$hnum.$snum";
    &add_item($node, 3);
    $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
    $xrefs{$1} = $xref;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\IR/) {
    # An index-rewrite.
    die "badly formatted index rewrite: $_\n" if !/^\\IR\{([^}]*)\}\s*(.*)$/;
    $irewrite = $1;
    $_ = $2;
    # the standard word-by-word code will happen next
  } elsif (/^\\IA/) {
    # An index-alias.
    die "badly formatted index alias: $_\n" if !/^\\IA\{([^}]*)\}\{([^}]*)\}\s*$/;
    $idxalias{$1} = $2;
    return; # avoid word-by-word code
  } elsif (/^\\M/) {
    # Metadata
    die "badly formed metadata: $_\n" if !/^\\M\{([^}]*)\}\{([^}]*)\}\s*$/;
    $metadata{$1} = $2;
    return; # avoid word-by-word code
  } elsif (/^\\b/) {
    # A bulleted paragraph. Strip off the initial \b and let the
    # word-by-word code take care of the rest.
    $pflags = "bull";
    s/^\\b\s*//;
  } else {
    # A normal paragraph. Just set $pflags: the word-by-word code does
    # the rest.
    $pflags = "norm";
  }

  # The word-by-word code: unless @$pname is already defined (which it
  # will be in the case of a code paragraph), split the paragraph up
  # into words and push each on @$pname.
  #
  # Each thing pushed on @$pname should have a two-character type
  # code followed by the text.
  #
  # Type codes are:
  # "n " for normal
  # "da" for a dash
  # "es" for first emphasised word in emphasised bit
  # "e " for emphasised in mid-emphasised-bit
  # "ee" for last emphasised word in emphasised bit
  # "eo" for single (only) emphasised word
  # "c " for code
  # "k " for cross-ref
  # "kK" for capitalised cross-ref
  # "w " for Web link
  # "wc" for code-type Web link
  # "x " for beginning of resolved cross-ref; generates no visible output,
  #      and the text is the cross-reference code
  # "xe" for end of resolved cross-ref; text is same as for "x ".
  # "i " for point to be indexed: the text is the internal index into the
  #      index-items arrays
  # "sp" for space
  while (/\S/) {
    s/^\s*//, push @$pname, "sp" if /^\s/;
    $indexing = $qindex = 0;
    if (/^(\\[iI])?\\c/) {
      $qindex = 1 if $1 eq "\\I";
      $indexing = 1, s/^\\[iI]// if $1;
      s/^\\c//;
      die "badly formatted \\c: \\c$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/{/g;
      $w =~ s/\\\}/}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      # Reserve a slot for the index marker; addidx supplies its text below.
      (push @$pname,"i"),$lastp = $#$pname if $indexing;
      push @$pname,"c $w" if !$qindex;
      $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
    } elsif (/^\\[iIe]/) {
      /^(\\[iI])?(\\e)?/;
      $emph = 0;
      $qindex = 1 if $1 eq "\\I";
      $indexing = 1, $type = "\\i" if $1;
      $emph = 1, $type = "\\e" if $2;
      s/^(\\[iI])?(\\e?)//;
      die "badly formatted $type: $type$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/{/g;
      $w =~ s/\\\}/}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      $t = $emph ? "es" : "n ";
      @ientry = ();
      (push @$pname,"i"),$lastp = $#$pname if $indexing;
      foreach $i (split /\s+/,$w) {  # \e and \i can be multiple words
        push @$pname,"$t$i","sp" if !$qindex;
        ($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp" if $indexing;
        $t = $emph ? "e " : "n ";
      }
      $w =~ tr/A-Z/a-z/, pop @ientry if $indexing;
      $$pname[$lastp] = &addidx($node, $w, @ientry) if $indexing;
      pop @$pname if !$qindex; # remove final space
      if (substr($$pname[$#$pname],0,2) eq "es" && !$qindex) {
        substr($$pname[$#$pname],0,2) = "eo";
      } elsif ($emph && !$qindex) {
        substr($$pname[$#$pname],0,2) = "ee";
      }
    } elsif (/^\\[kK]/) {
      $t = "k ";
      $t = "kK" if /^\\K/;
      s/^\\[kK]//;
      # The message used to say "\c" here although it reports a bad \k.
      die "badly formatted \\k: \\k$_\n" if !/\{([^}]*)\}(.*)$/;
      $_ = $2;
      push @$pname,"$t$1";
    } elsif (/^\\W/) {
      s/^\\W//;
      die "badly formatted \\W: \\W$_\n"
        if !/\{([^}]*)\}(\\i)?(\\c)?\{(([^\\}]|\\.)*)\}(.*)$/;
      $l = $1;
      $w = $4;
      $_ = $6;
      $t = "w ";
      $t = "wc" if $3 eq "\\c";
      $indexing = 1 if $2;
      $w =~ s/\\\{/{/g;
      $w =~ s/\\\}/}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      (push @$pname,"i"),$lastp = $#$pname if $indexing;
      push @$pname,"$t<$l>$w";
      $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
    } else {
      die "what the hell? $_\n" if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
      die "painful death! $_\n" if !length $1;
      $w = $1;
      $_ = $3;
      $w =~ s/\\\{/{/g;
      $w =~ s/\\\}/}/g;
      $w =~ s/\\-/-/g;
      $w =~ s/\\\\/\\/g;
      if ($w eq "-") {
        push @$pname,"da";
      } else {
        push @$pname,"n $w";
      }
    }
  }
  # Comparing against undef with "ne" is the same as comparing against
  # the empty string, so an empty \IR tag is treated as "no rewrite".
  if ($irewrite ne undef) {
    &addidx(undef, $irewrite, @$pname);
    @$pname = ();
  } else {
    push @pnames, $pname;
    push @pflags, $pflags;
    $pname++;
  }
}

# addidx(NODE, TEXT, ENTRY...)
#
# Record an index entry. TEXT is the index tag, first mapped through
# %idxalias. ENTRY is the list of typed words to show in the index. The
# entry is stored (in the array named by the global $ientry, which is then
# advanced) when NODE is undef/empty -- as for an index rewrite -- or when
# the tag has no entry in %idxmap yet.
#
# When NODE is true, the tag is marked as used at that node in %idxnodes
# and the word "i TEXT" is returned for the caller to put in the paragraph.
sub addidx {
  my ($node, $text, @ientry) = @_;
  $text = $idxalias{$text} || $text;
  if ($node eq undef || !$idxmap{$text}) {
    @$ientry = @ientry;
    $idxmap{$text} = $ientry;
    $ientry++;
  }
  if ($node) {
    $idxnodes{$node,$text} = 1;
    return "i $text";
  }
}

# indexsort()
#
# Fill @itags with the keys of %idxmap, ordered by: the letters of the
# entry's first word (upper-cased, everything else removed), then that
# word's type code, then the tag itself.
#
# Afterwards each entry is scanned for its first "n " word containing a
# comma. That word is cut after its last comma (any remainder becomes a
# new "n " word), %commapos records the position just after it, and when
# the part up to the comma equals that of the previous tag, %commanext is
# set for this tag and %commaafter for the previous one.
sub indexsort {
  # Parenthesised so that all of these are lexical. In particular this
  # keeps $ientry from overwriting the global of that name used by addidx.
  my ($iitem, $ientry, $i, $piitem, $pcval, $cval, $clrcval);

  @itags = map { # get back the original data as the 1st elt of each list
             $_->[0]
           } sort { # compare auxiliary (non-first) elements of lists
             $a->[1] cmp $b->[1] ||
             $a->[2] cmp $b->[2] ||
             $a->[0] cmp $b->[0]
           } map { # transform array into list of 3-element lists
             my $ientry = $idxmap{$_};
             my $a = substr($$ientry[0],2);
             $a =~ tr/A-Za-z//cd;
             [$_, uc($a), substr($$ientry[0],0,2)]
           } keys %idxmap;

  # Having done that, check for comma-hood.
  $cval = 0;
  foreach $iitem (@itags) {
    $ientry = $idxmap{$iitem};
    $clrcval = 1;
    $pcval = $cval;
    FL:for ($i=0; $i <= $#$ientry; $i++) {
      if ($$ientry[$i] =~ /^(n .*,)(.*)/) {
        $$ientry[$i] = $1;
        splice @$ientry,$i+1,0,"n $2" if length $2;
        $commapos{$iitem} = $i+1;
        $cval = join("\002", @$ientry[0..$i]);
        $clrcval = 0;
        last FL;
      }
    }
    $cval = undef if $clrcval;
    $commanext{$iitem} = $commaafter{$piitem} = 1
      if $cval and ($cval eq $pcval);
    $piitem = $iitem;
  }
}

# indexdiag()
#
# Write the diagnostic file "index.diag": one line per tag in @itags,
# giving "<tag> ", the entry's plain-text rendering, a colon, and the
# nodes from @nodes at which the tag is indexed. Dies if the file cannot
# be opened.
sub indexdiag {
  my ($iitem, $ientry, $w, $ww, $foo, $node);
  open INDEXDIAG,">index.diag"
    or die "unable to open index.diag for writing: $!\n";
  foreach $iitem (@itags) {
    $ientry = $idxmap{$iitem};
    print INDEXDIAG "<$iitem> ";
    foreach $w (@$ientry) {
      $ww = &word_txt($w);
      print INDEXDIAG $ww unless $ww eq "\001";
    }
    print INDEXDIAG ":";
    $foo = " ";
    foreach $node (@nodes) {
      (print INDEXDIAG $foo,$node), $foo = ", " if $idxnodes{$node,$iitem};
    }
    print INDEXDIAG "\n";
  }
  close INDEXDIAG;
}

# fixup_xrefs()
#
# Resolve the \k and \K references in every non-code paragraph. Each
# "k "/"kK" word is replaced by an "x " marker carrying the target from
# %xrefs, the words of the reference text from %refs (first letter
# upper-cased for \K) separated by "sp", and a closing "xe" marker.
# Dies if a keyword has no entry in %refs.
sub fixup_xrefs {
  my ($pname, $p, $i, $j, $k, $caps, $repl, @repl);

  for ($p=0; $p<=$#pnames; $p++) {
    next if $pflags[$p] eq "code";
    $pname = $pnames[$p];
    # Walk backwards so that splicing in the replacement does not disturb
    # the indices still to be visited.
    for ($i=$#$pname; $i >= 0; $i--) {
      if ($$pname[$i] =~ /^k/) {
        $k = $$pname[$i];
        $caps = ($k =~ /^kK/);
        $k = substr($k,2);
        $repl = $refs{$k};
        die "undefined keyword `$k'\n" unless $repl;
        substr($repl,0,1) =~ tr/a-z/A-Z/ if $caps;
        @repl = ();
        push @repl,"x $xrefs{$k}";
        foreach $j (split /\s+/,$repl) {
          push @repl,"n $j";
          push @repl,"sp";
        }
        pop @repl; # remove final space
        push @repl,"xe$xrefs{$k}";
        splice @$pname,$i,1,@repl;
      }
    }
  }
}

# write_txt()
#
# Write the plain-text manual to "nasmdoc.txt" from @pnames/@pflags:
# a centred, underlined title, then every paragraph according to its
# type. Ordinary and bulleted paragraphs are wrapped at 75 columns.
# Warns about code lines longer than 68 characters. Dies if the file
# cannot be opened. Uses (and overwrites) package globals throughout.
sub write_txt {
  # This is called from the top level, so I won't bother using
  # my or local.

  # Open file.
  print "writing file...";
  open TEXT,">nasmdoc.txt"
    or die "unable to open nasmdoc.txt for writing: $!\n";
  select TEXT;

  # Preamble.
  $title = "The Netwide Assembler: NASM";
  $spaces = ' ' x ((75-(length $title))/2);
  ($underscore = $title) =~ s/./=/g;
  print "$spaces$title\n$spaces$underscore\n";

  for ($para = 0; $para <= $#pnames; $para++) {
    $pname = $pnames[$para];
    $pflags = $pflags[$para];
    $ptype = substr($pflags,0,4);

    print "\n"; # always one of these before a new paragraph

    if ($ptype eq "chap") {
      # Chapter heading. "Chapter N: Title" followed by a line of
      # minus signs.
      $pflags =~ /chap (.*) :(.*)/;
      $title = "Chapter $1: ";
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
      $title =~ s/./-/g;
      print "$title\n";
    } elsif ($ptype eq "appn") {
      # Appendix heading. "Appendix N: Title" followed by a line of
      # minus signs.
      $pflags =~ /appn (.*) :(.*)/;
      $title = "Appendix $1: ";
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
      $title =~ s/./-/g;
      print "$title\n";
    } elsif ($ptype eq "head" || $ptype eq "subh") {
      # Heading or subheading. Just a number and some text.
      $pflags =~ /.... (.*) :(.*)/;
      $title = sprintf "%6s ", $1;
      foreach $i (@$pname) {
        $ww = &word_txt($i);
        $title .= $ww unless $ww eq "\001";
      }
      print "$title\n";
    } elsif ($ptype eq "code") {
      # Code paragraph. Emit each line with a seven character indent.
      foreach $i (@$pname) {
        warn "code line longer than 68 chars: $i\n" if length $i > 68;
        print ' 'x7, $i, "\n";
      }
    } elsif ($ptype eq "bull" || $ptype eq "norm") {
      # Ordinary paragraph, optionally bulleted. We wrap, with ragged
      # 75-char right margin and either 7 or 11 char left margin
      # depending on bullets.
      if ($ptype eq "bull") {
        $line = ' 'x7 . '(*) ';
        $next = ' 'x11;
      } else {
        $line = $next = ' 'x7;
      }
      @a = @$pname;
      $wd = $wprev = '';
      do {
        do { $w = &word_txt(shift @a) } while $w eq "\001"; # nasty hack
        $wd .= $wprev;
        # A break is possible after a hyphen, at a space, or at the end
        # of the paragraph; only then is the pending text committed.
        if ($wprev =~ /-$/ || $w eq ' ' || $w eq '' || $w eq undef) {
          if (length ($line . $wd) > 75) {
            $line =~ s/\s*$//; # trim trailing spaces
            print "$line\n";
            $line = $next;
            $wd =~ s/^\s*//; # trim leading spaces
          }
          $line .= $wd;
          $wd = '';
        }
        $wprev = $w;
      } while ($w ne '' && $w ne undef);
      if ($line =~ /\S/) {
        $line =~ s/\s*$//; # trim trailing spaces
        print "$line\n";
      }
    }
  }

  # Close file.
  select STDOUT;
  close TEXT;
}

# word_txt(WORD)
#
# Convert one typed word (two-character type code followed by text) to
# its plain-text form. Returns undef for an empty or undefined word, and
# "\001" for words that produce no visible output (cross-reference and
# index markers). Code is quoted `like this', emphasis is marked with
# underscores, and the <...> target of a Web link is dropped. Dies on an
# unknown type code.
sub word_txt {
  my ($w) = @_;
  my ($wtype, $wmajt);

  return undef if $w eq '' || $w eq undef;
  $wtype = substr($w,0,2);
  $wmajt = substr($wtype,0,1);
  $w = substr($w,2);
  $w =~ s/<.*>// if $wmajt eq "w"; # remove web links
  if ($wmajt eq "n" || $wtype eq "e " || $wtype eq "w ") {
    return $w;
  } elsif ($wtype eq "sp") {
    return ' ';
  } elsif ($wtype eq "da") {
    return '-';
  } elsif ($wmajt eq "c" || $wtype eq "wc") {
    return "`${w}'";
  } elsif ($wtype eq "es") {
    return "_${w}";
  } elsif ($wtype eq "ee") {
    return "${w}_";
  } elsif ($wtype eq "eo") {
    return "_${w}_";
  } elsif ($wmajt eq "x" || $wmajt eq "i") {
    return "\001";
  } else {
    die "panic in word_txt: $wtype$w\n";
  }
}

sub write_html {
  # This is called from the top level, so I won't bother using
  # my or local.

  # Write contents file. Just the preamble, then a menu of links to the
  # separate chapter files and the nodes therein.
  print "writing contents file...";
  open TEXT,">nasmdoc0.html";
  select TEXT;
  &html_preamble(0);
  print "
This manual documents NASM, the Netwide Assembler: an assembler\n"; print "targetting the Intel x86 series of processors, with portable source.\n"; print "
"; for ($node = $tstruct_next{'Top'}; $node; $node = $tstruct_next{$node}) { if ($tstruct_level{$node} == 1) { # Invent a file name. ($number = lc($xrefnodes{$node})) =~ s/.*-//; $fname="nasmdocx.html"; substr($fname,8 - length $number, length $number) = $number; $html_fnames{$node} = $fname; $link = $fname; print "
";
} else {
# Use the preceding filename plus a marker point.
$link = $fname . "#$xrefnodes{$node}";
}
$title = "$node: ";
$pname = $tstruct_pname{$node};
foreach $i (@$pname) {
$ww = &word_html($i);
$title .= $ww unless $ww eq "\001";
}
print "$title
\n";
}
print "
Index\n"; print "