3 # Copyright 1994-1996 Werner Almesberger.
6 # See file COPYING for details.
8 #-----------------------------------------------------------------------------
12 # Usually doesn't check for prepended backslashes, e.g. things like
13 # \\begin{verbatim} would be processed incorrectly.
15 # Tokenization should be done once at the beginning, not on the fly
16 # with cleanup and check procedures at the end of each step.
18 #-----------------------------------------------------------------------------
# Seed the macro table %m.  The key is the string \\ldots (two backslashes),
# which matches a literal "\ldots" if it is used as a regex source -- presumably
# how the macro pass consumes it; the value is the replacement text.
24 $m{"\\\\ldots"} = "...";
# Progress message on STDERR; the bracketed number is the current length of $t.
28 print STDERR "[".length($t)."] Reading the file\n";
# Single control characters used as in-band markers in the text buffer.
# NOTE(review): $B and $E, used by most passes below, are not defined in this
# excerpt -- presumably the first begin/end marker pair; confirm.
34 $N = "\000"; # non-character
35 $X = "\007"; # generic marker
36 $Y = "\010"; # another generic marker
# Octal 011 is the TAB character, so literal tabs still present in the text
# would be indistinguishable from this marker.
37 $Z = "\011"; # yet another generic marker
40 $BS = "\013"; # second begin
41 $ES = "\014"; # second end
42 $CO = "\003"; # curly open
43 $CC = "\004"; # curly close
45 # commands to the output formatter
47 $SI = "\020"; # increase indentation by one
48 $SO = "\021"; # decrease indentation by one
49 $B1 = "\022"; # one blank line
50 $B2 = "\023"; # two blank lines
# Last-stage clean-up of the text held in $l (the enclosing code is not part
# of this excerpt).
# Drop the backslash in front of an escaped special character.  The capture is
# referenced as $1: the sed-style \1 is only tolerated in replacements and
# triggers a warning under -w.
57 $l =~ s/\\([_~&%^\$\#\[\]|\-])/$1/g;# unescape special characters
58 $l =~ s/\\,//g; # remove small spaces
# \backslash becomes the marker $X here; presumably it is turned into a
# literal backslash by a line not shown.
59 $l =~ s/\\backslash */$X/g; # \backslash ->\
# Any backslash sequence still present at this point is a command no earlier
# pass handled: warn, and leave a conspicuous note in the output text.
60 if ($l =~ /\\([A-Za-z]+|.)/) {
61 warn "unrecognized command $& ($l)";
62 $l = $`."\n!!! UNRECOGNIZED COMMAND: $&\n$'";
65 $l =~ tr/{}//d; # delete stray curly braces
# Escaped braces were replaced by markers earlier; restore the opening one.
66 $l =~ s/$CO/{/g; # put escaped braces back
# Macro loading: pick up "%%def" and "%%cmd" directive lines from the text.
75 print STDERR "[".length($t)."] Loading macros\n";
# $1 is the directive name (def or cmd), $2 the remainder of that line.
76 while ($t =~ /\n%%(def|cmd)([^\n]*)\n/) {
# A definition needs an "=" that is not preceded by a backslash (and is not
# the very first character of the remainder).
79 $2 =~ /([^\\])=/ || die "= missing in $2";
90 # remove %%beginskip ... %%endskip pairs
92 print STDERR "[".length($t)."] Removing %%beginskip ... %%endskip pairs\n";
# Replace each directive line (including its surrounding newlines) with a
# one-character marker, so that a region can be matched with a character class.
93 while ($t =~ /\n%%beginskip\s*\n/) { $t = $`.$B.$'; }
94 while ($t =~ /\n%%endskip\s*\n/) { $t = $`.$E.$'; }
# Each marker pair with no marker in between is cut out and replaced by a
# single newline.
95 while ($t =~ /$B[^$B$E]*$E/) { $t = $`."\n".$'; }
# Any marker still present means the directives were not properly paired.
96 $t !~ /[$B$E]/ || die "%%beginskip/%%endskip mismatch";
# Macro expansion.  The loops and the pattern match that set $` and $' are not
# part of this excerpt.
100 print STDERR "[".length($t)."] Processing macros (may take a while)\n";
# Both evals rebuild $t as <text before match> . <expansion> . <text after>.
# A %m body is wrapped in double quotes, so it is evaluated as an interpolating
# string; a %c body is spliced in as a raw Perl expression.
# NOTE(review): this is string eval of macro text; if the macros come from the
# document's %%def/%%cmd lines, the document can run arbitrary Perl -- only
# process trusted input.
107 eval "\$t = \$`.\"$m{$_}\".\$';";
110 eval "\$t = \$`.$c{$_}.\$';";
112 die "syntax error: $@" if $@;
116 print STDERR "[".length($t)."] "." next pass\n";
# The commented-out lines below are a disabled alternative strategy, kept by
# the author for reference.
117 # perfectionist's approach:
121 # if (length($&) > $l) {
128 # $t =~ /$i/ || die "internal error";
129 # eval "\$t = \$`.\"$m{$i}\".\$'";
130 # die "syntax error: $@" if $@;
131 # print STDERR "[".length($t)."] "."$i\n";
134 # handle verbatim sections (we're not trying to be perfect here)
136 print STDERR "[".length($t)."] Handling verbatim sections\n";
# Turn the verbatim environment delimiters into the $B/$E markers, separating
# the section from the surrounding text with an empty line.
137 while ($t =~ /\\begin{verbatim}([ \t]*\n)?/) { $t = $`."\n\n".$B.$'; }
138 while ($t =~ /\\end{verbatim}([ \t]*\n)?/) { $t = $`.$E."\n\n".$'; }
# Inline \verb<d>text<d>: the first match captures the delimiter, the second
# one captures the text up to the next occurrence of that delimiter.  The
# delimiter is quoted with \Q...\E because it can be a regex metacharacter
# (\verb|x|, \verb+x+, \verb^x^); unquoted it would change the meaning of the
# pattern.  (The loop body is not part of this excerpt.)
139 while ($t =~ /\\verb([^a-zA-Z \t\n])/ && $t =~ /\\verb\Q$1\E([^\Q$1\E]*)\Q$1\E/) {
# Process each marked section: $a is the text before, $b the verbatim content,
# $c the text after.
142 while ($t =~ /$B([^$B$E]*)$E/) {
143 ($a,$b,$c) = ($`,$1,$');
144 die "no support for \\t yet, sorry" if $b =~ /\t/;
# Escape the content so that later passes treat it as plain text.  Backslashes
# go first, so the backslashes added by the next two lines are not re-escaped.
145 $b =~ s/\\/\\backslash /g;
146 $b =~ s/[~^_%#&{}\$\-]/\\$&/g;
# Quote characters get a backslash in front and a "~" behind.
147 $b =~ s/[`']/\\$&~/g;
# Three consecutive newlines become the "two blank lines" marker.
149 $b =~ s/\n\n\n/$B2/g;
# Left-over markers mean unbalanced or nested verbatim delimiters; print ten
# characters of context on either side, if available, before giving up.
154 if ($t =~ /[$B$E]/) {
155 if ($t =~ /..........[$B$E]........../) { print STDERR "$&\n"; }
156 die "verbatim conflict";
# NOTE(review): the statements that actually hide the escaped braces are not
# part of this excerpt.
159 # hide escaped curly braces
161 print STDERR "[".length($t)."] Hiding escaped curly braces\n";
165 # discard comments and italic corrections
167 print STDERR "[".length($t)."] Discarding comments and italic corrections\n";
# Delete everything from a "%" that is not preceded by a backslash up to and
# including the end of the line.  The substitution is repeated until nothing
# changes, because one /g pass cannot match a "%" whose preceding character
# was consumed by the previous match.
168 while ($t =~ s/([^\\])%[^\n]*\n/$1/g) {};
173 print STDERR "[".length($t)."] No math mode\n";
# Drop every "$" that is not preceded by a backslash; same repeat-until-stable
# idiom as above.
174 while ($t =~ s/([^\\])\$/$1/g) {};
176 # remove tabs and massage blanks
178 print STDERR "[".length($t)."] Removing tabs and massaging blanks\n";
# A backslash followed by a space (LaTeX control space) becomes a plain space.
179 $t =~ s/\\ / /g; # \cmd\ blah
182 # various minor issues
184 print STDERR "[".length($t)."] Dealing with various minor issues\n";
# Simple one-to-one replacements of commands by ASCII text.  "~" is used as a
# space stand-in here and in the table code; presumably it is converted to a
# real space by a later step not shown.
185 $t =~ s/\\rightarrow\s*/->/g;
186 $t =~ s/\\quad\s*/~/g;
187 $t =~ s/\\qquad\s*/~~/g;
190 $t =~ s/\\LaTeX/LaTeX/g;
# Commands with no meaning in plain text are simply removed.
193 $t =~ s/\\protect//g;
194 $t =~ s/\\newpage\s*//g;
# Citations and bibliography.  Several lines of this section, including the
# ones that set $after and close the loops, are not part of this excerpt.
# Each \cite{key1,key2,...} is processed in turn; $1 is the key list.
197 while ($t =~ /\\cite{([^}]+)}/) {
200 for (split(",",$1)) {
# A key that already has a number: append that number and a comma to $t.
201 if (defined $cite{$_}) { $t .= "$cite{$_},"; }
# A new key: assign the next number and remember the key under that number.
203 $cite{$_} = ++$citation;
204 $bibref[$citation] = $_;
# The key must have a \bibitem in $after; that \bibitem is rewritten as a
# description-list item labelled "[N]" (in the double-quoted string, \[ and \]
# are plain brackets).
206 die "unmatched ref $_" unless $after =~ /\\bibitem{$_}/;
207 $after = $`."\\item[\[$citation\]] ".$';
# Continuation of a statement whose first line is not shown: the start of the
# bibliography becomes a "References" section with a description list.
214 s/\\begin{thebibliography}{[^}]*}/\\section{References}\\begin{description}/;
# Close the bibliography's description list.  The pattern includes the closing
# brace of \end{thebibliography}; without it that brace would be left behind
# right after the inserted \end{description}.  The braces are escaped so they
# cannot be read as a quantifier.
215 $t =~ s/\\end\{thebibliography\}/\\end{description}/;
# Footnotes.  Several lines of this section (including the closing braces and
# the code that initialises $d) are not part of this excerpt.
219 print STDERR "[".length($t)."] Handling footnotes\n";
# Normalise \footnote{...} to the mark/text pair, then replace the mark by $X
# and the opening of the text by $Y.
220 $t =~ s/\\footnote{/\\footnotemark\\footnotetext{/g;
221 $t =~ s/\\footnotemark/$X/g;
222 $t =~ s/\\footnotetext{/$Y/g;
# Take a mark and the next footnote-text opener: $a is the text before the
# mark, $c the text between mark and opener, $b everything after the opener.
223 while ($t =~ /$X([^$Y]*)$Y/) {
224 ($a,$b,$c) = ($`,$',$1);
# NOTE(review): the result of this match is presumably used by lines not shown.
225 $t =~ /^[^$Y]*$Y$B1/;
# Build the footnote label: one "*" plus one more for every $Z found in $d
# (each $Z is replaced by $Y as it is counted).
227 for ($s = "*"; $d =~ /$Z/; $d = $`.$Y.$') { $s .= "*"; }
# Protect balanced {...} groups inside the footnote text with $B/$E so that
# the next "}" found is the one that closes the footnote.
229 while ($b =~ /^([^}]*){([^{}]*)}/) { $b = $`.$1.$B.$2.$E.$'; }
230 $b =~ /^([^{}]*)}/ || die "{ } confusion";
# The footnote is emitted as a description list whose item label is the star
# string, framed by blank-line markers and tagged with $Z.
234 $d = "$B1$Z\\begin{description}\\item[$s] $b\\end{description}$B1";
# Replace the leftmost $B1 that is not directly followed by $Z with the
# footnote block ([^$N]* matches any run of non-NUL characters, newlines
# included).
235 if ($t =~ /$B1([^$Z][^$N]*)$/) { $t = $`.$d.$1; }
# No marker may survive this pass; show some context before aborting.
240 if ($t =~ /[$X$Y$Z$B$E]/) {
241 if ($t =~ /..............[$X$Y$Z$B$E]/) { print STDERR "HEY $&\n"; }
242 die "footnote confusion";
# Many lines of this section (loop bodies, closing braces, the code filling
# @d and @w) are not part of this excerpt.
245 # process simple tables ...
247 print STDERR "[".length($t)."] Processing simple tables\n";
248 while ($t =~ /\\begin{tabular}/) { $t = $`.$B.$'; }
249 while ($t =~ /\\end{tabular}/) { $t = $`.$E.$'; }
# Only column specifications made of r, l, c and | are accepted.
# $a: text before the table, $b: text after, $c: table body, $d: column spec.
250 while ($t =~ /$B\{([rlc|]+)\}([^$B$E]*)$E/) {
251 ($a,$b,$c,$d) = ($`,$',$2,$1);
# \hline (with surrounding whitespace) becomes a cell holding only the marker
# $X, followed by a column separator.
253 $c =~ s/[\s\n]*\\hline[\s\n]*/$X&/g;
# tr with /c and /d deletes everything that is not "|", so $e ends up holding
# just the vertical bars of the column spec.
254 ($e = $d) =~ tr/|//cd;
# Consume the column spec one column letter (with adjacent bars) at a time.
256 while ($d =~ /^(\|*)[a-z](\|*)/) {
# Walk the body at every "&" that is not preceded by a backslash.
261 while ($c =~ /([^\\])&/) {
# Track the widest cell seen in column $f.
274 if ($w[$f] < length($_)) { $w[$f] = length($_); }
# Total line width: number of elements in @d, plus two per vertical bar,
# minus one, plus the sum of the column widths.
277 $l = @d+2*length($e)-1;
278 for (@w) { $l += $_; }
# An $X cell stands for \hline: emit a full-width rule and a line break.
282 if ($_ eq $X) { $a .= ("-" x $l)."\\\\"; }
285 if ($d[$f] =~ /^\|/) { $a .= "| "; }
# $g is the padding needed to reach the column width; it is placed after the
# cell (l), split around it, or placed before it (r), using "~" as filler.
286 $g = $w[$f]-length($_);
287 if ($d[$f] =~ /l/) { $a .= $_.("~" x $g); }
289 $a .= ("~" x int($g/2)).$_.("~" x ($g-int($g/2)));
291 if ($d[$f] =~ /r/) { $a .= ("~" x $g).$_; }
293 if ($d[$f] =~ /\|$/) { $a .= "| "; }
# After the last column, end the output row.
294 if ($f == $#d) { $a .= "\\\\"; }
# Left-over markers: print six characters of context on each side (newlines
# included) and abort.
300 if ($t =~ /[$B$E$X]/) {
301 if ($t =~ /(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)[$B$E$X](.|\n)(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)/) { print STDERR "$&\n"; }
302 die "\\begin/end{tabular} mismatch";
# itemize environments (closing braces of the loops are not part of this
# excerpt).
307 print STDERR "[".length($t)."] Formatting lists\n";
308 while ($t =~ /\\begin{itemize}\s*/) { $t = $`.$B.$'; }
309 while ($t =~ /\\end{itemize}\s*/) { $t = $`.$E.$'; }
# The character class excludes both markers, so each match is a list that
# contains no other list; $b is that list including its markers.
310 while ($t =~ /$B[^$B$E]*$E/) {
311 ($a,$b,$c) = ($`,$&,$');
# Mark every \item with $X ...
312 while ($b =~ /\\item\s*/) { $b = $`.$X.$'; }
# ... then turn each item into "- " followed by its text, indented by two
# levels and ended with a line break; the delimiter that ended the item ($2)
# is kept for the next iteration.
313 while ($b =~ /$X([^$X]*)([$X$E])/) {
314 $b = $`."- ".$SI.$SI.$1.$SO.$SO."\\\\"."$2".$';
# Strip the list markers and put the list back, indented by two levels and
# framed by blank-line markers.
316 $b =~ /$B([^$B$E]*)$E/;
317 $t = $a.$SI.$SI.$B1.$1.$SO.$SO."$B1".$c;
319 $t !~ /[$B$E]/ || die "\\begin/\\end{itemize} mismatch";
# description environments (the lines that set $d and $e, and the closing
# braces of the loops, are not part of this excerpt).
320 while ($t =~ /\\begin{description}\s*/) { $t = $`.$B.$'; }
321 while ($t =~ /\\end{description}\s*/) { $t = $`.$E.$'; }
# Each match is a list that contains no other list markers.
322 while ($t =~ /$B[^$B$E]*$E/) {
323 ($a,$b,$c) = ($`,$&,$');
# Mark every \item[ with $X, keeping the bracket.
324 while ($b =~ /\\item\[/) { $b = $`.$X."[".$'; }
325 while ($b =~ /$X\[/) {
# Balanced [...] pairs inside $e are replaced by the second begin/end markers,
# so the next "]" found is the one that closes the item label.
327 while ($e =~ s/\[([^\[\]]*)\]/$BS$1$ES/g) {};
# $1: label, $2: item text, $3: the delimiter ending the item.  The backslash
# in the message is doubled so that it reads "\item", like the other messages.
328 $e =~ /^([^\[\]]*)]\s*([^$X]*)([$X$E])/ || die "\\item problem (1)";
# Label, two filler characters, then the item text indented by two levels.
329 $b = $d.$1."~~".$SI.$SI.$2.$SO.$SO."\\\\".$3.$';
# Strip the list markers and put the list back, indented by two levels and
# framed by blank-line markers.
333 $b =~ /$B([^$B$E]*)$E/;
334 $t = $a.$SI.$SI.$B1.$1.$SO.$SO.$B1.$c;
# No item or list marker may be left once all lists are processed.
336 $t !~ /[$X]/ || die "\\item problem (2)";
337 $t !~ /[$B$E]/ || die "\\begin/\\end{description} mismatch";
# Figures (the bodies of the two "if" statements and the closing braces are
# not part of this excerpt).
341 print STDERR "[".length($t)."] Removing figures\n";
342 while ($t =~ /\\begin{figure}\s*/) { $t = $`.$B.$'; }
343 while ($t =~ /\\end{figure}\s*/) { $t = $`.$E.$'; }
# $b is one complete figure environment including its markers.
344 while ($t =~ /$B[^$B$E]*$E/) {
345 ($a,$b,$c) = ($`,$&,$');
# Look for the figure's label and caption; $1 is the text between the braces.
347 if ($b =~ /\\label{([^}]*)}/) {
351 if ($b =~ /\\caption{([^}]*)}/) {
# Several lines of this section (the definition of $SC, the enclosing loop,
# and the code between the visible statements) are not part of this excerpt.
358 # process sections and labels
360 print STDERR "[".length($t)."] Processing sections and labels\n";
# The abstract is treated as an ordinary section.
361 $t =~ s/\\begin{abstract}/\\section{Abstract}/g;
362 $t =~ s/\\end{abstract}//g;
363 $LB = "\005"; # they don't necessarily have to be unique
# Replace \label and the \section family by one-character markers; for
# sections the run of "sub" prefixes is kept right after the marker.
365 while ($t =~ /\\label{/) { $t = $`.$LB."{".$'; }
366 while ($t =~ /\\((sub)*)section\*?{/) { $t = $`.$SC.$1."{".$'; }
# A label that occurs before any further label or section marker.
369 if ($t =~ /^([^$LB$SC]*)$LB\{([^{}]*)\}/) {
# A section heading: $a is the text before, $b the text after the opening
# brace, $c the "sub" prefix string.
373 if ($t =~ /$SC((sub)*){/) {
374 ($a,$b,$c) = ($`,$',$1);
# Protect balanced {...} groups in the title so that the next "}" found is the
# one closing the heading.
375 while ($b =~ /^([^}]*){([^{}]*)}/) { $b = $`.$1.$B.$2.$E.$'; }
376 $b =~ /^([^{}]*)}\s*/ || die "{ } confusion";
# Underline pattern by nesting depth: length($c)/3 is the number of "sub"
# prefixes, giving "=" for sections, "-" for subsections, "- " for
# subsubsections and no underline for anything deeper.
382 if (($u = ("=","-","- ","")[length($c)/3]) ne "") {
# Repeat the pattern and cut it to exactly the length of $b, on a new line.
383 $u = "\\\\".substr($u x length($b),0,length($b));
# Two blank lines before the heading, one after it.
385 $t = $a.$B2.$b.$u.$B1.$d;
394 print STDERR "[".length($t)."] Handling references\n";
# "Page \pageref{x}" (or "page ...") is rewritten to a plain \ref{x}; the
# word itself is dropped.
395 $t =~ s/[Pp]age \\pageref({[^{}]*})/\\ref$1/g;
# Any other \pageref cannot be resolved in plain text.
396 $t =~ s/\\pageref{[^{}]*}/???/g;
# Replace each \ref{label} by the entry stored for that label in %l, or by
# "???" when the label is unknown.  (The closing brace of the loop is not part
# of this excerpt.)
397 while ($t =~ /\\ref{([^{}]*)}/) {
398 $t = $`.(defined($l{$1}) ? $l{$1} : "???").$';
401 # collapse whitespace
403 print STDERR "[".length($t)."] Collapsing whitespace\n";
# \par becomes an empty line.
404 $t =~ s/\\par\s*/\n\n/g;
# Remove blanks directly before and after a run of newlines.
405 $t =~ s/ *(\n+) */$1/g;
# Map tabs to spaces and squeeze every run of spaces/tabs into one space.
407 $t =~ tr/ \t/ /s; # again
411 print STDERR "[".length($t)."] Handling line breaks\n";
# Here \par becomes the "one blank line" marker.  NOTE(review): all \par were
# already replaced above, so this presumably handles \par produced by lines
# not shown -- confirm.
414 $t =~ s/\\par\s*/$B1/g;
# Summary of the three substitutions below:
#  - an accent command (\` or \') before one of the listed vowels is dropped,
#    leaving the bare vowel;
#  - \` or \' followed by "~" -- the form the verbatim pass produces for quote
#    characters -- is turned back into the plain quote character;
#  - \" before one of the listed vowels is written as that vowel followed by
#    "e" (ae, oe, ue).
# The print statement continues on a line that is not part of this excerpt.
416 # handle accents, umlauts, and double quotes
418 print STDERR "[".length($t)."] Handling accents, umlauts, double quotes ".
420 $t =~ s/\\[`']([AEOUaeou])/$1/g;
421 $t =~ s/\\([`'])~/$1/g;
422 $t =~ s/\\"([AOUaou])/$1e/g;
426 # apply ultimate set of fixes to newlines
428 print STDERR "[".length($t)."] Applying ultimate set of fixes to newlines\n";
# Move indentation commands in front of any run of newlines / blank-line
# markers that precedes them; repeated until nothing moves any more.
429 while ($t =~ s/([\n$B1$B2]+)([$SI$SO])/$2$1/g) {};
# Remove whitespace between two groups of newline / blank-line markers.
430 $t =~ s/([\n$B1$B2]*)\s+([\n$B1$B2]+)/$1$2/g;
# A "one blank line" marker, with an optional newline before it and any
# newlines or further $B1 markers after it, becomes exactly two newlines.
432 $t =~ s/\n?($B1)[\n$B1]*/\n\n/g;
# Likewise a "two blank lines" marker and its neighbours become exactly three
# newlines.
433 $t =~ s/\n*($B2)[\n$B2]*/\n\n\n/g;
# Several lines of this section (the translation call, the initialisation of
# $l, $m and $w, and the closing braces) are not part of this excerpt.
435 # translate what's left
437 print STDERR "[".length($t)."] Final translation\n";
443 # okay, now format and print it
445 print STDERR "[".length($t)."] "."Formatting (may take a while)\n";
# Split the text at every indentation command, newline, or run of blanks;
# $` is the word before the separator and $1 the separator itself.
448 while ($t =~ /([$SI$SO\n]| +)/) {
# Only act when there is a word to place or the separator is blank space.
449 if ($` ne "" || substr($1,0,1) eq " ") {
# The word would push the non-empty current line $l past the width $w.
450 if (length($l)+length($`) > $w && $l ne "") {
# A fresh line starts with $m spaces of indentation.
454 if ($l eq "") { $l = " " x $m; }
# Append the word, and the separator too when it is blank space.
455 $l = $l.$`.(substr($1,0,1) eq " " ? $1 : "");
# Indentation commands adjust the current indentation level.
458 if ($1 eq $SI) { $m++; }
459 if ($1 eq $SO) { $m--; }
463 # $t = s/^ *(\S.*)/\1/;
# Flush the last, possibly incomplete, line.
466 print "$l\n" if $l ne "";
467 print STDERR "Done\n";