1
# This file contains subroutines for use by the latex2html system.
2
# This file is executed due to a \usepackage{bacula} statement
3
# in the LaTeX source. The subroutines here implement functionality
4
# specific to the generation of html manuals for the Bacula project.
5
# Some of the added functionality is designed to extend the capabilities
6
# of latex2html and some is to change its behavior.
9
# Returns the minimum of any number of numeric arguments.
12
# Walk the remaining arguments. shift yields undef once the argument list is
# exhausted, so test for definedness rather than truth: a legitimate 0 (or
# other false value) must not end the scan early.
while (defined($test = shift)) {
13
$tmp = $test if ($test < $tmp);
18
# These two are copied from
19
# /usr/lib/latex2html/style/hthtml.perl,
20
# from the subroutine do_cmd_htmladdnormallink.
21
# They have been renamed, then removed the
22
# name argument and reversed the other two arguments.
25
local($text, $url, $href);
26
local($opt, $dummy) = &get_next_optional_argument;
27
$text = &missing_braces unless
28
((s/$next_pair_pr_rx/$text = $2; ''/eo)
29
||(s/$next_pair_rx/$text = $2; ''/eo));
30
$url = &missing_braces unless
31
((s/$next_pair_pr_rx/$url = $2; ''/eo)
32
||(s/$next_pair_rx/$url = $2; ''/eo));
33
# Collapse the first run of whitespace found at the start of any line of $_
# into a single newline. The /m modifier gives ^ its line-by-line meaning;
# it replaces the removed $* variable, which is fatal on modern perls.
s/^\s+/\n/m;
34
$href = &make_href($url,$text);
35
print "\nHREF:$href" if ($VERBOSITY > 3);
42
local($opt, $dummy) = &get_next_optional_argument;
43
$text = &missing_braces unless
44
((s/$next_pair_pr_rx/$text = $2; ''/eo)
45
||(s/$next_pair_rx/$text = $2; ''/eo));
46
&process_ref($cross_ref_mark,$cross_ref_mark,$text);
49
# Returns the first argument with a literal '<' prepended.
# Any further arguments are ignored.
# NOTE(review): the do_cmd_ prefix suggests this is the latex2html handler
# for the \lt command -- confirm against the LaTeX sources.
sub do_cmd_lt {
    my ($rest) = @_;
    return '<' . $rest;
}
50
# Returns the first argument with a literal '>' prepended.
# Any further arguments are ignored.
# NOTE(review): the do_cmd_ prefix suggests this is the latex2html handler
# for the \gt command -- confirm against the LaTeX sources.
sub do_cmd_gt {
    my ($rest) = @_;
    return '>' . $rest;
}
52
# KEC Copied from latex2html.pl and modified to prevent
53
# filename collisions. This is done with a static hash of
54
# already-used filenames. An integer is appended to the
55
# filename if a collision would result without it.
56
# The addition of the integer is done by removing
57
# character(s) before .html if adding the integer would result
58
# in a filename longer than 32 characters. Usually just removing
59
# the character before .html would resolve the collision, but we
60
# add the integer anyway. The first integer that resolves the
62
# If a filename is desired that is 'index.html' or any case
63
# variation of that, it is changed to index_page.html,
64
# index_page1.html, etc.
67
#RRM Extended to allow customised filenames, set $CUSTOM_TITLES
68
# or long title from the section-name, set $LONG_TITLES
70
{ my %used_names; # Static hash.
72
local($sec_name, $packed_curr_sec_id) = @_;
73
local($title,$making_name,$saved) = ('',1,'');
77
# This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
78
&do_cmd_textohtmlindex("\001noincrement");
79
&process_command($sections_rx, $_) if /^$sections_rx/;
80
$title = &make_bacula_title($TITLE)
81
unless ((! $TITLE) || ($TITLE eq $default_title));
83
} elsif ($CUSTOM_TITLES) {
85
# This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
86
&do_cmd_textohtmlindex("\001noincrement");
87
&process_command($sections_rx, $_) if /^$sections_rx/;
88
$title = &custom_title_hook($TITLE)
89
unless ((! $TITLE) || ($TITLE eq $default_title));
93
#ensure no more than 32 characters, including .html extension
94
$title =~ s/^(.{1,27}).*$/$1/;
96
$final_name = join("", ${PREFIX}, $title, $EXTN);
98
# Remove 0's from the end of $packed_curr_sec_id
99
$packed_curr_sec_id =~ s/(_0)*$//;
100
$packed_curr_sec_id =~ s/^\d+$//o; # Top level file
101
$final_name = join("",($packed_curr_sec_id ?
102
"${PREFIX}$NODE_NAME". ++$OUT_NODE : $sec_name), $EXTN);
105
# Change the name from index to index_page to avoid conflicts with
107
$final_name =~ s/^(index)\.html$/$1_Page.html/i;
109
# If the $final_name is already used, put an integer before the
110
# # .html to make it unique.
112
my $saved_name = $final_name;
113
while (exists($used_names{$final_name})) {
114
$final_name = $saved_name;
115
my ($filename,$ext) = $final_name =~ /(.*)(\..*)$/;
116
my $numlen = length(++$integer);
118
# If the filename (after adding the integer) would be longer than
119
# 32 characters, insert the integer within it.
120
if (((my $namelen = length($final_name)) + $numlen) >= 32) {
121
substr($filename,-$numlen) = $integer;
123
$filename .= $integer;
125
$final_name = $filename . $ext;
128
# Save the $final_name in the hash to mark it as being used.
129
$used_names{$final_name} = undef;
131
# Save the first name evaluated here. This is the name of the top-level html file, and
132
# can be used to produce the index.html hard link at the end.
133
$OVERALL_TITLE = $final_name if (!defined $OVERALL_TITLE);
139
sub make_bacula_title {
141
local($num_words) = $LONG_TITLES;
142
#RRM: scan twice for short words, due to the $4 overlap
143
# Cannot use \b , else words break at accented letters
144
$_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
145
$_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
146
#remove leading numbering, unless that's all there is.
148
if (!(/^\d+(\.\d*)*\s*$/)&&(s/^\s*(\d+(\.\d*)*)\s*/$sec_num=$1;''/e))
150
&remove_markers; s/<[^>]*>//g; #remove tags
151
#revert entities, etc. to TeX-form...
152
s/([\200-\377])/"\&#".ord($1).";"/eg;
153
$_ = &revert_to_raw_tex($_);
155
# get $LONG_TITLES number of words from what remains
156
$_ = &get_bacula_words($_, $num_words) if ($num_words);
157
# ...and cleanup accents, spaces and punctuation
158
$_ = join('', ($SHOW_SECTION_NUMBERS ? $sec_num : ''), $_);
161
s/\'s/s/ig; # Replace 's with just the s.
170
# KEC 2-21-05 Changed completely again.
172
# We take the first real words specified by $min from the string.
173
# Remove all markers and markup.
174
# Split the line into words.
175
# Determine how many words we should process.
176
# Return if no words to process.
177
# Determine lengths of the words.
178
# Reduce the length of the longest words in the list until the
179
# total length of all the words is acceptable.
180
# Put the words back together and return the result.
182
sub get_bacula_words {
183
local($_, $min) = @_;
187
my ($oalength,@lengths,$last,$thislen);
190
#no limit if $min is negative
191
$min = 1000 if ($min < 0);
194
#strip unwanted HTML constructs
195
s/<\/?(P|BR|H)[^>]*>/ /g;
196
#remove leading white space and \001 characters
199
s/(<[^>]*>(#[^#]*#)?)//ge;
201
# Split $_ into a list of words.
202
my @wrds = split /\s+|\-{3,}/;
203
$last = &min($min - 1,$#wrds);
204
return '' if ($last < 0);
206
# Get a list of word lengths up to the last word we're to process.
207
# Add one to each for the separator.
208
@lengths = map (length($_)+1,@wrds[0..$last]);
210
$thislen = $maxlen + 1; # One more than the desired max length.
213
@lengths = map (&min($_,$thislen),@lengths);
215
foreach (@lengths) {$oalength += $_;}
216
} until ($oalength <= $maxlen);
217
$words = join(" ",map (substr($wrds[$_],0,$lengths[$_]-1),0..$last));
221
sub do_cmd_htmlfilename {
224
my ($id,$filename) = $input =~ /^<#(\d+)#>(.*?)<#\d+#>/;
228
# do_cmd_addcontentsline adds support for the addcontentsline latex command. It evaluates
229
# the arguments to the addcontentsline command and determines where to put the information. Three
230
# global lists are kept: for table of contents, list of tables, and list of figures entries.
231
# Entries are saved in the lists in the order they are encountered so they can be retrieved
234
sub do_cmd_addcontentsline {
235
&do_cmd_real_addcontentsline(@_);
237
sub do_cmd_real_addcontentsline {
239
my ($extension,$pat,$unit,$entry);
241
# The data is sent to us as fields delimited by their ID #'s. Extract the
242
# fields. The first is the extension of the file to which the cross-reference
243
# would be written by LaTeX, such as {toc}, {lot} or {lof}. The second is either
244
# {section}, {subsection}, etc. for a toc entry, or {table} or {figure}
245
# for a lot, or lof extension (must match the first argument), and
246
# the third is the name of the entry. The position in the document represents
247
# an anchor that must be built to provide the linkage from the entry.
248
$extension = &missing_braces unless (
249
($data =~ s/$next_pair_pr_rx/$extension=$2;''/eo)
250
||($data =~ s/$next_pair_rx/$extension=$2;''/eo));
251
$unit = &missing_braces unless (
252
($data =~ s/$next_pair_pr_rx/$unit=$2;''/eo)
253
||($data =~ s/$next_pair_rx/$unit=$2;''/eo));
254
$entry = &missing_braces unless (
255
($data =~ s/$next_pair_pr_rx/$pat=$1;$entry=$2;''/eo)
256
||($data =~ s/$next_pair_rx/$pat=$1;$entry=$2;''/eo));
258
$contents_entry = &make_contents_entry($extension,$pat,$entry,$unit);
259
return ($contents_entry . $data);
262
# Creates and saves a contents entry (toc, lot, lof) to strings for later use,
263
# and returns the entry to be inserted into the stream.
265
sub make_contents_entry {
266
local($extension,$br_id, $str, $unit) = @_;
270
# If TITLE is not yet available use $before.
271
$TITLE = $saved_title if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title)));
272
$TITLE = $before unless $TITLE;
274
if ($SHOW_SECTION_NUMBERS) {
275
$words = &get_first_words($TITLE, 1);
277
$words = &get_first_words($TITLE, 4);
279
$words = 'no title' unless $words;
282
# any \label in the $str will have already
283
# created a label where the \addcontentsline occurred.
284
# This has to be removed, so that the desired label
285
# will be found on the toc page.
287
if ($str =~ /tex2html_anchor_mark/ ) {
288
$str =~ s/><tex2html_anchor_mark><\/A><A//g;
291
# resolve and clean-up the hyperlink entries
292
# so they can be saved
294
if ($str =~ /$cross_ref_mark/ ) {
295
my ($label,$id,$ref_label);
296
$str =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
297
do { ($label,$id) = ($1,$2);
298
$ref_label = $external_labels{$label} unless
299
($ref_label = $ref_files{$label});
300
'"' . "$ref_label#$label" . '">' .
301
&get_ref_mark($label,$id)}
304
$str =~ s/<\#[^\#>]*\#>//go;
306
# recognise \char combinations, for a \backslash
308
$str =~ s/\&\#;\'134/\\/g; # restore \\s
309
$str =~ s/\&\#;\`<BR> /\\/g; # ditto
310
$str =~ s/\&\#;*SPMquot;92/\\/g; # ditto
312
$thisref = &make_named_href('',"$CURRENT_FILE#$br_id",$str);
315
# Now we build the actual entry that will go in the lot and lof.
316
# If this is the first entry, we have to put a leading newline.
317
if ($unit eq 'table' ) {
318
if (!$table_captions) { $table_captions = "\n";}
319
$table_captions .= "<LI>$thisref\n";
320
} elsif ($unit eq 'figure') {
321
if (!$figure_captions) { $figure_captions = "\n"; }
322
$figure_captions .= "<LI>$thisref\n";
324
"<A NAME=\"$br_id\">$anchor_invisible_mark<\/A>";
327
# This is needed to keep latex2html from trying to make an image for the registered
328
# trademark symbol (R). This wraps the command in a deferred wrapper so it can be
329
# processed as a normal command later on. If this subroutine is not put in, latex2html
330
# invokes latex to create an image for the symbol, which looks bad.
331
sub wrap_cmd_textregistered {
332
local($cmd, $_) = @_;
333
(&make_deferred_wrapper(1).$cmd.&make_deferred_wrapper(0),$_)
337
# Copied from latex2html.pl and modified to create a file of image translations.
338
# The problem is that latex2html creates new image filenames like imgXXX.png, where
339
# XXX is a number sequentially assigned. This is fine but makes for very unfriendly
340
# image filenames. I looked into changing this behavior and it seems very much embedded
341
# into the latex2html code, not easy to change without risking breaking something.
342
# So I'm taking the approach here to write out a file of image filename translations,
343
# to reference the original filenames from the new filenames. This way post-processing
344
# can be done outside of latex2html to rename the files and substitute the meaningful
345
# image names in the html code generated by latex2html. This post-processing is done
346
# by a program external to latex2html.
348
# What we do is this: This subroutine is called to output images.tex, a tex file passed to
349
# latex to convert the original images to .ps. The string $latex_body contains info for
350
# each image file, in the form of a unique id and the original filename. We extract both;
351
# the id is used to look up the new filename in the %id_map hash. The new and old filenames
352
# are output into the file 'filename_translations' separated by \001.
354
sub make_image_file {
356
print "\nWriting image file ...\n";
357
open(ENV,">.$dd${PREFIX}images.tex")
358
|| die "\nCannot write '${PREFIX}images.tex': $!\n";
359
print ENV &make_latex($latex_body);
362
# Call copy_file for the "bbl" extension of $FILE. The '&' call sigil had
# been mis-decoded: '&copy' was turned into a copyright character.
&copy_file($FILE, "bbl");
363
# Call copy_file for the "aux" extension of $FILE. The '&' call sigil had
# been mis-decoded: '&copy' was turned into a copyright character.
&copy_file($FILE, "aux");
364
} if ((%latex_body) && ($latex_body =~ /newpage/));
369
# Copied from latex2html.pl and modified to create a file of image translations.
371
# The problem is that latex2html creates new image filenames like imgXXX.png, where
372
# XXX is a number sequentially assigned. This is fine but makes for very unfriendly
373
# image filenames. I looked into changing this behavior and it seems very much embedded
374
# into the latex2html code, not easy to change without risking breaking something.
375
# So I'm taking the approach here to write out a file of image filename translations,
376
# to reference the original filenames from the new filenames. This post-processing
377
# can be done outside of latex2html to rename the files and substitute the meaningful
378
# image names in the html code generated by latex2html. This post-processing is done
379
# by a program external to latex2html.
381
# What we do is this: This subroutine is called to output process images. Code has been inserted
382
# about 100 lines below this to create the list of filenames to translate. See comments there for
386
# Generate images for unknown environments, equations etc, and replace
387
# the markers in the main text with them.
388
# - $cached_env_img maps encoded contents to image URL's
389
# - $id_map maps $env$id to page numbers in the generated latex file and after
390
# the images are generated, maps page numbers to image URL's
391
# - $page_map maps page_numbers to image URL's (temporary map);
392
# Uses global variables $id_map and $cached_env_img,
393
# $new_page_num and $latex_body
397
local($name, $contents, $raw_contents, $uucontents, $page_num,
398
$uucontents, %page_map, $img);
399
# It is necessary to run LaTeX this early because we need the log file
400
# which contains information used to determine equation alignment
401
if ( $latex_body =~ /newpage/) {
404
# dump a pre-compiled format
405
if (!(-f "${PREFIX}images.fmt")) {
406
print "$INILATEX ./${PREFIX}images.tex\n"
407
if (($DEBUG)||($VERBOSITY > 1));
408
print "dumping ${PREFIX}images.fmt\n"
409
unless ( L2hos->syswait("$INILATEX ./${PREFIX}images.tex"));
411
local ($img_fmt) = (-f "${PREFIX}images.fmt");
413
# use the pre-compiled format
414
print "$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex\n"
415
if (($DEBUG)||($VERBOSITY > 1));
416
L2hos->syswait("$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex");
417
} elsif (-f "${PREFIX}images.dvi") {
418
print "${PREFIX}images.fmt failed, proceeding anyway\n";
420
print "${PREFIX}images.fmt failed, trying without it\n";
421
print "$LATEX ./${PREFIX}images.tex\n"
422
if (($DEBUG)||($VERBOSITY > 1));
423
L2hos->syswait("$LATEX ./${PREFIX}images.tex");
425
} else { &make_latex_images() }
426
# local($latex_call) = "$LATEX .$dd${PREFIX}images.tex";
427
# print "$latex_call\n" if (($DEBUG)||($VERBOSITY > 1));
428
# L2hos->syswait("$latex_call");
429
## print "$LATEX ./${PREFIX}images.tex\n" if (($DEBUG)||($VERBOSITY > 1));
430
## L2hos->syswait("$LATEX ./${PREFIX}images.tex");
433
&process_log_file("./${PREFIX}images.log"); # Get image size info
436
my $img = "image.$IMAGE_TYPE";
437
my $img_path = "$LATEX2HTMLDIR${dd}icons$dd$img";
438
L2hos->Copy($img_path, ".$dd$img")
439
if(-e $img_path && !-e $img);
441
elsif ((!$NOLATEX) && ($latex_body =~ /newpage/) && !($LaTeXERROR)) {
442
print "\nGenerating postscript images using dvips ...\n";
443
&make_tmp_dir; # sets $TMPDIR and $DESTDIR
444
$IMAGE_PREFIX =~ s/^_//o if ($TMPDIR);
447
"$DVIPS -S1 -i $DVIPSOPT -o$TMPDIR$dd$IMAGE_PREFIX .${dd}${PREFIX}images.dvi\n";
448
print $dvips_call if (($DEBUG)||($VERBOSITY > 1));
450
if ((($PREFIX=~/\./)||($TMPDIR=~/\./)) && not($DVIPS_SAFE)) {
451
print " *** There is a '.' in $TMPDIR or $PREFIX filename;\n"
452
. " dvips will fail, so image-generation is aborted ***\n";
454
&close_dbm_database if $DJGPP;
455
L2hos->syswait($dvips_call) && print "Error: $!\n";
456
&open_dbm_database if $DJGPP;
459
# append .ps suffix to the filenames
460
if(opendir(DIR, $TMPDIR || '.')) {
461
# use list-context instead; thanks De-Wei Yin <yin@asc.on.ca>
462
my @ALL_IMAGE_FILES = grep /^$IMAGE_PREFIX\d+$/o, readdir(DIR);
463
foreach (@ALL_IMAGE_FILES) {
464
L2hos->Rename("$TMPDIR$dd$_", "$TMPDIR$dd$_.ps");
468
print "\nError: Cannot read dir '$TMPDIR': $!\n";
471
do {print "\n\n*** LaTeXERROR"; return()} if ($LaTeXERROR);
472
return() if ($LaTeXERROR); # empty .dvi file
473
L2hos->Unlink(".$dd${PREFIX}images.dvi") unless $DEBUG;
475
print "\n *** updating image cache\n" if ($VERBOSITY > 1);
476
while ( ($uucontents, $_) = each %cached_env_img) {
477
delete $cached_env_img{$uucontents}
478
if ((/$PREFIX$img_rx\.$IMAGE_TYPE/o)&&!($DESTDIR&&$NO_SUBDIR));
479
$cached_env_img{$uucontents} = $_
480
if (s/$PREFIX$img_rx\.new/$PREFIX$1.$IMAGE_TYPE/go);
483
# Modified from the original latex2html to translate image filenames to meaningful ones.
485
print "\nWriting imagename_translations file\n";
486
open KC,">imagename_translations" or die "Cannot open filename translation file for writing";
487
my ($oldname_kc,$newname_kc,$temp_kc,%done_kc);
488
while ((undef,$temp_kc) = each %id_map) {
489
# Here we generate the file containing the list of old and new filenames.
490
# The old and new names are extracted from variables in scope at the time
491
# this is run. The values of the %id_map hash contain either the number of the
492
# image file to be created (if an old image file doesn't exist) or the tag to be placed
493
# inside the html file (if an old image file does exist). We extract the info in either
495
if ($temp_kc =~ /^\d+\#\d+$/) {
497
$kcname = $orig_name_map{$temp_kc};
498
$kcname =~ s/\*/star/;
499
($oldname_kc) = $img_params{$kcname} =~ /ALT=\"\\includegraphics\{(.*?)\}/s;
500
($newname_kc) = split (/#/,$temp_kc);
501
$newname_kc = "img" . $newname_kc . ".png";
503
($newname_kc,$oldname_kc) = $temp_kc =~ /SRC=\"(.*?)\".*ALT=\"\\includegraphics\{(.*?)\}/s;
505
# If this is a math-type image, $oldname_kc will be blank. Don't do anything in that case since
506
# there is no meaningful image filename.
507
if (!exists($done_kc{$newname_kc}) and $oldname_kc) {
508
print KC "$newname_kc\001$oldname_kc\n";
510
$done_kc{$newname_kc} = '';
514
print "\n *** removing unnecessary images ***\n" if ($VERBOSITY > 1);
515
while ( ($name, $page_num) = each %id_map) {
516
$contents = $latex_body{$name};
518
if ($page_num =~ /^\d+\#\d+$/) { # If it is a page number
519
do { # Extract the page, convert and save it
520
$img = &extract_image($page_num,$orig_name_map{$page_num});
521
if ($contents =~ /$htmlimage_rx/) {
522
$uucontents = &special_encoding($env,$2,$contents);
523
} elsif ($contents =~ /$htmlimage_pr_rx/) {
524
$uucontents = &special_encoding($env,$2,$contents);
526
$uucontents = &encode(&addto_encoding($contents,$contents));
528
if (($HTML_VERSION >=3.2)||!($contents=~/$order_sensitive_rx/)){
529
$cached_env_img{$uucontents} = $img;
531
# Blow it away so it is not saved for next time
532
delete $cached_env_img{$uucontents};
533
print "\nimage $name not recycled, contents may change (e.g. numbering)";
535
$page_map{$page_num} = $img;
536
} unless ($img = $page_map{$page_num}); # unless we've just done it
537
$id_map{$name} = $img;
539
$img = $page_num; # it is already available from previous runs
541
print STDOUT " *** image done ***\n" if ($VERBOSITY > 2);
544
"\nOne of the images is more than one page long.\n".
545
"This may cause the rest of the images to get out of sync.\n\n")
546
if (-f sprintf("%s%.3d%s", $IMAGE_PREFIX, ++$new_page_num, ".ps"));
547
print "\n *** no more images ***\n" if ($VERBOSITY > 1);
548
# MRO: The following cleanup seems to be incorrect: The DBM is
549
# still open at this stage, this causes a lot of unlink errors
551
#do { &cleanup; print "\n *** clean ***\n" if ($VERBOSITY > 1);}
555
## KEC: Copied &text_cleanup here to modify it. It was filtering out double
556
# dashes such as {-}{-}sysconfig. This would be used as an illustration
557
# of a command-line argument. It was being changed to a single dash.
559
# This routine must be called once on the text only,
560
# else it will "eat up" sensitive constructs.
562
# MRO: replaced $* with /m
563
s/(\s*\n){3,}/\n\n/gom; # Replace consecutive blank lines with one
564
s/<(\/?)P>\s*(\w)/<$1P>\n$2/gom; # clean up paragraph starts and ends
565
s/$O\d+$C//go; # Get rid of bracket id's
566
s/$OP\d+$CP//go; # Get rid of processed bracket id's
567
# KEC: This is the line causing trouble...
568
#s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : "-"/ge;
569
s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : $&/ge;
572
#JKR: There should be no more comments in the source now.
573
#s/([^\\]?)%/$1/go; # Remove the comment character
574
# Cannot treat \, as a command because , is a delimiter ...
576
# Replace tildes with non-breaking spaces
580
# remove redundant (not <P></P>) empty tags, incl. with attributes
581
s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
582
s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
583
# remove redundant empty tags (not </P><P> or <TD> or <TH>)
584
s/<\/(TT|[^PTH][A-Z]+)><\1>//g;
585
s/<([^PD ]+)(\s[^>]*)?>\n*<\/\1>//g;
589
# Replace ^^ special chars (according to p.47 of the TeX book)
590
# Useful when coming from the .aux file (german umlauts, etc.)
591
s/\^\^([^0-9a-f])/chr((64+ord($1))&127)/ge;
592
s/\^\^([0-9a-f][0-9a-f])/chr(hex($1))/ge;
598
1; # Must be present as the last line.