1
# This module does multiple indices, supporting the style of the LaTeX 'index'
5
# 16-Feb-2005 -- Original Creation. Karl E. Cunningham
6
# 14-Mar-2005 -- Clarified and Consolidated some of the code.
7
# Changed to smoothly handle single and multiple indices.
9
# Two LaTeX index formats are supported...
10
# --- SINGLE INDEX ---
11
# \usepackage{makeidx}
19
# --- MULTIPLE INDICES ---
21
# \usepackage{makeidx}
23
# \makeindex -- latex2html doesn't care but LaTeX does.
24
# \newindex{ref1}{ext1}{ext2}{title1}
25
# \newindex{ref2}{ext1}{ext2}{title2}
26
# \newindex{ref3}{ext1}{ext2}{title3}
27
# \index[ref1]{entry1}
28
# \index[ref1]{entry2}
29
# \index[ref3]{entry3}
30
# \index[ref2]{entry4}
32
# \index[ref3]{entry6}
40
# For the multiple-index style, each index is identified by the ref argument to \newindex, \index,
41
# and \printindex. A default index is allowed, which is indicated by omitting the optional
42
# argument. The default index does not require a \newindex command. As \index commands
43
# are encountered, their entries are stored according
44
# to the ref argument. When the \printindex command is encountered, the stored index
45
# entries for that argument are retrieved and printed. The title for each index is taken
46
# from the last argument in the \newindex command.
47
# While processing \index and \printindex commands, if no argument is given the index entries
48
# are built into a default index. The title of the default index is simply "Index".
49
# This makes the difference between single- and multiple-index processing trivial.
51
# Another method can be used by omitting the \printindex command and just using \include to
52
# pull in index files created by the makeindex program. These files will start with
53
# \begin{theindex}. This command is used to determine where to print the index. Using this
54
# approach, the indices will be output in the same order as the \newindex commands were
55
# originally found (see below). Using a combination of \printindex and \include{indexfile} has not
56
# been tested and may produce undesirable results.
58
# The index data are stored in a hash for later sorting and output. As \printindex
59
# commands are handled, the order in which they were found in the TeX files is saved,
60
# associated with the ref argument to \printindex.
62
# We use the original %index hash to store the index data into. We append a \002 followed by the
63
# name of the index to isolate the entries in different indices from each other. This is necessary
64
# so that different indices can have entries with the same name. For the default index, the \002 is
65
# appended without the name.
67
# Since the index order in the output cannot be determined if the \include{indexfile}
68
# command is used, the order will be assumed from the order in which the \newindex
69
# commands were originally seen in the TeX files. This order is saved as well as the
70
# order determined from any \printindex[ref] commands. If \printindex commands are used
71
# to specify the index output, that order will be used. If the \include{idxfile} command
72
# is used, the order of the original \newindex commands will be used. In this case the
73
# default index will be printed last since it doesn't have a corresponding \newindex
74
# command and its order cannot be determined. Mixing \printindex and \include{idxfile}
75
# commands in the same file is likely to produce less than satisfactory results.
78
# The hash containing index data is named %indices. It contains the following data:
81
# $ref1 => $indextitle ,
82
# $ref2 => $indextitle ,
85
# 'newcmdorder' => [ ref1, ref2, ..., * ], # asterisk indicates the position of the default index.
86
# 'printindorder' => [ ref1, ref2, ..., * ], # asterisk indicates the position of the default index.
90
# Globals to handle multiple indices.
93
# This tells the system to use up to 7 words in index entries.
97
# Handles the \newindex command. This is called if the \newindex command is
98
# encountered in the LaTeX source. Gets the index ref and title from the arguments.
99
# Saves the index ref and title.
100
# Note that we are called once to handle multiple \newindex commands that are
102
sub do_cmd_newindex {
104
# The data is sent to us as fields delimited by their ID #'s. We extract the
106
foreach my $line (split("\n",$data)) {
107
my @fields = split (/(?:\<\#\d+?\#\>)+/,$line);
109
# The index name and title are elements 1 and 4 of @fields, i.e. the ref and title arguments of \newindex.
110
if ($line =~ /^</ or $line =~ /^\\newindex/) {
111
my ($indexref,$indextitle) = ($fields[1],$fields[4]);
112
$indices{'title'}{$indexref} = $indextitle;
113
push (@{$indices{'newcmdorder'}},$indexref);
119
# KEC -- Copied from makeidx.perl and modified to do multiple indices.
120
# Processes an \index entry from the LaTeX file.
121
# Gets the optional argument from the index command, which is the name of the index
122
# into which to place the entry.
123
# Drops the brackets from the index_name
124
# Puts the index entry into the html stream
125
# Creates the tokenized index entry (which also saves the index entry info
128
local($pat,$idx_entry,$index_name);
129
# catches opt-arg from \index commands for index.sty
130
$index_name = &get_next_optional_argument;
131
$index_name = "" unless defined $index_name;
132
# Drop leading and trailing brackets from the index name.
133
$index_name =~ s/^\[|\]$//g;
135
$idx_entry = &missing_braces unless (
136
(s/$next_pair_pr_rx/$pat=$1;$idx_entry=$2;''/e)
137
||(s/$next_pair_rx/$pat=$1;$idx_entry=$2;''/e));
139
if ($index_name and defined $idx_entry and
140
!defined $indices{'title'}{$index_name}) {
141
print STDERR "\nInvalid Index Name: \\index \[$index_name\]\{$idx_entry\}\n";
144
$idx_entry = &named_index_entry($pat, $idx_entry,$index_name);
148
# Creates and saves an index entry in the index hashes.
149
# Modified to do multiple indices.
150
# Creates an index_key that allows index entries to have the same characteristics but be in
151
# different indices. This index_key is the regular key with the index name appended.
152
# Save the index order for the entry in the %index_order hash.
153
sub named_index_entry {
154
# Arguments, in order: the anchor id of this \index occurrence ($br_id, used
# below as the NAME of the generated anchor), the raw text of the index entry
# ($str), and the name of the index the entry belongs to ($index_name).
local($br_id, $str, $index_name) = @_;
156
# Escape the quoting etc. characters.
# The substitutions below are order-dependent: each quoted special character
# (;SPMquot; followed by !, @ or |) is first parked in a temporary control
# character, the unquoted special is then turned into its separator token
# (\001, \002 or \003), and finally the parked character is restored as a
# literal. ;SPMquot; is presumably latex2html's stand-in for a literal double
# quote -- TODO confirm.
160
# NOTE(review): `$*` is the legacy multi-line matching switch; perlvar
# documents it as no longer supported on modern perls -- confirm against the
# perl this module runs under.
$* = 1; $str =~ s/\n\s*/ /g; $* = 0; # replace each newline (and the whitespace after it) with one space
161
# protect \001 occurring with images
162
$str =~ s/\001/\016/g; # 0x01 -> 0x0E (reverted per key later on)
163
# NOTE(review): \011 is also the TAB character, so a literal tab already
# present in $str would come back as a double backslash when \011 is
# restored below.
$str =~ s/\\\\/\011/g; # Double backslash -> 0x09
164
$str =~ s/\\;SPMquot;/\012/g; # \;SPMquot; -> 0x0A (free as a placeholder: newlines were removed above)
165
$str =~ s/;SPMquot;!/\013/g; # ;SPMquot;! -> 0x0B
166
$str =~ s/!/\001/g; # Exclamation point -> 0x01
167
$str =~ s/\013/!/g; # 0x0B -> Exclamation point
168
$str =~ s/;SPMquot;@/\015/g; # ;SPMquot;@ -> 0x0D
169
$str =~ s/@/\002/g; # At sign -> 0x02
170
$str =~ s/\015/@/g; # 0x0D -> At sign
171
$str =~ s/;SPMquot;\|/\017/g; # ;SPMquot;| -> 0x0F
172
$str =~ s/\|/\003/g; # Vertical line -> 0x03
173
$str =~ s/\017/|/g; # 0x0F -> Vertical line
174
# NOTE(review): `\1` in the replacement is the old sed-style spelling of $1;
# perlre recommends $1 there.
$str =~ s/;SPMquot;(.)/\1/g; # drop any remaining ;SPMquot;, keeping the character that follows it
175
$str =~ s/\012/;SPMquot;/g; # 0x0A -> ;SPMquot;
176
$str =~ s/\011/\\\\/g; # 0x09 -> Double backslash
177
# \003 now stands for an unquoted '|'; split at the first one only, so that
# everything after it ends up in $pageref.
local($key_part, $pageref) = split("\003", $str, 2);
179
# For any keys of the form: blablabla!blablabla, which want to be split at the
180
# exclamation point, replace the ! with a comma and a space. We don't do it
181
# that way for this index.
182
$key_part =~ s/\001/, /g;
183
local(@keys) = split("\001", $key_part);
184
# If TITLE is not yet available use $before.
185
$TITLE = $saved_title if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title)));
186
$TITLE = $before unless $TITLE;
189
if ($SHOW_SECTION_NUMBERS) { $words = &make_idxnum; }
190
elsif ($SHORT_INDEX) { $words = &make_shortidxname; }
191
else { $words = &make_idxname; }
192
local($super_key) = '';
193
local($sort_key, $printable_key, $cur_key);
194
foreach $key (@keys) {
195
$key =~ s/\016/\001/g; # revert protected \001s
196
($sort_key, $printable_key) = split("\002", $key);
199
# any \label in the printable-key will have already
200
# created a label where the \index occurred.
201
# This has to be removed, so that the desired label
202
# will be found on the Index page instead.
204
if ($printable_key =~ /tex2html_anchor_mark/ ) {
205
$printable_key =~ s/><tex2html_anchor_mark><\/A><A//g;
206
local($tmpA,$tmpB) = split("NAME=\"", $printable_key);
207
($tmpA,$tmpB) = split("\"", $tmpB);
208
$ref_files{$tmpA}='';
209
$index_labels{$tmpA} = 1;
212
# resolve and clean-up the hyperlink index-entries
213
# so they can be saved in an index.pl file
215
if ($printable_key =~ /$cross_ref_mark/ ) {
216
local($label,$id,$ref_label);
217
# $printable_key =~ s/$cross_ref_mark#(\w+)#(\w+)>$cross_ref_mark/
218
$printable_key =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
219
do { ($label,$id) = ($1,$2);
220
$ref_label = $external_labels{$label} unless
221
($ref_label = $ref_files{$label});
222
'"' . "$ref_label#$label" . '">' .
223
&get_ref_mark($label,$id)}
226
$printable_key =~ s/<\#[^\#>]*\#>//go;
228
# recognise \char combinations, for a \backslash
230
$printable_key =~ s/\&\#;\'134/\\/g; # restore \\s
231
$printable_key =~ s/\&\#;\`<BR> /\\/g; # ditto
232
$printable_key =~ s/\&\#;*SPMquot;92/\\/g; # ditto
234
# $sort_key .= "@$printable_key" if !($printable_key); # RRM
235
$sort_key .= "@$printable_key" if !($sort_key); # RRM
236
$sort_key =~ tr/A-Z/a-z/;
238
$cur_key = $super_key . "\001" . $sort_key;
239
$sub_index{$super_key} .= $cur_key . "\004";
241
$cur_key = $sort_key;
244
# Append the $index_name to the current key with a \002 delimiter. This will
245
# allow the same index entry to appear in more than one index.
246
$index_key = $cur_key . "\002$index_name";
248
$index{$index_key} .= "";
252
# if there is no printable key, but one is known from
253
# a previous index-entry, then use it.
255
if (!($printable_key) && ($printable_key{$index_key}))
256
{ $printable_key = $printable_key{$index_key}; }
257
# if (!($printable_key) && ($printable_key{$cur_key}))
258
# { $printable_key = $printable_key{$cur_key}; }
260
# do not overwrite the printable_key if it contains an anchor
262
if (!($printable_key{$index_key} =~ /tex2html_anchor_mark/ ))
263
{ $printable_key{$index_key} = $printable_key || $key; }
264
# if (!($printable_key{$cur_key} =~ /tex2html_anchor_mark/ ))
265
# { $printable_key{$cur_key} = $printable_key || $key; }
267
$super_key = $cur_key;
271
# page-ranges, from |( and |) and |see
274
if ($pageref eq "\(" ) {
277
} elsif ($pageref eq "\)" ) {
279
local($next) = $index{$index_key};
280
# local($next) = $index{$cur_key};
281
# $next =~ s/[\|] *$//;
282
$next =~ s/(\n )?\| $//;
283
$index{$index_key} = "$next to ";
284
# $index{$cur_key} = "$next to ";
289
$pageref =~ s/\s*$//g; # remove trailing spaces
290
if (!$pageref) { $pageref = ' ' }
291
$pageref =~ s/see/<i>see <\/i> /g;
294
# check if $pageref corresponds to a style command.
295
# If so, apply it to the $words.
297
local($tmp) = "do_cmd_$pageref";
299
$words = &$tmp("<#0#>$words<#0#>");
300
$words =~ s/<\#[^\#]*\#>//go;
306
# any \label in the pageref section will have already
307
# created a label where the \index occurred.
308
# This has to be removed, so that the desired label
309
# will be found on the Index page instead.
312
if ($pageref =~ /tex2html_anchor_mark/ ) {
313
$pageref =~ s/><tex2html_anchor_mark><\/A><A//g;
314
local($tmpA,$tmpB) = split("NAME=\"", $pageref);
315
($tmpA,$tmpB) = split("\"", $tmpB);
316
$ref_files{$tmpA}='';
317
$index_labels{$tmpA} = 1;
320
# resolve and clean-up any hyperlinks in the page-ref,
321
# so they can be saved in an index.pl file
323
if ($pageref =~ /$cross_ref_mark/ ) {
324
local($label,$id,$ref_label);
325
# $pageref =~ s/$cross_ref_mark#(\w+)#(\w+)>$cross_ref_mark/
326
$pageref =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
327
do { ($label,$id) = ($1,$2);
328
$ref_files{$label} = ''; # ???? RRM
329
if ($index_labels{$label}) { $ref_label = ''; }
330
else { $ref_label = $external_labels{$label}
331
unless ($ref_label = $ref_files{$label});
333
'"' . "$ref_label#$label" . '">' . &get_ref_mark($label,$id)}/geo;
335
$pageref =~ s/<\#[^\#>]*\#>//go;
337
if ($pageref eq ' ') { $index{$index_key}='@'; }
338
else { $index{$index_key} .= $pageref . "\n | "; }
340
local($thisref) = &make_named_href('',"$CURRENT_FILE#$br_id",$words);
342
$index{$index_key} .= $thisref."\n | ";
344
#print "\nREF: $sort_key : $index_key :$index{$index_key}";
346
#join('',"<A NAME=$br_id>$anchor_invisible_mark<\/A>",$_);
348
"<A NAME=\"$br_id\">$anchor_invisible_mark<\/A>";
352
# KEC. -- Copied from makeidx.perl, then modified to do multiple indices.
353
# Feeds the index entries to the output. This is called for each index to be built.
355
# Generates a list of lookup keys for index entries, from both %printable_keys
357
# Sorts the keys according to index-sorting rules.
358
# Removes keys with a 0x01 token. (duplicates?)
359
# Builds a string to go to the index file.
360
# Adds the index entries to the string if they belong in this index.
361
# Keeps track of which index is being worked on, so only the proper entries
363
# Places the index just built in to the output at the proper place.
364
{ my $index_number = 0;
366
print "\nDoing the index ... Index Number $index_number\n";
367
local($key, @keys, $next, $index, $old_key, $old_html);
368
my ($idx_ref,$keyref);
369
# RRM, 15.6.96: index constructed from %printable_key, not %index
370
@keys = keys %printable_key;
372
while (/$idx_mark/) {
373
# Get the index reference from what follows the $idx_mark and
374
# remove it from the string.
375
s/$idxmark\002(.*?)\002/$idxmark/;
378
# include non- makeidx index-entries
379
foreach $key (keys %index) {
380
next if $printable_key{$key};
382
if ($key =~ s/###(.*)$//) {
383
next if $printable_key{$key};
385
$printable_key{$key} = $key;
386
if ($index{$old_key} =~ /HREF="([^"]*)"/i) {
388
$old_html =~ /$dd?([^#\Q$dd\E]*)#/;
390
} else { $old_html = '' }
391
$index{$key} = $index{$old_key} . $old_html."</A>\n | ";
394
@keys = sort makeidx_keysort @keys;
395
@keys = grep(!/\001/, @keys);
397
foreach $key (@keys) {
398
my ($keyref) = $key =~ /.*\002(.*)/;
399
next unless ($idx_ref eq $keyref); # KEC.
400
$index .= &add_idx_key($key);
403
print "$cnt Index Entries Added\n";
404
$index = '<DD>'.$index unless ($index =~ /^\s*<D(D|T)>/);
405
$index_number++; # KEC.
407
print "(compact version with Legend)";
408
local($num) = ( $index =~ s/\<D/<D/g );
410
s/$idx_mark/$preindex<HR><DL>\n$index\n<\/DL>$preindex/o;
412
s/$idx_mark/$preindex<HR><DL>\n$index\n<\/DL>/o;
415
s/$idx_mark/<DL COMPACT>\n$index\n<\/DL>/o; }
420
# KEC. Copied from latex2html.pl and modified to support multiple indices.
421
# The bibliography and the index should be treated as separate sections
422
# in their own HTML files. The \bibliography{} command acts as a sectioning command
423
# that has the desired effect. But when the bibliography is constructed
424
# manually using the thebibliography environment, or when using the
425
# theindex environment it is not possible to use the normal sectioning
426
# mechanism. This subroutine inserts a \bibliography{} or a dummy
427
# \textohtmlindex command just before the appropriate environments
428
# to force sectioning.
429
sub add_bbl_and_idx_dummy_commands {
430
local($id) = $global{'max_id'};
432
s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
433
## if ($bbl_cnt == 1) {
434
s/([\\]begin\s*$O\d+$C\s*thebibliography)/$id++; "\\bibliography$O$id$C$O$id$C $1"/geo;
436
$global{'max_id'} = $id;
437
# KEC. Modified to global substitution to place multiple index tokens.
438
s/[\\]begin\s*($O\d+$C)\s*theindex/\\textohtmlindex$1/go;
439
# KEC. Modified to pick up the optional argument to \printindex
440
s/[\\]printindex\s*(\[.*?\])?/
441
do { (defined $1) ? "\\textohtmlindex $1" : "\\textohtmlindex []"; } /ego;
442
&lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
445
# KEC. Copied from latex2html.pl and modified to support multiple indices.
446
# For each textohtmlindex mark found, determine the index titles and headers.
447
# We place the index ref in the header so the proper index can be generated later.
448
# For the default index, the index ref is blank.
450
# One problem is that this routine is called twice.. Once for processing the
451
# command as originally seen, and once for processing the command when
452
# doing the name for the index file. We can detect that by looking at the
453
# id numbers (or ref) surrounding the \theindex command, and not incrementing
454
# index_number unless a new id (or ref) is seen. This has the side effect of
455
# having to unconventionally start the index_number at -1. But it works.
457
# Gets the title from the list of indices.
458
# If this is the first index, save the current file name ($CURRENT_FILE) in $first_idx_file. This is what's referenced
459
# in the navigation buttons.
460
# Increment the index_number for next time.
461
# If the indexname command is defined or a newcommand defined for indexname, do it.
462
# Save the index TITLE in the toc
463
# Save the first_idx_file into the idxfile. This goes into the nav buttons.
464
# Build index_labels if needed.
465
# Create the index headings and put them in the output stream.
467
{ my $index_number = 0; # Will be incremented before use.
468
my $first_idx_file; # Static
469
my $no_increment = 0;
471
sub do_cmd_textohtmlindex {
473
my ($idxref,$idxnum,$index_name);
475
# We get called from make_name with the first argument = "\001noincrement". This is a sign
476
# to not increment $index_number the next time we are called. We get called twice, once
477
# by make_name and once by process_command. Unfortunately, make_name calls us just to set the name
478
# but doesn't use the result so we get called a second time by process_command. This works fine
479
# except when there are multiple indices and they aren't named, which is the case
480
# when the index is inserted by an include command in latex. In these cases we are only able to use
481
# the index number to decide which index to draw from, and we don't know how to increment that index
482
# number if we get called a variable number of times for the same index, as is the case between
483
# making html (one output file) and web (multiple output files) output formats.
484
if (/\001noincrement/) {
489
# Remove (but save) the index reference
490
s/^\s*\[(.*?)\]/{$idxref = $1; "";}/e;
492
# If we have an $idxref, the index name was specified. In this case, we have all the
493
# information we need to carry on. Otherwise, we need to get the idxref
494
# from the $index_number and set the name to "Index".
496
$index_name = $indices{'title'}{$idxref};
498
if (defined ($idxref = $indices{'newcmdorder'}->[$index_number])) {
499
$index_name = $indices{'title'}{$idxref};
502
$index_name = "Index";
506
$idx_title = "Index"; # The name displayed in the nav bar text.
508
# Only set $idxfile if we are at the first index. This will point the
509
# navigation panel to the first index file rather than the last.
510
$first_idx_file = $CURRENT_FILE if ($index_number == 0);
511
$idxfile = $first_idx_file; # Pointer for the Index button in the nav bar.
512
$toc_sec_title = $index_name; # Index link text in the toc.
513
$TITLE = $toc_sec_title; # Title for this index, from which its filename is built.
514
if (%index_labels) { &make_index_labels(); }
515
if (($SHORT_INDEX) && (%index_segment)) { &make_preindex(); }
516
else { $preindex = ''; }
517
local $idx_head = $section_headings{'textohtmlindex'};
518
local($heading) = join(''
519
, &make_section_heading($TITLE, $idx_head)
520
, $idx_mark, "\002", $idxref, "\002" );
521
local($pre,$post) = &minimize_open_tags($heading);
522
$index_number++ unless ($no_increment);
524
join('',"<BR>\n" , $pre, $_);
528
# Returns an index key, given the key passed as the first argument.
529
# Not modified for multiple indices.
532
local($index, $next);
533
if (($index{$key} eq '@' )&&(!($index_printed{$key}))) {
534
if ($SHORT_INDEX) { $index .= "<DD><BR>\n<DT>".&print_key."\n<DD>"; }
535
else { $index .= "<DT><DD><BR>\n<DT>".&print_key."\n<DD>"; }
536
} elsif (($index{$key})&&(!($index_printed{$key}))) {
538
$next = "<DD>".&print_key."\n : ". &print_idx_links;
540
$next = "<DT>".&print_key."\n<DD>". &print_idx_links;
542
$index .= $next."\n";
543
$index_printed{$key} = 1;
546
if ($sub_index{$key}) {
547
local($subkey, @subkeys, $subnext, $subindex);
548
@subkeys = sort(split("\004", $sub_index{$key}));
550
$index .= "<DD>".&print_key unless $index_printed{$key};
553
$index .= "<DT>".&print_key."\n<DD>" unless $index_printed{$key};
554
$index .= "<DL COMPACT>\n";
556
foreach $subkey (@subkeys) {
557
$index .= &add_sub_idx_key($subkey) unless ($index_printed{$subkey});
564
1; # Must be present as the last line.