2
# Finds potential problems in tex files, and issues warnings to the console
3
# about what it finds. Takes a list of files as its only arguments,
4
# and does checks on all the files listed. The assumption is that these are
5
# valid (or close to valid) LaTeX files. It follows \include statements
6
# recursively to pick up any included tex files.
10
# Currently the following checks are made:
12
# -- Multiple hyphens not inside a verbatim environment (or \verb). These
# should be placed inside a \verb{} construct so they will not be converted
# to a single hyphen by latex and latex2html.
17
# Original creation 3-8-05 by Karl Cunningham karlc -at- keckec -dot- com
23
# Build the regular expression source used to find multiple hyphens in the
# tex files. Several constructs are recognized, but only a match that itself
# starts with a run of hyphens is reported to the user; the other
# alternatives exist so that hyphens inside them are consumed as part of a
# larger match and therefore never reported.
#
# All of the variables below hold regex *source text* (plain strings), not
# compiled patterns.

# Shared fragments for matching environment delimiters.
# NOTE(review): $b has the same name as sort()'s special $b. Do not add a
# sort { ... $a ... $b ... } block inside this variable's scope without
# renaming it first.
my $b = '\\\\begin\\*?\\s*\\{\\s*';  # "\begin", optional "*", then "{" with optional spaces
my $e = '\\\\end\\*?\\s*\\{\\s*';    # "\end", optional "*", then "{" with optional spaces
my $c = '\\s*\\}';                   # optional spaces, then the closing "}"

# An entire verbatim environment, from \begin{verbatim} through the nearest
# \end{verbatim}. The ".*?" only spans line breaks when the pattern is
# applied with the /s modifier.
my $verbatimenv = $b . "verbatim" . $c . ".*?" . $e . "verbatim" . $c;

# A \verb construct: \verb (optionally \verb*), any single delimiter
# character, the shortest possible body, then the same delimiter again.
# The "\1" backreference requires "(.)" to remain the FIRST capturing group
# of the combined pattern, so do not add capture groups ahead of it.
my $verb = '\\\\verb\\*?(.).*?\\1';

# Two or more hyphens with whitespace on both sides. The match starts on the
# whitespace, so it is never mistaken for a bare run of hyphens.
my $hyphsp = '\\s\\-{2,}\\s';

# Any other run of two or more hyphens. This is the case that gets reported.
my $hyphens = '\\-{2,}';

# A \hyperpage{...} command (optionally starred); hyphens inside it are
# ignored.
my $hyperpage = '\\\\hyperpage\\*?\\{.*?\\}';

# Combine the pieces into one alternation. The regex engine takes the
# leftmost match, so a recognized construct that begins before a run of
# hyphens swallows those hyphens.
#
# An earlier variant, kept for reference ($tocentry is not defined in the
# visible part of this file):
#my $teststr = "$verbatimenv|$verb|$tocentry|$hyphens";
my $teststr = join '|', $verbatimenv, $verb, $hyphsp, $hyperpage, $hyphens;
53
# Get a list of include files from the top-level tex file. The first
54
# argument is a reference to the list of files found. The remaining
# arguments are the filenames to check for includes.
57
my ($fileline,$includefile,$includes);
59
while (my $filename = shift) {
60
# Open the file so it can be scanned for \include statements.
61
open my $if,"<$filename" or die "Cannot open input file $filename\n";
67
# If a file is found in an include, process it.
68
if (($includefile) = /\\include\s*\{(.*?)\}/) {
70
# Append .tex to the filename
71
$includefile .= '.tex';
73
# If the include file has already been processed, issue a warning
74
# and don't do it again.
77
if ($_ eq $includefile) {
83
print "$includefile found at line $fileline in $filename was previously included\n";
85
# The file has not been previously found. Save it and
86
# recursively process it.
87
push (@$files,$includefile);
88
get_includes($files,$includefile);
99
my ($filedata,$this,$linecnt,$before);
101
# Build the test string to check for the various environments.
102
# We only do the conversion if the multiple hyphens are outside of a
103
# verbatim environment (either \begin{verbatim}...\end{verbatim} or
104
# \verb{--}). Capture those environments and pass them to the output
107
foreach my $file (@files) {
108
# Open the file and load the whole thing into $filedata. A bit wasteful but
109
# easier to deal with, and we don't have a problem with speed here.
111
open IF,"<$file" or die "Cannot open input file $file";
117
# Set up to process the file data.
120
# Go through the file data from beginning to end. For each match, save what
121
# came before it and what matched. $filedata now becomes only what came
123
# Check the match to see if it starts with a multiple hyphen. If so,
124
# warn the user. Keep track of line numbers so they can be output
125
# with the warning message.
126
while ($filedata =~ /$teststr/os) {
130
$linecnt += $before =~ tr/\n/\n/;
132
# Check if the multiple hyphen is present outside of one of the
133
# acceptable constructs.
134
if ($this =~ /^\-+/) {
135
print "Possible unwanted multiple hyphen found in line ",
136
"$linecnt of file $file\n";
138
$linecnt += $this =~ tr/\n/\n/;
142
##################################################################
144
##################################################################
148
# Collect the tex files to check. get_includes() is given a reference to the
# result list, followed by the filenames supplied on the command line.
get_includes(\@includes,@ARGV);

# Check every collected file for unwanted multiple hyphens.
check_hyphens(@includes);