1
# /=====================================================================\ #
2
# | LaTeXML::Global | #
3
# | Global constants, accessors and constructors | #
4
# |=====================================================================| #
5
# | Part of LaTeXML: | #
6
# | Public domain software, produced as part of work done by the | #
7
# | United States Government & not subject to copyright in the US. | #
8
# |---------------------------------------------------------------------| #
9
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
10
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
11
# \=========================================================ooo==U==ooo=/ #
13
#======================================================================
14
# This module collects all the commonly useful constants and constructors
15
# that other modules and package implementations are likely to need.
16
# This should be used in a context where presumably all the required
17
# LaTeXML modules that implement the various classes have already been loaded.
19
# Yes, a lot of stuff is exported, polluting your namespace.
20
# Thus, you use this module only if you _need_ the functionality!
21
#======================================================================
22
package LaTeXML::Global;
24
use LaTeXML::Common::XML;
27
use base qw(Exporter);
29
# Global STATE; This gets bound by LaTeXML.pm
32
qw( CC_ESCAPE CC_BEGIN CC_END CC_MATH
33
CC_ALIGN CC_EOL CC_PARAM CC_SUPER
34
CC_SUB CC_IGNORE CC_SPACE CC_LETTER
35
CC_OTHER CC_ACTIVE CC_COMMENT CC_INVALID
36
CC_CS CC_NOTEXPANDED ),
38
qw( &T_BEGIN &T_END &T_MATH &T_ALIGN &T_PARAM &T_SUB &T_SUPER &T_SPACE
39
&T_LETTER &T_OTHER &T_ACTIVE &T_COMMENT &T_CS
42
&Tokenize &TokenizeInternal &Explode &UnTeX
43
&StartSemiverbatim &EndSemiverbatim),
44
# Number & Dimension constructors
45
qw( &Number &Float &Dimension &MuDimension &Glue &MuGlue &Pair &PairList),
46
# Error & Progress reporting
47
qw( &NoteProgress &NoteBegin &NoteEnd &Fatal &Error &Warn &Info),
49
qw(&Stringify &ToString &Equals),
50
# And, anything exported from LaTeXML::Common::XML
51
@LaTeXML::Common::XML::EXPORT
54
#======================================================================
55
# Catcodes & Standard Token constructors.
56
# CC_whatever names the catcode numbers
57
# T_whatever creates a token with the corresponding catcode,
58
# some take a string argument, if they don't have a `standard' character.
60
# Category codes ("catcodes"), as compile-time constants.
# The stray bare line-number lines that had crept in between these
# declarations were not valid Perl and have been dropped.
use constant {
  CC_ESCAPE  => 0,
  CC_BEGIN   => 1,
  CC_END     => 2,
  CC_MATH    => 3,
  CC_ALIGN   => 4,
  CC_EOL     => 5,
  CC_PARAM   => 6,
  CC_SUPER   => 7,
  CC_SUB     => 8,
  CC_IGNORE  => 9,
  CC_SPACE   => 10,
  CC_LETTER  => 11,
  CC_OTHER   => 12,
  CC_ACTIVE  => 13,
  CC_COMMENT => 14,
  CC_INVALID => 15,
  # Extended Catcodes for expanded output.
  CC_CS          => 16,
  CC_NOTEXPANDED => 17,
};
79
# Can use constants here; they should never be modified.
# Each is a pre-built LaTeXML::Token, i.e. a blessed [$string, $catcode] pair,
# handed out to every caller of the matching accessor below.
# (Stray bare line-number lines between these statements were not valid Perl
# and have been dropped.)
our $CONSTANT_T_BEGIN = bless ['{',  1],  'LaTeXML::Token';
our $CONSTANT_T_END   = bless ['}',  2],  'LaTeXML::Token';
our $CONSTANT_T_MATH  = bless ['$',  3],  'LaTeXML::Token';
our $CONSTANT_T_ALIGN = bless ['&',  4],  'LaTeXML::Token';
our $CONSTANT_T_PARAM = bless ['#',  6],  'LaTeXML::Token';
our $CONSTANT_T_SUPER = bless ['^',  7],  'LaTeXML::Token';
our $CONSTANT_T_SUB   = bless ['_',  8],  'LaTeXML::Token';
our $CONSTANT_T_SPACE = bless [' ',  10], 'LaTeXML::Token';
our $CONSTANT_T_CR    = bless ["\n", 10], 'LaTeXML::Token';

# Accessors: each takes no arguments and returns the one shared token above
# (the same object on every call, not a copy).
# Too bad we can't REALLY get inlining here...
sub T_BEGIN() { return $CONSTANT_T_BEGIN; }
sub T_END()   { return $CONSTANT_T_END; }
sub T_MATH()  { return $CONSTANT_T_MATH; }
sub T_ALIGN() { return $CONSTANT_T_ALIGN; }
sub T_PARAM() { return $CONSTANT_T_PARAM; }
sub T_SUPER() { return $CONSTANT_T_SUPER; }
sub T_SUB()   { return $CONSTANT_T_SUB; }
sub T_SPACE() { return $CONSTANT_T_SPACE; }
sub T_CR()    { return $CONSTANT_T_CR; }
100
# Constructors for tokens whose character(s) the caller supplies.
# Each returns a fresh LaTeXML::Token, i.e. a blessed [$string, $catcode] pair,
# with catcode 11 (letter), 12 (other), 13 (active), 14 (comment) or
# 16 (control sequence) respectively.
# (Stray bare line-number lines between these definitions were not valid Perl
# and have been dropped.)
sub T_LETTER { my ($char) = @_; return bless [$char, 11], 'LaTeXML::Token'; }
sub T_OTHER  { my ($char) = @_; return bless [$char, 12], 'LaTeXML::Token'; }
sub T_ACTIVE { my ($char) = @_; return bless [$char, 13], 'LaTeXML::Token'; }

# T_COMMENT($comment) stores the comment text prefixed by '%'; with no
# (or an undefined) argument the token's string is just '%'.
# Definedness, not truth, is tested so that the comment text "0" survives.
sub T_COMMENT {
  my ($comment) = @_;
  return bless ['%' . (defined $comment ? $comment : ''), 14], 'LaTeXML::Token'; }

sub T_CS { my ($cs) = @_; return bless [$cs, 16], 'LaTeXML::Token'; }
108
bless [$string,(defined $cc ? $cc : CC_OTHER)], 'LaTeXML::Token'; }
110
#======================================================================
111
# These belong to Mouth, but make more sense here.
113
# WARNING: These two utilities bind $STATE to simple State objects with known fixed catcodes.
114
# The State normally contains ALL the bindings, etc and links to other important objects.
115
# We CAN do that here, since we are ONLY tokenizing from a new Mouth, bypassing stomach & gullet.
116
# However, be careful with any changes.
121
# Tokenize($string); Tokenizes the string using the standard cattable, returning a LaTeXML::Tokens
124
$STD_CATTABLE = LaTeXML::State->new(catcodes=>'standard') unless $STD_CATTABLE;
125
local $LaTeXML::STATE = $STD_CATTABLE;
126
LaTeXML::Mouth->new($string)->readTokens; }
128
# TokenizeInternal($string); Tokenizes the string using the internal cattable, returning a LaTeXML::Tokens
# The State carrying the 'style' catcodes is created on first use and cached
# in $STY_CATTABLE; $LaTeXML::STATE is bound to it only for the duration of
# the call (via local), so the caller's State is restored on return.
sub TokenizeInternal {
  # The argument must be unpacked explicitly: the body reads $string.
  my ($string) = @_;
  $STY_CATTABLE = LaTeXML::State->new(catcodes => 'style') unless $STY_CATTABLE;
  local $LaTeXML::STATE = $STY_CATTABLE;
  return LaTeXML::Mouth->new($string)->readTokens; }
135
sub StartSemiverbatim() {
136
$LaTeXML::STATE->pushFrame;
138
map($LaTeXML::STATE->assignCatcode($_=>CC_OTHER,'local'),'^','_','@','~','&','$','#','%',"'",' ');
139
$LaTeXML::STATE->assignCatcode('math:\''=>0,'local');
142
# Close a semiverbatim region: pops the top frame of the current
# $LaTeXML::STATE (StartSemiverbatim pushes one) and returns whatever
# popFrame returns.
sub EndSemiverbatim() {
  return $LaTeXML::STATE->popFrame; }
144
#======================================================================
145
# Token List constructors.
147
# Return a LaTeXML::Tokens made from the arguments (tokens)
150
# Flatten any Tokens to Token's
151
@tokens = map( ( (((ref $_)||'') eq 'LaTeXML::Tokens') ? $_->unlist : $_), @tokens);
152
# And complain about any remaining Non-Token's
153
map( ((ref $_) && $_->isaToken)|| Fatal(":misdefined:<unknown> Expected Token, got ".Stringify($_)), @tokens);
154
LaTeXML::Tokens->new(@tokens); }
156
# Explode a string into a list of tokens w/catcode OTHER (except space).
159
map(($_ eq ' ' ? T_SPACE() : T_OTHER($_)),split('',$string)); }
163
(defined $thing ? ToString(Tokens(ref $thing ? $thing->revert : Explode($thing))) : undef); }
165
#======================================================================
166
# Constructors for number and dimension types.
168
# Constructor shortcuts: each passes all of its arguments, unchanged, to the
# new() method of the LaTeXML class of the same name and returns the result.
# (Stray bare line-number lines between these definitions were not valid Perl
# and have been dropped.)
sub Number      { return LaTeXML::Number->new(@_); }
sub Float       { return LaTeXML::Float->new(@_); }
sub Dimension   { return LaTeXML::Dimension->new(@_); }
sub MuDimension { return LaTeXML::MuDimension->new(@_); }
sub Glue        { return LaTeXML::Glue->new(@_); }
sub MuGlue      { return LaTeXML::MuGlue->new(@_); }
sub Pair        { return LaTeXML::Pair->new(@_); }
sub PairList    { return LaTeXML::PairList->new(@_); }
176
#**********************************************************************
177
# Error & Progress reporting.
181
print STDERR @_ if $LaTeXML::Global::STATE->lookupValue('VERBOSITY') >= 0;
187
$note_timers{$state}=[Time::HiRes::gettimeofday];
188
print STDERR "\n($state..." if $LaTeXML::Global::STATE->lookupValue('VERBOSITY') >= 0; }
192
if(my $start = $note_timers{$state}){
193
my $elapsed = Time::HiRes::tv_interval($start,[Time::HiRes::gettimeofday]);
194
undef $note_timers{$state};
195
print STDERR sprintf(" %.2f sec)",$elapsed) if $LaTeXML::Global::STATE->lookupValue('VERBOSITY') >= 0; }}
199
if(!$LaTeXML::Error::InHandler && defined($^S)){
200
$LaTeXML::Global::STATE->noteStatus('fatal');
202
= LaTeXML::Error::generateMessage("Fatal",$message,1,
203
($LaTeXML::Global::STATE->lookupValue('VERBOSITY') > 0
204
? ("Stack Trace:",LaTeXML::Error::stacktrace()):()));
206
local $LaTeXML::Error::InHandler=1;
210
# Note that "100" is hardwired into TeX, The Program!!!
213
# Should be fatal if strict is set, else warn.
216
if($LaTeXML::Global::STATE->lookupValue('STRICT')){
219
$LaTeXML::Global::STATE->noteStatus('error');
220
print STDERR LaTeXML::Error::generateMessage("Error",$msg,1,"Continuing... Expect trouble.\n")
221
unless $LaTeXML::Global::STATE->lookupValue('VERBOSITY') < -2; }
222
if(($LaTeXML::Global::STATE->getStatus('error')||0) > $MAXERRORS){
223
Fatal(":too_many:$MAXERRORS Too many errors!"); }
226
# Warning message; results may be OK, but somewhat unlikely
229
$LaTeXML::Global::STATE->noteStatus('warning');
230
print STDERR LaTeXML::Error::generateMessage("Warning",$msg,0)
231
unless $LaTeXML::Global::STATE->lookupValue('VERBOSITY') < -1;
234
# Informational message; results likely unaffected
235
# but the message may give clues about subsequent warnings or errors
238
$LaTeXML::Global::STATE->noteStatus('info');
239
print STDERR LaTeXML::Error::generateMessage("Info",$msg,0)
240
unless $LaTeXML::Global::STATE->lookupValue('VERBOSITY') < -1;
243
#**********************************************************************
245
# Type names that ref() reports for builtin (unblessed) references.
# The code below consults this table before invoking a method on a reference,
# since calling a method on an unblessed reference is a runtime error.
# FORMAT, IO and VSTRING are included so that every builtin type is covered.
our %NOBLESS = map { ($_ => 1) } qw(SCALAR HASH ARRAY CODE REF GLOB LVALUE FORMAT IO VSTRING);
249
if(!defined $object){ 'undef'; }
250
elsif(!ref $object){ $object; }
251
elsif($NOBLESS{ref $object}){ "$object"; }
252
elsif($object->can('stringify')){ $object->stringify; }
253
# Have to handle LibXML stuff explicitly (unless we want to add methods...?)
254
elsif($object->isa('XML::LibXML::Node')){
255
if($object->nodeType == XML_ELEMENT_NODE){
256
my $tag = $LaTeXML::Global::STATE->getModel->getNodeQName($object);
258
foreach my $attr ($object->attributes){
259
my $name = $attr->nodeName;
260
next if $name =~ /^_/;
261
my $val = $attr->getData;
262
$val = substr($val,0,30)."..." if length($val)>35;
263
$attributes .= ' '. $name. "=\"".$val."\""; }
264
"<".$tag.$attributes. ($object->hasChildNodes ? ">..." : "/>");
266
elsif($object->nodeType == XML_TEXT_NODE){
267
"XMLText[".$object->data."]"; }
268
elsif($object->nodeType == XML_DOCUMENT_NODE){
269
"XMLDocument[".$$object."]"; }
275
(defined $object ? (((ref $object) && !$NOBLESS{ref $object}) ? $object->toString : "$object"):''); }
277
# Just how deep of an equality test should this be?
280
return 1 if !(defined $a) && !(defined $b); # both undefined, equal, I guess
281
return 0 unless (defined $a) && (defined $b); # else both must be defined
282
my $refa = (ref $a) || '_notype_';
283
my $refb = (ref $b) || '_notype_';
284
return 0 if $refa ne $refb; # same type?
285
return $a eq $b if ($refa eq '_notype_') || $NOBLESS{$refa}; # Deep comparison of builtins?
286
return 1 if $a->equals($b); # semi-shallow comparison?
287
# Special cases? (should be methods, but that embeds State knowledge too low)
288
if($refa eq 'LaTeXML::Token'){ # Check if they've been \let to the same defn.
289
my $defa = $LaTeXML::Global::STATE->lookupDefinition($a);
290
my $defb = $LaTeXML::Global::STATE->lookupDefinition($b);
291
return $defa && $defb && ($defa eq $defb); }
294
# && ( ((ref $a) && (ref $b) && ((ref $a) eq (ref $b)) && !$NOBLESS{ref $a})
295
# ? $a->equals($b) : ($a eq $b)); }
297
#**********************************************************************
306
C<LaTeXML::Global> - global exports used within LaTeXML, and in Packages.
314
This module exports the various constants and constructors that are useful
315
throughout LaTeXML, and in Package implementations.
323
This is bound to the currently active L<LaTeXML::State> by an instance
324
of L<LaTeXML> during processing.
332
=item C<< $catcode = CC_ESCAPE; >>
334
Constants for the category codes:
336
CC_ESCAPE, CC_BEGIN, CC_END, CC_MATH, CC_ALIGN, CC_EOL,
337
CC_PARAM, CC_SUPER, CC_SUB, CC_IGNORE,
338
CC_SPACE, CC_LETTER, CC_OTHER, CC_ACTIVE,
339
CC_COMMENT, CC_INVALID, CC_CS, CC_NOTEXPANDED.
341
[The last 2 are (apparent) extensions,
342
with catcodes 16 and 17, respectively].
344
=item C<< $token = Token($string,$cc); >>
346
Creates a L<LaTeXML::Token> with the given content and catcode.
347
The following shorthand versions are also exported for convenience:
349
T_BEGIN, T_END, T_MATH, T_ALIGN, T_PARAM,
350
T_SUB, T_SUPER, T_SPACE, T_LETTER($letter),
351
T_OTHER($char), T_ACTIVE($char),
352
T_COMMENT($comment), T_CS($cs)
354
=item C<< $tokens = Tokens(@token); >>
356
Creates a L<LaTeXML::Tokens> from a list of L<LaTeXML::Token>'s
358
=item C<< $tokens = Tokenize($string); >>
360
Tokenizes the C<$string> according to the standard cattable, returning a L<LaTeXML::Tokens>.
362
=item C<< $tokens = TokenizeInternal($string); >>
364
Tokenizes the C<$string> according to the internal cattable (where @ is a letter),
365
returning a L<LaTeXML::Tokens>.
367
=item C<< @tokens = Explode($string); >>
369
Returns a list of the tokens corresponding to the characters in C<$string>.
371
=item C<< StartSemiverbatim(); ... ; EndSemiverbatim(); >>
373
Disable most TeX catcodes.
381
=item C<< $number = Number($num); >>
383
Creates a Number object representing C<$num>.
385
=item C<< $number = Float($num); >>
387
Creates a floating point object representing C<$num>;
388
This is not part of TeX, but useful.
390
=item C<< $dimension = Dimension($dim); >>
392
Creates a Dimension object. C<$dim> can be a string with the number and units
393
(with any of the usual TeX recognized units), or just a number standing for
396
=item C<< $mudimension = MuDimension($dim); >>
398
Creates a MuDimension object; similar to Dimension.
400
=item C<< $glue = Glue($gluespec); >>
402
=item C<< $glue = Glue($sp,$plus,$pfill,$minus,$mfill); >>
404
Creates a Glue object. C<$gluespec> can be a string in the
405
form that TeX recognizes (number units optional plus and minus parts).
406
Alternatively, the dimension, plus and minus parts can be given separately:
407
C<$pfill> and C<$mfill> are 0 (when the C<$plus> or C<$minus> part is in sp)
408
or 1,2,3 for fil, fill or filll.
410
=item C<< $glue = MuGlue($gluespec); >>
412
=item C<< $glue = MuGlue($sp,$plus,$pfill,$minus,$mfill); >>
414
Creates a MuGlue object, similar to Glue.
417
=item C<< $pair = Pair($num1,$num2); >>
419
Creates an object representing a pair of numbers;
420
Not a part of TeX, but useful for graphical objects.
421
The two components can be any numerical object.
423
=item C<< $pair = PairList(@pairs); >>
425
Creates an object representing a list of pairs of numbers;
426
Not a part of TeX, but useful for graphical objects.
430
=head2 Error Reporting
434
=item C<< Fatal($message); >>
436
Signals a fatal error, printing C<$message> along with some context.
437
In verbose mode a stack trace is printed.
439
=item C<< Error($message); >>
441
Signals an error, printing C<$message> along with some context.
442
If in strict mode, this is the same as Fatal().
443
Otherwise, it attempts to continue processing.
445
=item C<< Warn($message); >>
447
Prints a warning message along with a short indicator of
448
the input context, unless verbosity is quiet.
450
=item C<< NoteProgress($message); >>
452
Prints C<$message> unless the verbosity level is below 0.
456
=head2 Generic functions
460
=item C<< Stringify($object); >>
462
Returns a short string identifying C<$object>, for debugging.
463
Works on any values and objects, but invokes the stringify method on
465
More informative than the default perl conversion to a string.
467
=item C<< ToString($object); >>
469
Converts C<$object> to string; most useful for Tokens or Boxes where the
470
string content is desired. Works on any values and objects, but invokes
471
the toString method on blessed objects.
473
=item C<< Equals($a,$b); >>
475
Compares the two objects for equality. Works on any values and objects,
476
but invokes the equals method on blessed objects, which does a
477
deep comparison of the two objects.
483
Bruce Miller <bruce.miller@nist.gov>
487
Public domain software, produced as part of work done by the
488
United States Government & not subject to copyright in the US.