2
# $Id: exceltex 155 2006-04-30 11:25:16Z pez $
4
# helper script for the exceltex latex package. reads contents of
5
# M$ Excel files using the Spreadsheet::ParseExcel perl module.
7
# (c) 2004-2006 by Hans-Peter Doerr <doerr@cip.physik.uni-freiburg.de>
9
# exceltex is free software. you can redistribute or modify it under
10
# the terms of the GNU GENERAL PUBLIC LICENSE Version 2. See COPYING for
15
my $VERSION = "0.5.1";
18
# this is the default encoding for all files written by exceltex
19
my $ENCODING = "latin1";
23
{ my $i = 1; for ('A' .. 'Z') { $L2N{$_} = $i; ++$i } }
26
{ for ('A' .. 'Z') { push(@N2L, $_) } }
29
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
30
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
33
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
41
print STDERR "DEBUG: $msg\n";
46
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
52
return undef if (! ($cell =~ m/^([A-Z]+)([0-9]+)$/));
54
# return (column, row)
55
return (col2x($1), $2-1);
59
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
64
# is is likely to be slow as hell
65
return undef if (! ($x =~ m/^[0-9]+$/g && $y =~ m/^[0-9]+$/g));
68
return x2col($x) . "$y";
72
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
82
if ($len == 1) { $n = $L2N{$l} - 1 }
83
if ($len == 2) { $n = 26 * $L2N{$S[0]} + $L2N{$S[1]} - 1 }
89
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
94
return undef if (! $n =~ m/[0-9]+/);
96
if ($n > 25) { $l = $N2L[int($n/26)-1] . $N2L[$n % 26] }
97
else { $l = $N2L[$n] }
106
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
107
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
114
my $Workbooks = {}; # workbook cache
115
my $nItems = 0; # number of processed items
116
my $nCells = 0; # number of processed cells
117
my $nTabs = 0; # number of processed tabs
118
my $Warnings = 0; # number of warnings
120
my $Jobname = ""; # jobname of .tex file
121
my $inputDir = ""; # data goes here
125
my $optFormat = 1; # use formatting?
126
my $optColor = 1; # use colors?
127
my $optComma = 0; # use comma for decimals
128
my $optFmtSci = 0; # reformat scientific numbers?
129
my $ignWarn = 0; # ignore warnings?
130
my $euroSymbol = "EUR"; # use this for the EUR currency symbol
132
my $cleanup = 0; # cleanup inputDir
134
my $EXTNS = "excltx";
136
# read global and local config files
137
# will be overwritten by commandline options
138
readConfigFile("$ENV{HOME}/.exceltexrc") if (-f "$ENV{HOME}/.exceltexrc");
139
readConfigFile(".exceltexrc") if (-f ".exceltexrc");
141
# handle commandline switches
142
GetOptions('format!' => \$optFormat,
143
'color!' => \$optColor,
144
'comma!' => \$optComma,
145
'reformat-sn!' => \$optFmtSci,
146
'p|plain' => sub{$optFormat=0; $optColor=0; $optFmtSci=0;},
147
'd|debug' => \$DEBUG,
148
'c|cleanup' => \$cleanup,
149
'e|encoding=s' => \$ENCODING,
150
'o|euro-symbol=s' => \$euroSymbol,
151
'h|help' => sub{ usage(); exit 0;},
152
'v|version' => sub{ print "exceltex, version $VERSION\n"; exit 0},
153
'w|ignore-warnings' => \$ignWarn,
163
# encoding of output files
164
$ENCODING = lc($ENCODING);
165
if (! ($ENCODING eq "latin1" ||
166
$ENCODING eq "iso-8859-1" ||
167
$ENCODING eq "latin9" ||
168
$ENCODING eq "iso-8859-15" ||
169
$ENCODING eq "utf8"))
171
print STDERR "Unsupported encoding: $ENCODING\n";
172
print STDERR "currently supported: iso-8859-1 (latin1), iso-8859-15 "
173
. "(latin9), utf8\n";
177
print "exceltex helper script, version $VERSION\n";
180
# get jobname from first argument
181
if ($ARGV[0] =~ m/(\w+)\.tex/g) {
184
else { $Jobname = $ARGV[0]; }
186
if (! defined($Jobname))
188
print STDERR "can't determine jobname.\n";
192
util::DEBUG("jobname is: $Jobname\n");
195
# data from spreadsheet goes here
196
$inputDir = $Jobname . '-' . $EXTNS;
197
$indexFile = $Jobname . '.' . $EXTNS;
208
print STDERR "can't read index '$indexFile: $!.\n";
209
print STDERR "run latex first to create the index.\n";
216
if (! mkdir $inputDir)
218
print STDERR "can't create data directory '$inputDir': $!\n";
225
processIndex($indexFile);
231
print STDERR "exceltex finished with $nItems items ($nCells cells "
232
. "and $nTabs tabulars)\n";
233
print STDERR "*** not all items proccessed fine, there were "
234
. "$Warnings Warnings\n";
235
exit 0 if ($ignWarn);
240
print "exceltex successfully finished with $nItems items ($nCells cells "
241
. "and $nTabs tabulars)\n";
248
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
251
print STDERR "aborting.\n";
255
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
259
exceltex version $VERSION
260
usage: exceltex [options] file[.tex]
262
-h|--help show this help
263
-v|--version show program version and exit
264
-c|--cleanup remove temporary files created by previous runs
265
-w|--ignore-warnings exit with status zero, even on warnings
266
-o|--euro-symbol=sym use sym for displaying the euro currency symbol [EUR]
267
-e|--encoding=enc set encoding to enc. Currently supported
268
encodings are: latin1, latin9, utf8 [latin1]
269
--[no]reformat-sn (dont) reformat scientific numbers to A X 10^B notation
270
--[no]comma (dont) use comma for decimal numbers
271
--[no]format (dont) use formatting
272
--[no]nocolor (dont) use colors
273
-p|--plain shorthand for --noformat --nocolor --noreformat-sn
278
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
283
open(I, "<$file") || return;
287
my ($key, $val) = split('=', $_);
289
if ($key eq "encoding") { $ENCODING = $val }
290
elsif ($key eq "reformat-sn") { $optFmtSci = $val }
291
elsif ($key eq "color") { $optColor = $val }
292
elsif ($key eq "format") { $optFormat = $val }
293
elsif ($key eq "comma") { $optComma = $val }
294
elsif ($key eq "euro-symbol") { $euroSymbol = $val }
295
elsif ($key eq "plain") { $optColor=0; $optFormat=0}
296
else { print STDERR "unsupported config option in '$file': "
297
. "$key, ignoring\n" }
304
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
309
print "removing $indexFile\n";
312
return if (! defined($inputDir));
313
return if (! -d $inputDir);
315
print "cleaning up $inputDir/\n";
316
unlink <$inputDir/c-*>;
317
unlink <$inputDir/t-*>;
322
# read index file, extract data, write to files
323
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
328
if (! open(I, "< $index"))
330
print STDERR "can't read index '$index': $!. Run latex first?\n";
339
# if cellrefs is switched on, we store data in files with name
340
# inputDir/[c,t]-file!sheet!cell
341
if ($. == 1 && $_ eq ";cellrefs")
343
print "using cell referencing by name\n";
347
# ignore lines beginning with ;
348
next if ($_ =~ m/^;/);
355
if ($cellrefs == 1) {($type, $source, @flags) = split(':', $_)}
356
else {($type, $idx, $source, @flags) = split(':', $_)}
365
$ok = 1 if ($_ eq 'plain');
367
print STDERR "Index '$indexFile' corrupt at line $.: bad flag\n";
371
if (! defined($source))
373
print STDERR "Index '$indexFile' corrupt at line $.\n";
377
my ($f, $s, $c1, $c2) = parseSource($source, $type);
380
print STDERR "Index '$indexFile' corrupt at line $.\n";
387
$string = getTabString($f, $s, $c1, $c2);
391
$string = getCellString($f, $s, $c1);
395
print STDERR "Index '$indexFile' corrupt at line $.: "
396
. "unknown $type '$type'\n";
400
# no need to write empty data
401
next if (! defined($string));
403
$string = decode_utf8($string, 0);
404
$string = encode($ENCODING, $string, 0);
406
if (! writeBuf($type, $idx, $string, $source))
408
print STDERR "can't write cell data: $!.\n";
418
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
421
my $file = $Jobname . ".xls";
422
return $file if (-f $file);
426
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
429
my ($string, $type) = @_;
431
my @tokens = split('!', $string);
452
else { return undef; }
470
else { return undef; }
472
else { return (undef, undef, undef, undef); }
474
return ($file, $sheet, $cel1, $cel2);
478
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
481
my ($type, $idx, $string, $source) = @_;
482
return 1 if ($string eq "");
486
if (! defined($idx)) { $wfile = $inputDir . "/" . $type . "-" . $source }
487
else { $wfile = $inputDir . "/" . $type . "-" . $idx }
489
return 0 if (! open (O, ">$wfile"));
490
return 0 if (! print O $string);
491
return 0 if (! close(O));
496
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
502
print STDERR "can't read '$file': it does not exists.\n";
506
if (! defined($Workbooks->{$file}))
509
if ($file =~ m/^\w+\.(\w+)$/g) { $type = $1 };
513
print("reading '$file'\n");
514
# readExcel->new() will exit, if anything bad happens
515
$Workbooks->{$file} = readExcel->new($file);
519
return $Workbooks->{$file};
523
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
528
my $MATH = 0; # wether to enclose string in $$ for math mode
529
my $string = $cel->{value};
531
return "" if (! defined($string) || $string eq "");
533
# escape latex control characters
534
$string =~ s/\{/\\\{/g;
535
$string =~ s/\}/\\\}/g;
536
# escape backslash if not followed by { or }
537
$string =~ s/\\(?!({|}))/\\ensuremath\{\\backslash\}/g;
538
$string =~ s/#/\\#/g;
539
$string =~ s/\$/\\\$/g;
540
$string =~ s/%/\\%/g;
541
$string =~ s/&/\\&/g;
542
$string =~ s/~/\\~/g;
543
$string =~ s/_/\\_/g;
544
$string =~ s/\^/\{\\verb ^ \}/g;
546
# FIXME: needs more testing!
547
# hack for EUR sign, works for me but is definetly ugly :(
548
# will need some information on how exactly this
549
# user-entered formatting stuff works (in excel)
550
$string =~ s/\[.+\xac-{0,1}\d{1,3}\]/$euroSymbol/g; # EUR in currency cells
551
$string =~ s/\[.+EUR\]/$euroSymbol/g; # "
552
$string =~ s/.{1,1}\xac/$euroSymbol/g; # single EUR sign
555
# use decimal comma if requested
556
$string =~ s/(\d)\.(\d)/$1,$2/g if ($optComma == 1);
558
# reformat scientific numbers if requested
561
$MATH = 1 if ($string =~ s/([0-9]+)[eE]([+-][0-9]+)/
562
simplifyScientificNumber($1, $2)/ge);
565
# apply cell formatting and colors if not requested otherweise
566
$string = applyFormatting($string, $cel, $MATH) if($optFormat == 1);
568
# same goes for colors
569
$string = applyColor($string, $cel) if ($optColor == 1);
571
util::DEBUG " texified => '$string'";
573
return "\$$string\$" if ($MATH);
578
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
581
my ($s, $c, $m) = @_;
584
if ($c->{bold} && $m) { $s = "\\mathbf{$s}"; }
585
elsif ($c->{bold}) { $s = "\\textbf{$s}"; }
588
if ($c->{italic} && $m) { $s = "\\mathit{$s}"; }
589
elsif ($c->{italic}) { $s = "\\textit{$s}"; }
591
# underlined? striked out?
592
$s = "\\uline{$s}" if ($c->{underline});
593
$s = "\\sout{$s}" if ($c->{strikeout});
599
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
604
# only evaluate color if it is not black anyway
605
if ($c->{color} != 8)
607
# convert rrggbb hex color triplet to float rgb
608
$s = "\\textcolor[rgb]{" . colormap($c->{color}) . "}{$s}";
614
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
619
util::DEBUG(" color => $color\n");
621
# get rgb triplet as floats
622
return join(', ', map {hex($_)/255.0}
623
unpack('a2a2a2', Spreadsheet::ParseExcel->ColorIdxToRGB($color)));
628
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
629
sub simplifyScientificNumber
634
my $s = substr($e, 0, 1);
635
$s = '' if ($s eq "+");
637
# remove signum & trailing zeros
639
$e =~ s/^0*(\d+)/$1/;
641
return $f if ($e == 0);
642
return sprintf("%s \\times 10^{%s%s}", $f, $s, $e);
646
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
652
print STDERR "warning: " . $msg . "\n";
657
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
660
my ($file, $sheet, $cell) = @_;
662
util::DEBUG("get $file -> $sheet -> $cell\n");
664
my $w = getWorkbook($file);
665
return undef if (! defined($w));
667
my ($x, $y) = util::cell2xy($cell);
668
if (! defined($x) || ! defined($y)) {
669
warning("cell '$cell' is invalid\n");
673
my $c = $w->getCell($sheet, $x, $y);
674
warning($w->error()) if (! defined($c->{value}));
678
return texifyCell($c);
682
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
685
my ($file, $sheet, $cell1, $cell2) = @_;
687
# cell1 is upper-left and cell2 is lower-right corner
688
# of the selected cell-range
689
my ($lfx, $upy) = util::cell2xy($cell1);
690
my ($rtx, $loy) = util::cell2xy($cell2);
694
warning "cell '$cell1' is invalid\n";
699
warning "cell '$cell2' is invlaid\n";
703
my $book = getWorkbook($file);
708
for (my $y = $upy; $y <= $loy; ++$y)
710
for (my $x = $lfx; $x <= $rtx; ++$x)
712
my $c = $book->getCell($sheet, $x, $y);
714
warning($book->error()) if (! defined($c));
715
$empty = 0 if(defined($c->{value}));
716
$buf .= texifyCell($c);
717
$buf .= " & " if ($x < $rtx);
722
warning("table $cell1!$cell2 is empty\n") if ($empty);
733
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
734
#XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
738
use Spreadsheet::ParseExcel;
741
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
744
my ($class, $file) = @_;
751
print STDERR "can't read '$file', it does not exists.\n";
756
Spreadsheet::ParseExcel::Workbook->Parse($file);
758
if (! defined($self->{book}))
760
print STDERR "SpreadSheet::ParseExcel Error: "
761
. "can't parse '$file' : $!\n";
765
$self->{file} = $file;
772
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
775
my ($self, $sheet, $x, $y) = @_;
777
my $ws = $self->{book}->Worksheet($sheet);
780
$self->_setError("sheet '$sheet' does not exist");
784
my $cell = $ws->Cell($y, $x);
788
$self->_setError("cell '$sheet!" . util::xy2cell($x, $y) . "' is emtpy "
789
. "or out of range");
791
return { value => undef }
794
# we're internally operating with utf8, for now
795
my $value = encode_utf8($cell->Value());
796
util::DEBUG " value => '$value'";
798
# return hash containing value and formatting
799
return { value => $value,
800
bold => $cell->{Format}->{Font}->{Bold},
801
italic => $cell->{Format}->{Font}->{Italic},
802
color => $cell->{Format}->{Font}->{Color},
803
underline => $cell->{Format}->{Font}->{Underline},
804
strikeout => $cell->{Format}->{Font}->{Strikeout}
809
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
812
my ($self, $err) = @_;
813
$self->{ERROR} = $err;
817
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
821
my $err = $self->{ERROR};
822
$self->{ERROR} = undef;