1
#!/usr/local/bin/perl -w
2
use strict; # we at least try to ;)
5
# This file is part of the wvWare 2 project
6
# Copyright (C) 2001-2003 Werner Trobin <trobin@kde.org>
8
# This library is free software; you can redistribute it and/or
9
# modify it under the terms of the GNU Library General Public
10
# License version 2 as published by the Free Software Foundation.
12
# This library is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
# Library General Public License for more details.
17
# You should have received a copy of the GNU Library General Public License
18
# along with this library; see the file COPYING.LIB. If not, write to
19
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20
# Boston, MA 02111-1307, USA.
22
# A script to generate code which converts Word95 structures to Word97 ones
23
# as well as possible.
24
# If you add a convert comment to the Word 6 HTML you can "loosen" the
26
# - convert="string(fieldname)" converts between U8[] and XCHAR[]
27
# In case you want to limit the string size just write
28
# "string(fieldname:XY)" where XY is the length to copy.
29
# - convert="type" loosens the type restrictions and simply tries to assign
30
# even if the types are not exactly the same (U32 <- U16,...)
31
# - convert="(fieldname)" relates the fieldnames and loosens the type
33
# - convert="unused" skips this field
35
###############################################################################
36
# To discuss with Shaheed:
37
# - CHP::chse - I think we should map that to the Word97 CHP::fMacChs (70)
38
# - I disabled (unused) DOP::fReadOnlyRecommended and DOP::fWriteReservation
39
# as this should normally go into the Word97 FIB, but well, I doubt we need
41
# - I mapped the things like cpnBtePap to the "active" Word97 structures
42
# instead of mapping it to the blah_W6 compatibility ones.
43
# - PAP::fAutoHyph -> ? Maybe it's Word97::PAP::fAutoWrap?
44
# - PAP::rgdxaTab, PAP::rgtbd?
45
# - PGD: We should create a PGD2 structure for the 2nd table and have
46
# conversion functions like for PRM -> PRM2. Then we can map Word95::PGD to
47
# the Word97::PGD2. For now I disabled the structure.
48
# - PHE: What to do with the Height/Line field? Have a union for them?
49
###############################################################################
51
# This structure holds one "variable"
53
name => '$', # The name of this variable
54
type => '$', # The type (e.g. U16, S32[42],...)
55
bits => '$', # The amount of bits (e.g. 3), if any
56
comment => '$', # The comment for this variable
57
initial => '$', # The initial value of this field, if any
58
len => '$', # If the item is a dynamic array we store its length
59
# here. length can be a plain C++ expression.
60
compareSizeLHS => '$', # If the item is a dynamic array we need to compare the
61
# left-hand-side (lhs) and the rhs in their size. This
62
# is a plain C++ expression returning the size of the LHS.
63
compareSizeRHS => '$', # If the item is a dynamic array we need to compare the
64
# left-hand-side (lhs) and the rhs in their size. This
65
# is a plain C++ expression returning the size of the RHS.
66
startNew => '$', # This field is used for debugging purposes. It
67
# is set to 1 if this variable should start a new
68
# bitfield (and close the last one). We simply
69
# check whether we filled the last field completely here
70
matched => '$', # This field is used to indicate that this item already was "matched"
71
convert => '$', # The conversion options - if any
75
name => '$', # The name of the structure
76
comment => '$', # The comment for this struct
77
items => '@', # All the data members
78
hidden => '$', # Set to "//" if we want to comment that structure out
79
dynamic => '$', # Do we have dynamic memory? Then we need a Copy CTOR,
80
# DTOR, assignment op, op==,...
84
# This array of strings contains the whole HTML
85
# documentation file. It's used twice when reading the spec in
86
# All the parsing subs will read/modify that global array
87
# Note: All the tags we use are already converted to
91
# The current index in the document-array (used during parsing)
94
# These arrays hold all the structures we want to write out
98
# The current struct we're working on (only used during parsing)
100
# The current item we're working on (only used during parsing)
103
# Parses all the structures
104
sub parseStructures {
# Scans @document (via the shared index $i) for <H3> headings, creates one
# Structure per heading and hands the <TABLE> that follows it to
# parseStructure(). Structures that parse successfully are appended to
# @structs95 or @structs97, selected by $doc.
# NOTE(review): $doc is presumably the "Word95"/"Word97" argument given by
# the caller -- confirm.
108
print "Parsing $doc...\n";
110
while($i<=$#document) {
111
# A line containing </H3> closes a heading; the heading text becomes the
# structure's comment.
if($document[$i] =~ m,\</H3\>,) {
112
if($document[$i-1] =~ m/\<H3\>/) { # Safe, as </H3> can't be in the first line
114
$struct=Structure->new(); # create a new structure element
115
# The opening tag was on the previous line: take everything before </H3>
$document[$i] =~ m,^(.*)\</H3\>,;
116
$struct->comment($1);
118
elsif($document[$i] =~ m/\<H3\>/) {
120
$struct=Structure->new(); # create a new structure element
121
# Opening and closing tag on the same line: take the text in between
$document[$i] =~ m,\<H3\>(.*)\</H3\>,;
122
$struct->comment($1);
125
if($document[$i-1] !~ m/Algorithm/) {
126
# huh? Shouldn't happen at all
127
print "####### ERROR #######\n";
128
print $document[$i-1], "\n", $document[$i], "\n";
130
$i++; # don't forget that one here :))
133
# The pattern is greedy, so the last "(...)" group of the heading is used.
# NOTE(review): $1 is read below without checking that this match
# succeeded; on a heading without parentheses $1 keeps the value of an
# earlier successful match.
$struct->comment =~ m,.*\((.*)\),; # get the name of the structure
134
$tmp=$1; # store it in a $tmp var as I'm too clueless :)
135
# NOTE(review): there is no /g modifier, so only the first whitespace
# character is replaced.
$tmp =~ s/\s/_/; # replace the spaces with underscores
136
$struct->name($tmp); # ...and set it as name
137
#print "found: name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
138
$struct->hidden(""); # initialize that with a sane value
140
#print "Checking for a <TABLE> ";
141
# Advance to the line that opens the table belonging to this heading
while($document[$i] !~ m,\<TABLE ,) {
146
# parse the <TABLE> we found
147
if(parseStructure()) {
148
# File the finished structure under the spec version being parsed
if($doc eq "Word95") {
149
push(@structs95, $struct); # append the new structure
151
elsif($doc eq "Word97") {
152
push(@structs97, $struct);
155
print "Error: Word95 or Word97?\n";
159
print "####### ERROR #######\n";
160
print " name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
168
# Parses one structure (<table>...</table>)
171
# eat the first row (headline)
172
while($document[$i] !~ m,^\<TR\>$,) {
175
while($document[$i] !~ m,^\</TR\>$,) {
179
# parse all the variables till we encounter </TABLE>
180
while($document[$i] !~ m,^\</TABLE\>$,) {
182
push(@{$struct->items}, $item);
186
print "####### ERROR #######\n";
187
print " Error while parsing an item!\n";
191
#print "count: ", $#{$struct->items}+1, "\n";
195
# Parses one row of the table (<tr> ... </tr>) to get one
196
# data item out of it. Does some trivial error checking
201
while($document[$i] !~ m,^\<TR\>$,) {
205
while($document[$i] !~ m,^\</TR\>$,) {
206
if($document[$i] =~ m,^\<TD\>(.*)\</TD\>$,) {
207
if($myState==0) { # this is used for debugging/sanity checking
209
#print " startNew: ", $1, "\n";
211
# yes, I left out $myState==1 on purpose
214
#print " name: ", $1, "\n";
218
#print " type: ", $1, "\n";
222
if($tmp =~ m/^:(.*)/) {
224
#print " bits: ", $1, "\n";
227
#print " no bits but a plain size attribute!\n";
230
# yes, I left out $myState==5 on purpose
233
#print " (short) comment: ", $1, "\n";
237
# The comment can expand across several lines
238
elsif($document[$i] =~ m,^\<TD\>(.*)$, && $myState==6) {
240
# Insert a <BR> for "newlines" (consistency)
241
if($document[$i+1] !~ m,\<BR\>,) {
245
while($document[$i] !~ m,(.*)\</TD\>$,) {
246
$tmp .= $document[$i];
247
# Insert a <BR> for "newlines" (consistency)
248
if($document[$i+1] !~ m,\<BR\>,) {
253
$document[$i] =~ m,(.*)\</TD\>$,;
255
$item->comment($tmp);
256
#print " (long) comment: ", $tmp, "\n";
259
elsif($document[$i] =~ m,\<\!--\s*initial=\"(.*?)\"\s*--\>,) {
260
#print "initial found: ", $document[$i], " filtered: ", $1, "\n";
263
elsif($document[$i] =~ m,\<\!--\s+compareSizeLHS=\"(.*?)\"\s+compareSizeRHS=\"(.*?)\"\s+--\>,) {
264
#print "compareSize found: ", $document[$i], " filtered: ", $1, ", ", $2, "\n";
265
$item->compareSizeLHS($1);
266
$item->compareSizeRHS($2);
268
elsif($document[$i] =~ m,\<\!--\s*convert=\"(.*?)\"\s*--\>,) {
269
#print "convert found: ", $document[$i], " filtered: ", $1, "\n";
272
elsif($document[$i] =~ m,^\</TABLE\>$,) {
273
print "Error: Found a table end where I didn't expect it!\n";
278
#print "$myState==7 ? ", $myState==7, "\n";
282
# Parse the template file
284
my($name) = @_; # name of the template
285
my($license, $includes, $before, $after, $myState);
287
open(TEMPLATE, "<$name") or die "Couldn't open the template: " . $!;
288
# initialize all the template vars
294
# read in the information...
296
if(m/^\#\#\#/) { # ignore comments
299
if(m/^\@\@license-start\@\@$/) { # license section
303
if(m/^\@\@license-end\@\@$/) { # end of license sect.
307
if(m/^\@\@includes-start\@\@$/) { # includes section
311
if(m/^\@\@includes-end\@\@$/) { # end of includes sect.
315
if(m/^\@\@namespace-start\@\@$/) { # namespace (before)
319
if(m/^\@\@generated-code\@\@$/) { # namespace (after)
323
if(m/^\@\@namespace-end\@\@$/) { # end of namespace
341
close(TEMPLATE) or die $!;
342
return ($license, $includes, $before, $after);
345
# Removes some structures we can't generate easily.
346
# Note: We write out the struct in the header and just
347
# comment it out (that you can copy it for a proper impl.).
348
sub cleanStructures {
# Marks every Word95 structure whose name is listed in @clean as hidden by
# setting its "hidden" field to "//". Nothing is removed from @structs95.
349
my($index, @clean, $done);
351
print "Cleaning up...\n";
352
# Feel free to add your "favorites" here
353
# The goal, however, should be to have as much as possible
354
# generated, so try to fix the HTML ;)
355
@clean=("PAPXFKP", "CHPXFKP",
356
"PAPX", "CHPX", "FLD", "PLCF", "STD", "BRC", "PGD", "SEPX",
357
"FFN", "STSHI", "TBD");
361
# Linear search through @structs95 for the current name.
# NOTE(review): $_ is presumably the current @clean entry of an enclosing
# foreach, and $index/$done are presumably reset per entry -- confirm.
while($index<=$#structs95 && $done==0) {
362
if($structs95[$index]->name eq $_) {
363
print "Removing: ", $structs95[$index]->name, "\n";
364
# Better not really remove, just comment it out by setting "hidden"
365
# That way you can copy the declaration for a real implementation
366
#splice @structs95,$index,1;
367
$structs95[$index]->hidden("//");
376
# Generates the conversion header. trivial code, as we just create declarations
377
# like Word97::FOO toWord97(const Word95::FOO &s), where FOO is some struct
379
my($license, $includes, $before, $after, $myState);
381
print "Generating the header file...\n";
382
open(HEADER, ">convert.h") or die "Couldn't open the header for writing: " . $!;
384
($license, $includes, $before, $after) = parseTemplate("template-conv.h");
387
print HEADER $license;
388
print HEADER "\n#ifndef CONVERT_H\n#define CONVERT_H\n\n";
390
print HEADER "#include <word95_generated.h>\n";
391
print HEADER "#include <word97_generated.h>\n";
392
print HEADER $includes;
393
print HEADER "\nnamespace wvWare {\n\n";
394
print HEADER "namespace Word95 {\n";
397
print HEADER $before . "\n";
398
# Fill the empty template
399
print HEADER generateDeclarations();
403
print HEADER "\n} // namespace Word95\n\n";
404
print HEADER "} // namespace wvWare\n\n";
405
print HEADER "#endif // CONVERT_H\n";
406
close(HEADER) or die $!;
410
# This method is used to actually generate the methods with the pattern
411
# Word97::FOO toWord97(const Word95::FOO &s), where FOO is some struct
412
sub generateDeclarations {
# Builds the text of the toWord97() prototypes: one line for every Word95
# structure that is not hidden and has a Word97 structure of the same name.
# NOTE(review): presumably returns the accumulated $string (the caller
# prints the return value to the header) -- confirm.
413
my($index, $string, $n, $tmp);
415
for($index=0; $index<=$#structs95; $index++) {
416
$n=$structs95[$index]->name;
417
# Hidden structures get no declaration at all
if($structs95[$index]->hidden ne "//") {
418
# $tmp is used as the index into @structs97 here
for($tmp=0; $tmp<=$#structs97; $tmp++) {
419
if($n eq $structs97[$tmp]->name) {
420
$string .= "Word97::$n toWord97(const Word95::$n &s);\n";
429
# This is the tricky part. It first adds all the template stuff and calls the
430
# generator method to fill the void ;)
431
sub generateImplementation {
# Writes convert.cpp (relative to the current working directory): the
# license and includes from template-conv.cpp, the wvWare::Word95 namespace
# wrapper, the template's $before section and the generated functions.
# Dies if the output file cannot be opened or closed.
432
my($tmp, $license, $includes, $before, $after);
434
print "Generating the source file...\n";
435
# Opening with ">" truncates an existing convert.cpp
open(SOURCE, ">convert.cpp") or die "Couldn't open the file for writing: " . $!;
437
($license, $includes, $before, $after) = parseTemplate("template-conv.cpp");
440
print SOURCE $license . "\n";
442
print SOURCE "#include <convert.h>\n";
443
print SOURCE $includes;
444
print SOURCE "\nnamespace wvWare {\n";
445
print SOURCE "\nnamespace Word95 {\n";
448
print SOURCE $before . "\n";
449
# Fill the empty template
450
print SOURCE generateFunctions();
454
print SOURCE "} // namespace Word95\n";
455
print SOURCE "\n} // namespace wvWare\n";
456
# Checking close() catches write errors that only surface on flush
close(SOURCE) or die $!;
461
# Creates the empty template for every conversion function
462
sub generateFunctions {
# Builds the C++ text of one toWord97() function for every Word95 structure
# that has a Word97 structure of the same name. The function body comes from
# generateConversion().
# NOTE(review): presumably returns the accumulated $string (the caller
# prints the return value to convert.cpp) -- confirm.
463
my($index95, $index97, $string, $n, $h);
465
for($index95=0; $index95<=$#structs95; $index95++) {
466
$n=$structs95[$index95]->name;
467
# $h holds the "hidden" marker ("//" for structures that were cleaned)
$h=$structs95[$index95]->hidden;
468
for($index97=0; $index97<=$#structs97; $index97++) {
469
if($n eq $structs97[$index97]->name) {
471
# NOTE(review): this opening "/*" and the closing "} */" further down are
# presumably emitted only for hidden structures (depending on $h), so that
# their implementation ends up commented out -- confirm.
$string .= "/* Please check...\n";
473
$string .= "Word97::$n toWord97(const Word95::$n &s) {\n\n";
474
$string .= " Word97::$n ret;\n\n";
475
$string .= generateConversion($index95, $index97);
476
$string .= "\n return ret;\n";
478
$string .= "} */\n\n";
490
# This method tries to match fields inside structures, using some basic heuristics
491
# and hints inside the .html files. Check the documentation at the top of that file
492
# for further information about the hints and how to use them
493
sub generateConversion {
# Matches the fields of $structs95[$index95] against those of
# $structs97[$index97] and produces the assignment code via
# generateMapping().
# %result maps a Word95 field name to the index of its partner in @items97,
# or to -42 for fields explicitly marked as unused.
# Matching runs in three passes: same position, same name/type at any
# position, and finally the convert="..." hints from the HTML.
494
my($index95, $index97)=@_;
495
my($i, $j, @items95, @items97, %result, $tmp1, $tmp2, $string);
497
print "Trying to match the fields for " . $structs95[$index95]->name . "\n";
498
if($structs95[$index95]->hidden eq "//") {
499
print " Note: Hidden structure, implementation will be commented out\n";
501
@items95=@{$structs95[$index95]->items};
502
@items97=@{$structs97[$index97]->items};
503
# First try to find all "direct" matches (type, name, position)
504
# Only the common prefix of both item lists can match by position
for($i=0; $i<=$#items95 && $i<=$#items97; $i++) {
505
# The bit widths have to be equal, or undefined on both sides
if($items95[$i]->name eq $items97[$i]->name &&
506
$items95[$i]->type eq $items97[$i]->type &&
507
((defined($items95[$i]->bits) && defined($items97[$i]->bits) &&
508
$items95[$i]->bits eq $items97[$i]->bits) ||
509
(not(defined($items95[$i]->bits)) && not(defined($items97[$i]->bits))))) {
510
#print " Direct match for " . $items95[$i]->name . "\n";
511
$items95[$i]->matched(1);
512
$items97[$i]->matched(1);
513
$result{$items95[$i]->name}=$i;
516
# Then try to check if we find the same name/type at some other position
517
for($i=0; $i<=$#items95; $i++) {
518
if(not(defined($items95[$i]->matched))) {
519
for($j=0; $j<=$#items97; $j++) {
520
# Unlike the direct pass, this comparison does not look at the bit width
if(not(defined($items97[$j]->matched)) &&
521
$items95[$i]->name eq $items97[$j]->name &&
522
$items95[$i]->type eq $items97[$j]->type) {
523
#print " Indirect match for " . $items95[$i]->name . "\n";
524
$items95[$i]->matched(1);
525
$items97[$j]->matched(1);
526
$result{$items95[$i]->name}=$j;
531
# Did the "user" add some hints for us?
532
for($i=0; $i<=$#items95; $i++) {
533
if(defined($items95[$i]->convert)) {
534
# Hint "string(fieldname)" or "string(fieldname:XY)"
if($items95[$i]->convert =~ m/^string\((.*)\)$/) {
535
#print " Hint: string($1)\n";
537
# NOTE(review): $tmp1 presumably holds the hint argument captured above,
# and $tmp2 the optional length taken from it -- confirm.
if($tmp1 =~ m/(.*):(\d+)/) {
538
#print " Additional length hint: " . $2 . "\n";
541
# Rewrite the array size of the Word95 type to $tmp2
if($items95[$i]->type =~ m/(.*)\[(.*)\]/) {
542
#print " Old type: " . $items95[$i]->type . "\n";
543
$items95[$i]->type($1 . "[" . $tmp2 . "]");
544
#print " New type: " . $items95[$i]->type . "\n";
547
for($j=0; $j<=$#items97; $j++) {
548
if(not(defined($items97[$j]->matched)) &&
549
$tmp1 eq $items97[$j]->name) {
550
#print " Matched due to string hint: " . $items95[$i]->name . " -> " . $1 . "\n";
551
$items95[$i]->matched(1);
552
$items97[$j]->matched(1);
553
$result{$items95[$i]->name}=$j;
557
# Hint "type": match by name only, the types may differ
elsif($items95[$i]->convert =~ m/^type$/) {
558
#print " Hint: type\n";
559
for($j=0; $j<=$#items97; $j++) {
560
if(not(defined($items97[$j]->matched)) &&
561
$items95[$i]->name eq $items97[$j]->name) {
562
#print " Matched due to type hint: " . $items95[$i]->name . "\n";
563
$items95[$i]->matched(1);
564
$items97[$j]->matched(1);
565
$result{$items95[$i]->name}=$j;
569
# Hint "(fieldname)": map this field to a differently named Word97 field
elsif($items95[$i]->convert =~ m/^\((.*)\)$/) {
570
#print " Hint: ($1)\n";
571
for($j=0; $j<=$#items97; $j++) {
572
# NOTE(review): $1 still refers to the capture of the hint match above;
# this only holds as long as no other successful match runs in this loop.
if(not(defined($items97[$j]->matched)) &&
573
$1 eq $items97[$j]->name) {
574
#print " Matched due to mapping hint: " . $items95[$i]->name . " -> " . $1 . "\n";
575
$items95[$i]->matched(1);
576
$items97[$j]->matched(1);
577
$result{$items95[$i]->name}=$j;
581
# Hint "unused": mark as handled, but generate no assignment for it
elsif($items95[$i]->convert =~ m/^unused$/) {
582
#print " Hint: unused\n";
583
$items95[$i]->matched(1);
584
$result{$items95[$i]->name}=-42; # unused
587
print " Hint: Didn't understand this hint.\n";
591
# What's still missing? (Information)
593
if(not(defined($_->matched))) {
594
print " -> No match for " . $_->name . "\n";
598
# Now that we have a complete map (hopefully ;) let's generate the code
601
$i=$result{$_->name};
602
# Fields without a partner and fields marked unused (-42) produce no code
if(not(defined($i)) || $i == -42) {
603
#print " Skipping item " . $_->name . "\n";
606
$string .= generateMapping($_, $items97[$i]);
611
# Create "one line" of the conversion function. Depending on the type
612
# this method has to generate a proper assignment operation.
613
sub generateMapping {
# Produces the C++ statement(s) that copy one Word95 field ($item95, read
# from "s") into its Word97 partner ($item97, written to "ret"). The form of
# the assignment depends on the Word95 type: dynamic array, fixed-size
# array, known Word95 structure or plain member.
# NOTE(review): presumably returns the text collected in $ret (the caller
# appends the return value to its output string) -- confirm.
614
my($item95, $item97)=@_;
617
# is it a dyn. array we know the size of?
618
if(defined($item95->len) && $item95->len ne "") {
619
# Capture the element type, i.e. the part in front of "[...]".
# NOTE(review): $1 is used below without checking that this match
# succeeded.
$item95->type =~ m/(.*)\[.*\]/;
620
$ret .= " ret." . $item97->name . "=new " . $1 . "[" . $item95->len . "];\n";
621
# NOTE(review): the emitted memcpy writes to "rhs.<name>" although the
# buffer was just allocated as "ret.<name>"; the generated functions visible
# in this file only declare "s" and "ret", so this looks like a typo for
# "ret." -- confirm.
$ret .= " memcpy(rhs." . $item97->name . ", s." . $item95->name . ", sizeof($1)*(" . $item95->len . "));\n";
623
# Fixed-size array: emit an element-wise copy loop over $2 elements
elsif($item95->type =~ m/(.*)\[(\d+)\]/) {
624
$ret .= " for(int i=0;i<($2);++i)\n";
626
# NOTE(review): presumably the toWord97() variant is chosen when the element
# type is a known Word95 structure and the plain copy otherwise -- confirm.
$ret .= " ret." . $item97->name . "[i]=toWord97(s." . $item95->name . "[i]);\n";
629
$ret .= " ret." . $item97->name . "[i]=s." . $item95->name . "[i];\n";
632
# A nested Word95 structure is converted through its own toWord97()
elsif(knownType($item95->type)) {
633
$ret .= " ret." . $item97->name . "=toWord97(s." . $item95->name . ");\n";
636
# "plain" members, no problem here
637
$ret .= " ret." . $item97->name . "=s." . $item95->name . ";\n";
642
# Helper method to detect known Word95 structs
646
foreach (@structs95) {
647
if($_->name eq $name) {
654
# Read the whole .html file into an array, line by line
659
open(INPUT, "<$name") or die $!;
662
# Detection of start for Word9x
663
if(m,^Structure Definitions\</h[12]\>$,) {
666
# Detection of end for Word97
667
elsif(m,^Appendix A - Reading a Macintosh PICT Graphic\</h2\>$,) {
670
# Detection of end for Word95
671
elsif(m,^Appendix A - Changes from version 1\.x to 2\.0\</h1\>$,) {
677
# convert the important tags we use to uppercase on the fly
682
s,\<table ,\<TABLE ,;
683
s,\</table\>,\</TABLE\>,;
687
# get rid of those ugly thingies
693
close(INPUT) or die $!;
696
# Reads the HTML files and converts the "interesting" tags
697
# to uppercase. It also cuts off areas we're not interested in
698
# from the beginning and the end of the file.
701
readDocument($ARGV[0]);
702
parseStructures("Word95");
704
readDocument($ARGV[1]);
705
parseStructures("Word97");
707
cleanStructures(); # get rid of stuff we don't want to use
709
generateHeader(); # generate the header file
710
generateImplementation(); # generate the source
713
# We start execution here
715
print "Script to generate C++ code to convert Word95 to Word97 structures";
716
print "\nfrom the HTML specs.\n";
717
print "Usage: perl converter.pl spec95.html spec97.html\n";