1
// Copyright (C) 2000 - 2002 Hewlett-Packard Company
3
// This program is free software; you can redistribute it and/or modify it
4
// under the term of the GNU Lesser General Public License as published by the
5
// Free Software Foundation; either version 2 of the License, or (at your
6
// option) any later version.
8
// This program is distributed in the hope that it will be useful, but WITHOUT
9
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
13
// You should have received a copy of the GNU Lesser General Public License
14
// along with this program; if not, write to the Free Software Foundation,
15
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
// @(#) $Revision: 4.22 $ $Source: /cvsroot/judy/tool/jhton.c,v $
20
// JUDY HTML MANUAL ENTRY TO NROFF TRANSLATOR.
22
// USAGE: <program> filename.htm[l]
23
// Writes nroff -man output to stdout.
24
// Suggestion: Pipe output through rmnl to delete extraneous newlines that
25
// this program cannot easily avoid.
27
// Compile with -DDEBUG for assertions and other checks.
28
// If so, run with DUMP set in the env to dump the docnodes tree.
30
// CONCEPT: This program was written out of necessity for the 11.11 OEUR
31
// release of Judy. Ideally our manual entries would be written in an abstract
32
// XML format with an XSLT-based means to translate them to any other format,
33
// such as HTML or nroff. In lieu of that, this program knows how to translate
34
// a limited subset of HTML, as used in our manual entries, to equivalent
35
// nroff, as described below preceding EmitNroffHeader().
37
// The translation is still complex enough to merit writing a parser that first
38
// builds a tree representation of the structured (HTML) document. I would use
39
// yacc and lex if I knew them...
41
// This program is written for simplicity, cleanliness, and robustness, and not
42
// necessarily for speed.
44
// FLEXIBILITY: It should be possible to teach this program new HTML tags; see
45
// data structures below. The program might also be useful for other HTML
46
// manual entries, so long as they follow the simple conventions used in the
47
// Judy entries; see the comments before EmitNroffHeader(). You can also
48
// discover the format by trial and error -- this program issues verbose error
53
// - Global variables start with "g_" except option_*.
54
// - Global constants start with "gc_".
56
// - Pointer variable names start with one "P" per level of indirection.
58
// - Exception: (char *) and (char[]) types, that is, strings, do not
59
// necessarily start with "P". A generic char pointer is named "Pch".
60
// Variables of type (char **) start with a single P.
62
// - Exception: the well known name "char ** argv".
64
// - Pointers to first elements of serial linked lists of structures have names
67
// - Line lengths are less than 80 columns. Message parameters to Error()
68
// begin on the same line for easy finding.
70
// - Error messages might exceed one line when emitted, but no effort is made
71
// to wrap them nicely.
75
#include <stdarg.h> // for varargs.
76
#include <string.h> // for str*().
78
#include <ctype.h> // for isspace(), etc.
82
// Character-class wrappers around <ctype.h>.
//
// The <ctype.h> functions require an argument that is either EOF or
// representable as unsigned char.  Plain char may be signed, so a byte >= 0x80
// read from the input file would otherwise be passed as a negative int, which
// is undefined behavior.  Convert through unsigned char first; the outer (int)
// cast is kept for convenience with lint.

#define ISSPACE(Char) isspace((int) (unsigned char) (Char))
#define ISDIGIT(Char) isdigit((int) (unsigned char) (Char))
#define ISUPPER(Char) isupper((int) (unsigned char) (Char))

// Output shorthands; both write to stdout and discard the return value:

#define PUTS(String) (void) fputs(String, stdout) // for convenience.
#define PUTC(Char) (void) putc((int) (Char), stdout)
90
// NOTE(review): in this copy NDEBUG is defined unconditionally and DBGCODE is
// defined twice in a row with no visible #ifdef/#else/#endif.  The file header
// says assertions and other checks are enabled with -DDEBUG, so the guarding
// preprocessor conditionals are presumably missing here -- confirm against the
// complete file before relying on either definition.
#define NDEBUG // turn off assertions by default.
93
// Shorthand notation to avoid #ifdefs for single-line conditional statement:
95
// Warning: This cannot be used around compiler directives, such as
96
// "#include", nor in the case where Code contains a comma other than nested
97
// within parentheses or quotes.
100
// This definition expands to nothing, so the wrapped Code is dropped:
#define DBGCODE(Code) // null.
102
// This definition passes the wrapped Code through unchanged:
#define DBGCODE(Code) Code
106
// ****************************************************************************
107
// MISCELLANEOUS GLOBAL VALUES:
109
// FUNCTION expands to nothing; it only marks function definitions so they are
// easy to search for.
#define FUNCTION // null; easy to find functions.
112
#define CHNULL ('\0')
113
#define PCNULL ((char *) NULL)
114
typedef int bool_t; // for clarity with Boolean values.
116
// Usage message strings.  The first one contains a %s conversion, presumably
// for the program name (gc_myname) -- confirm in Usage().
// NOTE(review): the end of this initializer (any terminator entry and the
// closing brace) is not visible in this copy.
char * gc_usage[] = {
117
"usage: %s filename.htm[l]",
119
"Reads restricted (Judy-specific) HTML from filename.htm[l] and emits",
120
"equivalent nroff -man to stdout.",
124
char * gc_myname; // how program was invoked.
127
#define NOEXIT 0 // values for Error().
132
// Prefix for printf formats:
// It consumes two arguments, a filename string (%s) and a line number (%d),
// which must come first in the argument list of any format that starts with
// it.
134
#define FILELINE "File \"%s\", line %d: "
136
// Common error string:
// It begins with FILELINE, so callers pass the filename and line number.
138
char * FmtErrLineEnds = FILELINE "Input line ends within an HTML tag; for this "
139
"translator, all tags must be on a single input line";
141
// Macros for skipping whitespace or non-whitespace; in the latter case,
142
// stopping at end of line or end of tag:
// Both advance their argument in place, so Pch must be a modifiable lvalue,
// and both evaluate it more than once.  Each expands to a brace block rather
// than do { } while (0), so "SKIPSPACE(p);" as the unbraced body of an "if"
// that has an "else" would not compile.
144
#define SKIPSPACE(Pch) { while (ISSPACE(*(Pch))) ++(Pch); }
146
#define SKIPNONSPACE(Pch) { while ((! ISSPACE(*(Pch))) \
147
&& (*(Pch) != CHNULL) \
148
&& (*(Pch) != '>')) ++(Pch); }
150
// Highest line number + 1, and last input line number that caused output:
// NOTE(review): only g_prevlinenum is declared in this copy; g_linenumlim,
// which ReadInputFile() assigns, is presumably declared on a missing line.
153
int g_prevlinenum = 0;
155
// <PRE> block equivalents in nroff need some special handling for bold font
156
// and for continuing a tagged paragraph; these are bit flags:
158
#define INPRE_BLOCK 0x1 // came from <PRE>.
159
#define INPRE_BOLD 0x2 // came from <B><PRE>.
160
#define INPRE_INDENT 0x4 // under <DL> below top level.
163
// ****************************************************************************
164
// DOCUMENT NODE TYPES:
166
// If an HTML tag is not in this list, it's unrecognized and causes a fatal
167
// error. Otherwise the tag type (dn_type) is one of DN_TYPE_*, which are
168
// defined so the code can use them, but they MUST match the order of
169
// initialization of g_dntype[].
171
// Note: The default node type is DN_TYPE_TEXT, that is, text outside of any
195
// Regarding dnt_nest: If an HTML tag type is marked as nesting, that means
196
// it is required not to be a singleton in this context; it must have a closing
197
// tag, and when the tree is built, the intervening text is nested as a child.
198
// Otherwise, intervening text is a sibling; a closing tag is allowed (whether
199
// or not this makes sense), but is not required; however, if present, it must
202
// One row per recognized HTML tag.  The rows below list, in field order:
// tag name, save-tag flag, nesting flag, and the DN_TYPE_* value, which must
// equal the row's index in the table (the TAG/SAVETAG/NEST macros index the
// table directly by node type).
// NOTE(review): the DN_TYPE_* definitions and the line that closes the struct
// and opens the g_dntype[] initializer are not visible in this copy.
struct docnode_type {
203
char * dnt_tag; // HTML tag.
204
bool_t dnt_savetag; // flag: save HTML tag.
205
bool_t dnt_nest; // flag: see comments above.
206
int dnt_type; // corresponding number.
209
// Note: HTML is case-insensitive, but for expediency this program is
210
// case-sensitive. Tags must be as shown below.
212
{ "", FALSE, FALSE, DN_TYPE_TEXT, }, // special, see above.
214
{ "HTML", FALSE, TRUE, DN_TYPE_HTML, },
215
{ "HEAD", FALSE, TRUE, DN_TYPE_HEAD, },
216
{ "TITLE", FALSE, TRUE, DN_TYPE_TITLE, },
217
{ "BODY", FALSE, TRUE, DN_TYPE_BODY, },
219
{ "!--", TRUE, FALSE, DN_TYPE_COMM, }, // comments are singleton tags.
221
{ "TABLE", FALSE, TRUE, DN_TYPE_TABLE, }, // limited understanding!
222
{ "TR", FALSE, TRUE, DN_TYPE_TR, },
223
{ "TD", TRUE, TRUE, DN_TYPE_TD, },
225
{ "DL", FALSE, TRUE, DN_TYPE_DL, },
226
{ "DT", FALSE, TRUE, DN_TYPE_DT, },
227
{ "DD", FALSE, FALSE, DN_TYPE_DD, }, // </DD> not req in our manuals.
229
{ "A", TRUE, TRUE, DN_TYPE_A, }, // either "name" or "href" type.
230
{ "B", FALSE, TRUE, DN_TYPE_B, },
231
{ "I", FALSE, TRUE, DN_TYPE_I, },
233
{ "PRE", FALSE, TRUE, DN_TYPE_PRE, },
235
{ "P", FALSE, FALSE, DN_TYPE_P, }, // </P> not req in our manuals.
236
{ "BR", FALSE, FALSE, DN_TYPE_BR, }, // </BR> not req in our manuals.
238
// Sentinel row: a null dnt_tag ends scans of the table.
{ PCNULL, FALSE, FALSE, 0, }, // end of list.
241
// Convenience macros:
243
#define TAG(DN_Type) (g_dntype[DN_Type].dnt_tag)
244
#define SAVETAG(DN_Type) (g_dntype[DN_Type].dnt_savetag)
245
#define NEST(DN_Type) (g_dntype[DN_Type].dnt_nest)
248
// ****************************************************************************
249
// DOCUMENT NODE DATA STRUCTURES:
251
// Document nodes are saved in a doubly-linked tree of docnodes. Each docnode
252
// points sideways to a doubly-linked list of sibling docnodes for
253
// previous/successive unnested document objects, plus points to its parent and
254
// to the first of a sideways doubly-linked child list of nested objects. All
255
// data lives in malloc'd memory.
257
// The dn_text field is null for a tag node unless the tag text is worth
258
// saving. The field is non-null for non-tag (document) text.
260
// Pdn_t is a pointer to one node of the document tree.
// NOTE(review): the "struct docnode {" opening line and the closing "};" are
// not visible in this copy; the fields below are the struct's members.
typedef struct docnode * Pdn_t;
263
int dn_type; // node type, index in g_dntype[].
264
int dn_linenum; // where introduced, for reconstructing.
265
bool_t dn_closed; // flag: closing tag was seen.
266
bool_t dn_noemit; // flag: skip on output, for marking ahead.
267
bool_t dn_bold; // flag: for <PRE>, whole section is bold.
269
char * dn_text; // node text; see above.
271
// Tree links; a null pointer (PDNNULL) means there is no such neighbor:
Pdn_t dn_Pprev; // previous node in sibling list.
272
Pdn_t dn_Pnext; // next node in sibling list.
273
Pdn_t dn_Pparent; // up-link to parent node, if any.
274
Pdn_t dn_Pchild; // down-link to first node in child subtree.
277
#define PDNNULL ((Pdn_t) NULL)
279
// Stays PDNNULL until the first docnode is created; main() treats a null head
// after reading as "no HTML tags found".
Pdn_t g_Pdnhead = PDNNULL; // head of docnode tree.
282
// ****************************************************************************
283
// FUNCTION SIGNATURES (forward declarations):
285
// Prototypes for functions used before their definitions.
// NOTE(review): some prototype lines are missing from this copy; in
// particular the ExtractPageInfo() declaration below has no visible final
// parameter line, and Usage(), which main() calls, is not declared here.
int main(int argc, char ** argv);
287
void ReadInputFile( char * Filename, FILE * PFile);
288
void CheckNesting(Pdn_t Pdn);
289
void EmitNroffHeader(char * Filename, char ** PPageName);
290
void EmitNroffBody(Pdn_t Pdn, int DLLevel, int InPRE, char * PageName);
292
void ExtractHeader( Pdn_t Pdn, char ** PFileRev,
293
char ** PPageName, char ** PPageSection,
294
char * PLcLetter, char ** PRevision);
295
char * ExtractText( Pdn_t Pdn);
296
void ExtractPageInfo(Pdn_t Pdn, char * Pch,
297
char ** PPageName, char ** PPageSection,
300
int TagType(char * Tag, bool_t * isclosing, char * Filename, int Linenum);
301
Pdn_t AppDocNode( Pdn_t Pdn, int linenum);
302
Pdn_t NewDocNode( Pdn_t dn_Pparent, int linenum);
303
char * SaveDocNode(Pdn_t Pdn, int DN_Type, char * Pch,
304
char * Filename, int Linenum);
305
bool_t ParentPre(Pdn_t Pdn, bool_t BoldOnly);
307
void MarkNoEmit( Pdn_t Pdn, bool_t Font);
308
void EmitText( char * Pch, int InPRE, int Linenum);
309
void EmitTextPRE( char * Pch, int InPRE);
310
void EmitTextBS( char * Pch);
311
bool_t NoWhiteSpace( char * Pch);
312
int CountNewlines(char * Pch);
314
char * StrSave( char * String);
// Variadic: the calls visible in this file end the argument list with PCNULL.
315
char * StrSaveN( char * String, ...);
316
void * Malloc( size_t Size);
319
// Callers pass ERREXIT or NOEXIT as Exitvalue and errno or NOERRNO as MyErrno;
// Message is a printf-style format for the remaining arguments.
void Error(int Exitvalue, int MyErrno, char * Message, ...);
321
// Declared only when DBGCODE() passes its argument through:
DBGCODE(void DumpTree(Pdn_t Pdn, int Depth, bool_t Separator);)
324
// ****************************************************************************
331
// Body of main(): check the argument count, open the input file, build the
// docnode tree with ReadInputFile(), verify tag nesting with CheckNesting(),
// then write the nroff header and body to stdout.
// NOTE(review): the function header, braces, and several statements (for
// example the assignments to filename and the condition guarding the "Cannot
// read" error) are not visible in this copy.
char * filename; // input file.
332
FILE * Pfile; // open input file.
333
char * pagename; // such as "Judy1".
336
// Exactly one command-line argument is accepted:
if (argc != 2) Usage();
340
// Assert that each dnt_type matches its index in the table, since the code
346
// The loop stops at the table's sentinel row, whose tag is PCNULL:
for (dn_type = 0; TAG(dn_type) != PCNULL; ++dn_type)
347
assert(g_dntype[dn_type].dnt_type == dn_type);
352
// READ THE INPUT FILE; BUILD TREE:
// (Only the single named file is opened in the visible code.)
354
if ((Pfile = fopen(filename, "r")) == (FILE *) NULL)
356
Error(ERREXIT, errno, "Cannot open file \"%s\" to read it",
360
ReadInputFile(filename, Pfile);
363
Error(ERREXIT, errno, "Cannot read from file \"%s\"", filename);
365
DBGCODE(DumpTree(g_Pdnhead, /* Depth = */ 0, /* Separator = */ TRUE);)
367
// ReadInputFile() leaves g_Pdnhead null if no docnode was ever created:
if (g_Pdnhead == PDNNULL)
369
Error(ERREXIT, NOERRNO, "No HTML tags found in file \"%s\"",
373
CheckNesting(g_Pdnhead);
376
// EMIT NROFF VERSION OF TEXT:
378
// EmitNroffHeader() returns the page name through &pagename, and the body
// pass uses it to recognize anchors that refer to this page:
EmitNroffHeader(filename, &pagename);
379
EmitNroffBody(g_Pdnhead, /* DLLevel = */ 0, /* InPRE = */ 0, pagename);
380
PUTC('\n'); // ensure last line is terminated.
387
// ****************************************************************************
388
// R E A D I N P U T F I L E
390
// Given a filename and stream pointer for reading, read and parse
391
// Judy-specific HTML and build a structure representing the document, under
392
// g_Pdnhead. Set g_linenumlim.
394
// Note: Ideally this would be a shorter function with helper subroutines, but
395
// I wrote this fast. :-)
397
// Note: This used to be called just ReadFile(), but on win_ipf, at least the
398
// cross-compile environment, this resulted in a duplicate symbol error, as if
399
// ReadFile() is in a library somewhere.
401
// Summary of the visible logic: each line is read with fgets() into a
// BUFSIZ-byte buffer and cut at its newline; text found before a "<" (or up to
// end of line) is stored in, or appended to, a DN_TYPE_TEXT docnode; the tag
// name after "<" is classified by TagType(); a closing tag marks a matching
// earlier sibling or the parent node as closed; any other tag gets a new
// docnode from AppDocNode(), which is passed to SaveDocNode(), whose return
// value becomes the new position in the line.  At the end g_linenumlim is set
// to linenum + 1.
// NOTE(review): the parameter list, braces, and several conditions and else
// branches of this function are not present in this copy, so the comments
// added below describe only the statements that are visible.
FUNCTION void ReadInputFile(
405
int linenum = 0; // input line number.
406
char line[BUFSIZ]; // read from file.
407
char * Pch; // place in line.
408
char * Pch2; // place in line.
409
char * Pchp; // prior to skipped whitespace.
410
char * tagname; // for error reporting.
411
char chold; // old char.
412
int dn_type; // docnode type.
413
bool_t isclosing; // is a closing tag.
414
Pdn_t Pdn = PDNNULL; // current docnode.
415
Pdn_t Pdnprev; // previous in sibling list.
418
// READ LINE, TRIM AT ANY NEWLINE, AND SKIP LEADING WHITESPACE:
420
// NOTE(review): fgets() returns an input line longer than BUFSIZ - 1 chars in
// several pieces; how linenum is advanced is not visible in this copy.
while (fgets(line, BUFSIZ, PFile) != PCNULL)
422
// strcspn() gives the index of the first newline, or of the terminating NUL
// if there is none, so this store is always within the string:
line[strcspn(line, "\n")] = CHNULL;
426
SKIPSPACE(Pch); // skip any leading whitespace.
428
// ParentPre() is asked whether the current node is under a <PRE>; when it is
// not, the text to be stored also starts after the leading whitespace:
if (! ParentPre(Pdn, /* BoldOnly = */ FALSE))
429
Pchp = Pch; // skip for storing, too.
432
// HANDLE EMPTY LINE:
434
// If within an "already open" DN_TYPE_TEXT, append a newline to the existing
435
// text in case it turns out to be significant later (mainly in a <PRE>
440
if ((Pdn->dn_type) == DN_TYPE_TEXT)
442
assert((Pdn->dn_text) != PCNULL);
443
(Pdn->dn_text) = StrSaveN(Pdn->dn_text, "\n", PCNULL);
451
// Look for a "<" that starts an HTML tag.
453
while (*Pch != CHNULL) // more on line.
455
Pch2 = strchr(Pch, '<'); // look for next tag start.
458
// SAVE DOCUMENT TEXT:
460
// Save any text preceding a tag (or through end of line) in a document text
461
// (DN_TYPE_TEXT) docnode; either the existing node if there is one, else a new
462
// node as a sibling or child of the previous, as appropriate.
464
// This builds one docnode for any inter-tag text, whether it's a portion of
465
// one line, or many lines long. In some ways that's simplest and most
466
// efficient, and in other ways it's weird.
468
// Note: Use Pchp here, not Pch, so as not to ignore leading whitespace on
471
// The tested value is the length of the text from Pchp up to the next "<", or
// to end of line if there is no tag; zero means there is no text to save:
if (! ((Pch2 == PCNULL) ? strlen(Pchp) : Pch2 - Pchp))
473
assert((Pch == Pch2) || (Pch2 == PCNULL));
477
if (g_Pdnhead == PDNNULL) // no current tree.
479
Error(ERREXIT, NOERRNO, FILELINE "For this "
480
"translator, the HTML file must start with an "
481
"HTML tag in \"<>\", and no other text",
485
// Current docnode is not DN_TYPE_TEXT, so append a new docnode child or
486
// sibling, as appropriate, to the tree:
488
assert(Pdn != PDNNULL); // should already be assigned.
490
if ((Pdn->dn_type) != DN_TYPE_TEXT)
492
Pdn = AppDocNode(Pdn, linenum);
493
(Pdn->dn_type) = DN_TYPE_TEXT;
496
// Save current text in current docnode, appending to any existing text:
498
// Note: To avoid messing up alignments in <PRE> text, use Pchp, not Pch,
499
// which can skip whitespace.
501
if (Pch2 != PCNULL) *Pch2 = CHNULL; // terminate briefly.
503
if ((Pdn->dn_text) == PCNULL) // no existing text.
504
(Pdn->dn_text) = StrSave(Pchp);
506
(Pdn->dn_text) = StrSaveN(Pdn->dn_text, Pchp, PCNULL);
508
assert((Pdn->dn_text) != PCNULL);
510
// If there's still a tag on the line, unterminate Pch2; otherwise append a
511
// newline to the saved text in case more document text follows:
514
*Pch2 = '<'; // unterminate.
516
(Pdn->dn_text) = StrSaveN(Pdn->dn_text, "\n", PCNULL);
518
} // if text preceding tag.
521
// ANALYZE HTML TAG (if any):
523
if (Pch2 == PCNULL) break; // no tag on line; line is done.
525
Pch = Pch2 + 1; // skip "<".
526
SKIPSPACE(Pch); // skip any whitespace.
529
Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, linenum);
532
// SKIPNONSPACE stops at whitespace, at ">", or at the end of the string:
SKIPNONSPACE(Pch2); // find whitespace or end of line or tag.
535
Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, linenum);
538
// With the tag name terminated, Pch is a plain string for TagType(), which
// also reports through isclosing whether this is a closing tag:
*Pch2 = CHNULL; // temporarily terminate.
539
dn_type = TagType(Pch, &isclosing, Filename, linenum);
543
// HANDLE CLOSING TAG:
545
// First ensure the tag checks out OK.
551
Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
552
"\"%s\" must be followed immediately by \">\"; "
553
"this translator does not even allow whitespace",
554
Filename, linenum, Pch);
557
if (g_Pdnhead == PDNNULL)
559
Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
560
"\"%s\" found before any opening tag in the file",
561
Filename, linenum, Pch);
564
tagname = Pch; // for error reporting.
565
Pch = Pchp = Pch2 + 1; // skip ">" in line.
567
// Check if the closing tag is an optional closing for the last unclosed,
568
// non-DN_TYPE_TEXT docnode in the current sibling list, if any:
572
// The scan steps backward through earlier siblings via dn_Pprev:
Pdnprev = Pdnprev->dn_Pprev)
574
if (((Pdnprev->dn_type) == DN_TYPE_TEXT)
575
|| (Pdnprev->dn_closed))
577
continue; // skip text or closed tag.
580
if ((Pdnprev->dn_type) == dn_type)
582
(Pdnprev->dn_closed) = TRUE; // optional closing.
587
// A non-null Pdnprev here presumably means the scan above stopped at a
// matching sibling (the statement that leaves the scan is not visible); this
// "continue" then resumes the enclosing per-line loop:
if (Pdnprev != PDNNULL) continue; // matched closing.
589
// Otherwise check that the closing tag is the (required) closing tag for the
590
// (required) parent node (which must not have been closed yet):
592
if ((Pdn->dn_Pparent) == PDNNULL)
594
Error(ERREXIT, NOERRNO, FILELINE "Closing HTML tag "
595
"\"%s\" does not match an opening tag",
596
Filename, linenum, tagname);
599
assert(! (Pdn->dn_Pparent->dn_closed));
601
if ((Pdn->dn_Pparent->dn_type) != dn_type)
603
Error(ERREXIT, NOERRNO, FILELINE "Parent HTML tag "
604
"\"%s\" found on line %d requires a closing tag, "
605
"but \"%s\" does not match it; check for out-of-"
606
"order HTML tags", Filename, linenum,
607
TAG(Pdn->dn_Pparent->dn_type),
608
Pdn->dn_Pparent->dn_linenum, tagname);
611
// Go uplevel in the tree to the parent node:
613
Pdn = Pdn->dn_Pparent;
614
(Pdn->dn_closed) = TRUE;
620
// NEW HTML TAG: ADD SIBLING OR CHILD NODE TO TREE:
622
// Save appropriate information about the tag and move beyond its closing point
623
// in the input line.
625
Pdn = AppDocNode(Pdn, linenum);
627
assert( Pch != PCNULL);
628
assert(*Pch != CHNULL);
629
// SaveDocNode() returns where scanning of the line resumes; text storage
// (Pchp) restarts at that same position:
Pch = Pchp = SaveDocNode(Pdn, dn_type, Pch, Filename, linenum);
631
} // while more on input line.
632
} // while more in input file.
634
// One past the last value of linenum:
g_linenumlim = linenum + 1;
639
// ****************************************************************************
640
// C H E C K N E S T I N G
642
// Given a docnode, recursively check that all nested HTML tags were closed.
643
// If not, error out.
645
FUNCTION void CheckNesting(
646
Pdn_t Pdn) // current docnode.
648
if (NEST(Pdn->dn_type) && (! (Pdn->dn_closed)))
650
Error(ERREXIT, NOERRNO, "No closing tag found for HTML tag \"%s\" "
651
"from input line %d", TAG(Pdn->dn_type), Pdn->dn_linenum);
654
if ((Pdn->dn_Pchild) != PDNNULL) CheckNesting(Pdn->dn_Pchild);
655
if ((Pdn->dn_Pnext) != PDNNULL) CheckNesting(Pdn->dn_Pnext);
660
#ifdef notdef // --------------------------------------------------------------
662
SUMMARY OF HTML USED IN JUDY MANUAL ENTRIES AND HOW TO TRANSLATE IT
665
The details are based on studying strchr(3C) as an example. If you
666
follow the requirements listed here when writing an HTML manual
667
entry, this translator should work for you, and if not, you should
668
get one of ~37 pretty good error messages.
670
Note: Backslashes are used in some literals here to hide them from
673
<HTML> </HTML> no output
674
<HEAD> </HEAD> no output
675
<TITLE> </TITLE> no output
676
<BODY> </BODY> no output
678
<!-- no output except from: @\(#) $\Revision: ...
679
that is, first comment line must contain "@\(#)"
681
<TABLE border=0 width="100%">
683
<TD width="5%" align="left">Judy(3X)</TD>
684
<TD width="90%" align="center">HP-UX Release 11i: June 2001</TD>
685
<TD width="5%" align="right">Judy(3X)</TD>
689
Pull the .TH page title from the first TD (consisting of
690
PageName and PageSection), and the ]W value from the second
691
TD, for an nroff file header like this:
693
.\" Auto-translated from <file> by <prog> at <date>
694
.\" <revision string above>
697
.ds )H Hewlett-Packard Company
698
.ds ]W HP-UX Release...
701
<DT>text</DT> top level: .SH text (ignore <B>...</B> if any)
702
otherwise: .TP 15\n.C text
703
<DD>text</DD> text (continues previous .TP)
706
Note: Anchors within <B><PRE> are not font-marked because this
707
messes up nroff, which doesn't nest fonts well.
709
Note: Font C (computer) is preferred to bold for other markings,
710
but it only seems to work for ".C", not ".ft C" nor "\fC", and ".C"
711
is hard to generate reliably in this translator, so use bold
714
<A name...>text</A> no output
716
<A href...>text</A> if within <B><PRE> no output, see above
717
else if text is <PageName>.*: \fBfunc()\fP
719
or as appropriate: \fItext\fP
721
Note: <B><PRE> is used in the HTML because <B> inside of <PRE>
722
seems to cause ugly extra blank lines, at least on Netscape.
724
<B>text</B> if <DT><B>: ignore <B>...</B>
725
if <B><PRE>: .ft B ... .ft P
726
otherwise: \fBtext\fP
728
<I>text</I> \fItext\fP
730
Note: Ideally most <PRE> text that is not bold should be in ".C"
731
font, but this requires ensuring there is no " on the line, and
732
would simply make the line bold anyway for nroff; it would only be a
733
real improvement for troff (which we do not expect will be used with
734
these manual entries).
736
<PRE>text</PRE> .IP or .PP, see <P> below
739
text, with .IP or .PP for blank lines
743
<P> if parent is <DL> below top level:
753
(for proper nroff handling, assuming HTML does not
754
treat backslashes specially itself, they are literal)
756
#endif // notdef --------------------------------------------------------------
759
// ****************************************************************************
760
// E M I T N R O F F H E A D E R
762
// Given the input filename, a pointer to a page name string to return, and the
763
// docnode tree under g_Pdnhead, extract header info and emit nroff header
766
FUNCTION void EmitNroffHeader(
767
char * Filename, // input file.
768
char ** PPageName) // such as "Judy1", to return.
770
char * filerev = PCNULL; // from first comment in input.
771
char * pagesection; // such as "3X".
772
char lcletter; // manual tab section, such as "j".
773
char * revision; // from centered table datum.
774
time_t currtime; // for ctime().
776
// Extract "weird" header values:
778
// These must be found in the docnodes tree and prepared for emitting nroff.
780
ExtractHeader(g_Pdnhead, &filerev,
781
PPageName, &pagesection, &lcletter, &revision);
783
if (filerev == PCNULL)
785
Error(ERREXIT, NOERRNO, "HTML file lacks comment lines; it must "
786
"contain at least one comment line, and the first one must "
787
"contain revision information");
790
// Emit file header; note, ctime() output already contains a newline:
792
(void) time(&currtime);
793
(void) printf(".\\\" Auto-translated to nroff -man from %s by %s at %s",
794
Filename, gc_myname, ctime(&currtime));
796
(void) printf(".\\\" %s\n", filerev);
797
(void) printf(".TA %c\n", lcletter);
798
(void) printf(".TH %s %s\n", *PPageName, pagesection);
799
(void) puts( ".ds )H Hewlett-Packard Company");
800
(void) printf(".ds ]W %s\n", revision);
802
} // EmitNroffHeader()
805
// ****************************************************************************
806
// E M I T N R O F F B O D Y
808
// Given a current node in the docnodes tree, the current <DL> level, a flag
809
// whether below a <PRE> node, the manual entry page name, and in
810
// g_prevlinenum, the previous input line number that resulted in output,
811
// recursively emit nroff body text. Translate the HTML docnodes as described
812
// in the comments prior to EmitNroffHeader(), and also translate certain HTML
813
// escaped chars back to literal form. Hope the results are legal nroff
814
// without spurious unintended nroff commands embedded.
816
// Note: This function recurses two ways; first, to the child subtree, and
817
// second, to the next sibling at the current level.
819
// Summary of the visible logic: a switch on Pdn->dn_type writes any nroff
// prefix for the node and may set "suffix"; the child subtree is then emitted
// recursively, "suffix" (if set) is written, and finally the next sibling is
// emitted recursively.
// NOTE(review): most case labels, braces, and several statements of this
// function are not present in this copy, so the comments added below describe
// only the statements that are visible.
FUNCTION void EmitNroffBody(
820
Pdn_t Pdn, // current top of subtree.
821
int DLLevel, // <DL> level, top = 0.
822
int InPRE, // bit flags for <PRE> handling.
823
char * PageName) // such as "Judy1".
825
int DLcount = 0; // set to 1 if hit <DL> here.
826
char * suffix = PCNULL; // to print after children, before siblings.
828
// When about to emit text, if the previous output came from a lower input line
829
// number, start with a newline; otherwise do not, and let the text
832
// Use CHECKPREV except when the text to be emitted is forced to a new line.
834
#ifdef CPPRINT // for special debugging:
835
#define CHECKPREVPRINT printf("\ncp %d %d\n", g_prevlinenum, Pdn->dn_linenum)
837
#define CHECKPREVPRINT // null
842
// Emits a newline only if something was output before (g_prevlinenum is
// nonzero) and it came from an earlier input line than this node:
{ if (g_prevlinenum && (g_prevlinenum < (Pdn->dn_linenum))) PUTC('\n');}
844
// To support CHECKPREV, call SETPREV() after emitting text that might need a
845
// line break to a new line, or SETPREVNONL to ensure NO newline, that is, the
846
// next text concatenates on the same line:
848
// Note: For a correct line number, SETPREV() must account for any newlines in
849
// the text just emitted.
851
#define SETPREV(Text) g_prevlinenum = (Pdn->dn_linenum) + CountNewlines(Text)
// g_linenumlim is one more than the last line number counted by
// ReadInputFile(), so the "previous < current" test above is presumably never
// true after SETPREVNONL:
852
#define SETPREVNONL g_prevlinenum = g_linenumlim // no newline.
854
// Check if under a lower-level <DL>, for continuing an indented paragraph:
856
#define UNDER_DL ((DLLevel > 1) \
857
&& ((Pdn->dn_Pparent) != PDNNULL) \
858
&& ((Pdn->dn_Pparent->dn_type) == DN_TYPE_DL))
861
// SWITCH ON DOCNODE TYPE:
863
if (Pdn->dn_noemit) // upstream node said to skip this one.
866
switch (Pdn->dn_type)
872
// Just emit it with HTML escaped chars modified, with backslashes doubled,
873
// with no trailing newline, and if not within <PRE> text, with any leading
874
// whitespace deleted, so that, for example, something like "\fI text\fP" does
879
assert((Pdn->dn_text) != PCNULL);
881
EmitText(Pdn->dn_text, InPRE, Pdn->dn_linenum);
882
SETPREV(Pdn->dn_text);
886
// IGNORE THESE TYPES:
888
// See EmitNroffHeader() for nroff equivalents already emitted in some cases.
889
// In some cases, mark all child nodes no-emit to ignore them.
894
case DN_TYPE_COMM: break;
901
MarkNoEmit(Pdn->dn_Pchild, /* Font = */ FALSE);
907
// At the top level these represent manual entry sections, and any bold markers
908
// around the text are ignored. Below the top level these translate to tagged
909
// paragraphs. Here, just note the increment and continue the walk.
917
// DESCRIPTIVE LIST TAG:
921
assert(NEST(DN_TYPE_DT)); // tag text must be child.
923
if ((Pdn->dn_Pchild) == PDNNULL) // no child exists.
925
Error(ERREXIT, NOERRNO, "HTML tag \"%s\" found at input line "
926
"%d lacks text, which is required by this translator",
927
TAG(DN_TYPE_DT), Pdn->dn_linenum);
930
// Further handling depends on DLLevel as explained above:
932
if (DLLevel <= 1) // major manual section.
936
// Only a <B> that is the first child is suppressed here:
if ((Pdn->dn_Pchild->dn_type) == DN_TYPE_B)
937
(Pdn->dn_Pchild->dn_noemit) = TRUE; // skip <B>...</B>.
940
// If a <DT> immediately follows a previous <DT>, use .PD 0 for the successive
941
// .TP to join lines:
945
if (((Pdn->dn_Pprev) != PDNNULL)
946
&& ((Pdn->dn_Pprev->dn_type) == DN_TYPE_DT))
952
PUTS("\n.TP 15\n.C ");
959
// DESCRIPTIVE LIST DATUM:
961
// Just proceed to dump the embedded text.
963
case DN_TYPE_DD: break;
968
// Ignore inbound ("name") anchors and process outbound ("href") anchor labels
969
// into appropriately highlighted text.
973
size_t len; // of substring.
974
Pdn_t Pdn2; // child node.
975
char * Pch; // place in text.
977
assert((Pdn->dn_text) != PCNULL);
979
// The anchor kind is detected by a plain substring search of the saved tag
// text, so it is case-sensitive and matches "name=" or "href=" anywhere in it:
if (strstr(Pdn->dn_text, "name=") != PCNULL) break;
981
if (strstr(Pdn->dn_text, "href=") == PCNULL)
983
Error(NOEXIT, NOERRNO, "Unrecognized HTML anchor type \"%s\" "
984
"at input line %d ignored; only \"name=\" and \"href=\" "
985
"are allowed by this translator",
986
Pdn->dn_text, Pdn->dn_linenum);
990
// Check for nested text (anchor label):
992
// TBD: The error message lies a little. If the text is something like,
993
// "foo<B>bar</B>", it passes this test; and later, all font tags in the anchor
994
// label are marked no-emit; and any other embedded tags, who knows what
997
// NOTE(review): Pdn->dn_Pchild is dereferenced here; no null check for it is
// visible in this copy -- confirm an "href" anchor always has a child.
if (((Pdn2 = Pdn->dn_Pchild)->dn_type) != DN_TYPE_TEXT)
999
Error(ERREXIT, NOERRNO, "HTML \"href\" anchor at input line "
1000
"%d lacks a directly nested anchor label, with no "
1001
"further nested tags; this translator cannot support "
1002
"nested tags in anchor labels", Pdn->dn_linenum);
1004
assert((Pdn2->dn_text) != PCNULL);
1006
// If the anchor is within a <B><PRE>, do nothing special with fonts, as
1007
// explained earlier:
1009
if (ParentPre(Pdn, /* BoldOnly = */ TRUE)) break;
1011
// Since anchor label text font will be forced in a moment, ignore any nested
1012
// font directives so they don't mess up nroff:
1014
MarkNoEmit(Pdn->dn_Pchild, /* Font = */ TRUE);
1016
// See if anchor label appears to be a reference to the current page, to some
1017
// other page, or else just make it italicized text:
1019
// TBD: This is pretty shaky, hope it's close enough.
1021
len = strlen(PageName);
1023
// Prefix comparison only: any label that begins with PageName is treated as a
// self-reference:
if (strncmp(Pdn2->dn_text, PageName, len) == 0) // self-reference.
1026
PUTS("\\fB"); // bold font.
1028
suffix = "\\fP"; // revert to previous font.
1032
// Contains '(' and no whitespace => appears to reference some other page:
1034
// Emit revised, tagged anchor label text immediately.
1036
if (((Pch = strchr(Pdn2->dn_text, '(')) != PCNULL)
1037
&& NoWhiteSpace(Pdn2->dn_text))
1040
PUTS("\\fI"); // italic font.
1041
// Cutting the label at "(" makes the next PUTS() write only the part of the
// label before it:
*Pch = CHNULL; // terminate briefly.
1042
PUTS(Pdn2->dn_text);
1044
PUTS("\\fP"); // revert to previous font.
1046
SETPREV(Pdn2->dn_text);
1048
(Pdn2->dn_noemit) = TRUE; // skip later.
1052
// Just make the anchor label italicized text:
1055
PUTS("\\fI"); // italic font.
1057
suffix = "\\fP"; // revert to previous font.
1065
// If the first child is <PRE>, use a "hard" font change; otherwise an in-line
1068
// Note: For <DT><B>, this node is already marked dn_noemit and not seen here.
1070
// Note: For <B><PRE>, nroff seems to reset font upon .PP, so mark the bold
1071
// for later emission.
1075
if (((Pdn->dn_Pchild) != PDNNULL)
1076
&& ((Pdn->dn_Pchild->dn_type) == DN_TYPE_PRE))
1078
(Pdn->dn_Pchild->dn_bold) = TRUE; // see above.
1083
PUTS("\\fB"); // bold font.
1085
suffix = "\\fP"; // revert to previous font.
1094
PUTS("\\fI"); // italic font.
1096
suffix = "\\fP"; // revert to previous font.
1100
// PREFORMATTED TEXT:
1102
// Emit prefix/suffix directives based on example in strchr(3C).
1106
PUTS(UNDER_DL ? "\n.IP\n.nf\n.ps +1\n" : "\n.PP\n.nf\n.ps +1\n");
1107
if (Pdn->dn_bold) PUTS(".ft B\n"); // deferred bold.
1109
// The suffix undoes the prefix in reverse order; ".ft P" is included only when
// ".ft B" was emitted above:
suffix = ((Pdn->dn_bold) ? "\n.ft P\n.ps\n.fi\n" : "\n.ps\n.fi\n");
1111
// set for all children:
1113
| ((Pdn->dn_bold) ? INPRE_BOLD : 0)
1114
| (UNDER_DL ? INPRE_INDENT : 0);
1120
// If the parent is a <DL> below the top level, use .IP to continue a .TP
1121
// (tagged paragraph); otherwise emit a standard .PP.
1125
PUTS(UNDER_DL ? "\n.IP\n" : "\n.PP\n");
1132
case DN_TYPE_BR: PUTS("\n.br\n"); SETPREVNONL; break;
1135
// UNRECOGNIZED DOCNODE TYPE:
1138
Error(ERREXIT, NOERRNO, "Internal error: Unexpected docnode type "
1139
"%d in docnodes tree", Pdn->dn_type);
1141
} // end switch on dn_type
1144
// VISIT CHILD AND SIBLING DOCNODES:
1146
// If this was a <DL> here, pass an incremented value to child nodes, but not
1147
// to sibling nodes.
1150
if ((Pdn->dn_Pchild) != PDNNULL)
1151
EmitNroffBody(Pdn->dn_Pchild, DLLevel + DLcount, InPRE, PageName);
1153
// The suffix closes whatever this node opened, after all of its children:
if (suffix != PCNULL) PUTS(suffix);
1155
// Siblings are visited by recursion as well, so unless the compiler turns this
// final call into a jump, stack depth grows with the length of the sibling
// list:
if ((Pdn->dn_Pnext) != PDNNULL)
1156
EmitNroffBody(Pdn->dn_Pnext, DLLevel, InPRE, PageName);
1158
} // EmitNroffBody()
1161
// ****************************************************************************
1162
// E X T R A C T H E A D E R
1164
// Given a current docnode and pointers to values to return, walk the entire
1165
// docnode tree once, recursively, in-order (parent then child then sibling) to
1166
// extract nroff header information. Find the first comment line, insist it
1167
// contain "@\(#)", and put this in *PFileRev. Also find exactly one
1168
// DN_TYPE_TABLE, containing exactly one DN_TYPE_TR, containing one DN_TYPE_TD
1169
// containing "align=\"left\"" and one DN_TYPE_TD containing
1170
// "align=\"center\"", and extract from these the nroff .TH pagename,
1171
// pagesection, and lcletter, and nroff ]W variable revision string,
1172
// respectively. Error out if anything goes wrong.
1174
// Note: Some of the returned strings are in separate malloc'd memory and
1175
// others are not; treat them as read-only.
1177
// Walk the docnode tree starting at Pdn and hand back, through the pointer
// parameters, the file revision, page name, page section, lowercase section
// letter, and revision string.  The found_* flags below are function-static,
// so they accumulate across the recursive calls and are never reset here.
FUNCTION void ExtractHeader(
1178
Pdn_t Pdn, // current docnode.
1179
char ** PFileRev, // from first comment in input.
1180
char ** PPageName, // such as "Judy1".
1181
char ** PPageSection, // such as "3X".
1182
char * PLcLetter, // manual tab section, such as "j".
1183
char ** PRevision) // from centered table datum.
1185
// Each flag records that the corresponding item has already been seen once:
static bool_t found_filerev = FALSE;
1186
static bool_t found_table = FALSE;
1187
static bool_t found_tr = FALSE;
1188
static bool_t found_tdleft = FALSE;
1189
static bool_t found_tdcenter = FALSE;
1191
char * text; // from text node.
1193
// Note: The following are used for both 0 and >= 2 instances, so they don't
1194
// include a line number because there is none for the 0 case:
1196
#define ERR_TABLE "This translator expects exactly one HTML table " \
1197
"(\"TABLE\" tag) in the input file"
1198
#define ERR_TR "This translator expects exactly one HTML table row " \
1199
"(\"TR\" tag) in the input file"
1200
#define ERR_TDLEFT "This translator expects exactly one HTML table row " \
1201
"datum (\"TD\" tag) in the input containing " \
1203
#define ERR_TDCENTER "This translator expects exactly one HTML table row " \
1204
"datum (\"TD\" tag) in the input containing " \
1205
"'align=\"center\"'"
1208
// CHECK CURRENT DOCNODE TYPE:
1210
switch (Pdn->dn_type)
1214
// NOTE(review): the case labels selecting each branch below are not visible
// in this copy of the file; confirm them against the docnode type list.
//
// First branch: only the first qualifying node supplies *PFileRev.
if (found_filerev) break; // already done.
1215
found_filerev = TRUE;
1217
// Hide the whatstring markers here from what(1) itself:
1219
if (strstr(Pdn->dn_text, "@" "(#)") == PCNULL)
1221
Error(ERREXIT, NOERRNO, "First HTML comment line in input, "
1222
"found at line %d, must contain a whatstring, marked by "
1223
"\"@" "(#)\"", Pdn->dn_linenum);
1226
// The node's own text is returned directly, not a copy:
*PFileRev = Pdn->dn_text;
1231
// A second table is an error; the table's first child must be a TR node:
if (found_table) Error(ERREXIT, NOERRNO, ERR_TABLE);
1234
if (((Pdn->dn_Pchild) == PDNNULL)
1235
|| ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TR))
1237
Error(ERREXIT, NOERRNO, "The HTML \"%s\" tag at line %d must "
1238
"be followed by a \"%s\" tag, but it is not",
1239
TAG(DN_TYPE_TABLE), Pdn->dn_linenum, TAG(DN_TYPE_TR));
1245
// A second table row is an error; the row's first child must be a TD node:
if (found_tr) Error(ERREXIT, NOERRNO, ERR_TR);
1248
if (((Pdn->dn_Pchild) == PDNNULL)
1249
|| ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TD))
1251
Error(ERREXIT, NOERRNO, "The HTML \"%s\" tag at line %d must "
1252
"be followed by a \"%s\" tag, but it is not",
1253
TAG(DN_TYPE_TR), Pdn->dn_linenum, TAG(DN_TYPE_TD));
1259
// Table datum: the alignment attribute saved in the tag text decides which
// header field this datum supplies.
if (strstr(Pdn->dn_text, "align=\"left\"") != PCNULL)
1261
if (found_tdleft) Error(ERREXIT, NOERRNO, ERR_TDLEFT);
1262
found_tdleft = TRUE;
1264
// Work on a private copy because ExtractPageInfo() writes null terminators
// into the string it is given:
text = StrSave(ExtractText(Pdn));
1265
ExtractPageInfo(Pdn, text, PPageName, PPageSection, PLcLetter);
1267
else if (strstr(Pdn->dn_text, "align=\"center\"") != PCNULL)
1269
if (found_tdcenter) Error(ERREXIT, NOERRNO, ERR_TDCENTER);
1270
found_tdcenter = TRUE;
1272
// Returned directly from the child text node, not copied:
*PRevision = ExtractText(Pdn);
1274
// else ignore line.
1276
} // switch on dn_type
1279
// VISIT CHILD AND SIBLING DOCNODES:
1281
// Note: Do this even though it seems redundant, to ensure no duplicates.
1283
if ((Pdn->dn_Pchild) != PDNNULL)
1285
ExtractHeader(Pdn->dn_Pchild, PFileRev,
1286
PPageName, PPageSection, PLcLetter, PRevision);
1289
if ((Pdn->dn_Pnext) != PDNNULL)
1291
ExtractHeader(Pdn->dn_Pnext, PFileRev,
1292
PPageName, PPageSection, PLcLetter, PRevision);
1296
// AT TOP OF TREE, CHECK FOR SUCCESS:
1298
// Note: If you read the fine print, it's clear the ERR_TR here is impossible
1301
// Only the call made on the tree head runs the final "was it found" checks;
// every nested call returns here.
if (Pdn != g_Pdnhead) return;
1303
if (! found_table) Error(ERREXIT, NOERRNO, ERR_TABLE);
1304
if (! found_tr) Error(ERREXIT, NOERRNO, ERR_TR);
1305
if (! found_tdleft) Error(ERREXIT, NOERRNO, ERR_TDLEFT);
1306
if (! found_tdcenter) Error(ERREXIT, NOERRNO, ERR_TDCENTER);
1308
} // ExtractHeader()
1311
// ****************************************************************************
1312
// E X T R A C T T E X T
1314
// Given a non-null docnode, return the non-null dn_text field from its first
1315
// child (directly, not a copy). Error out if anything goes wrong.
1317
// Return the dn_text pointer of Pdn's first child, after checking that the
// child exists, is a text node, and holds a non-empty string.  The pointer
// refers to the tree's own storage; no copy is made.
FUNCTION char * ExtractText(
1318
Pdn_t Pdn) // parent node.
1320
assert(Pdn != PDNNULL);
1322
#define ERR_NULLTEXT "Node for HTML tag \"%s\", found at line %d, lacks a " \
1323
"child \"text\" node containing a non-null text string " \
1326
// TBD: This does not report a case of a text string containing only
1327
// whitespace, but some callers do that themselves:
1329
// Any one of: no child, child is not text, text pointer is null, text is "".
if (((Pdn->dn_Pchild) == PDNNULL)
1330
|| ((Pdn->dn_Pchild->dn_type) != DN_TYPE_TEXT)
1331
|| ((Pdn->dn_Pchild->dn_text) == PCNULL)
1332
|| ((Pdn->dn_Pchild->dn_text[0]) == CHNULL))
1334
Error(ERREXIT, NOERRNO, ERR_NULLTEXT,
1335
TAG(Pdn->dn_type), Pdn->dn_linenum);
1338
return(Pdn->dn_Pchild->dn_text);
1343
// ****************************************************************************
1344
// E X T R A C T P A G E I N F O
1346
// Given a docnode, a non-null, modifiable string alleged to contain an nroff
1347
// -man header, such as "Judy(3X)", and pointers to return values, break out
1348
// and return the pieces. Error out if anything goes wrong.
1350
// Note: Returned strings are in separate malloc'd memory.
1352
// Split a header string such as "Judy(3X)" held in Pch into a page name and a
// page section, and derive the lowercase form of the page name's first
// letter.  Pch is modified in place (null terminators are written over the
// parentheses); the name and section are returned as StrSave() copies.
FUNCTION void ExtractPageInfo(
1353
Pdn_t Pdn, // for error reporting.
1354
char * Pch, // string to decipher.
1355
char ** PPageName, // such as "Judy1".
1356
char ** PPageSection, // such as "3X".
1357
char * PLcLetter) // manual tab section, such as "j".
1359
char * Pch2; // second place in string.
1360
char * Pch3 = PCNULL; // third place in string; init for gcc -Wall.
1362
// Find start of string:
1364
assert(Pch != PCNULL);
1367
// NOTE(review): the step that advances Pch past leading whitespace is not
// visible in this copy; the check below only sees the end-of-string case.
if (*Pch == CHNULL) // nothing but whitespace.
1369
Error(ERREXIT, NOERRNO, ERR_NULLTEXT,
1370
TAG(Pdn->dn_type), Pdn->dn_linenum);
1375
// Pch2 is left pointing at the '(' that starts the section suffix:
if ((Pch2 = strchr(Pch, '(')) == PCNULL)
1377
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
1378
"line %d, has a child \"text\" node whose text lacks a '('",
1379
TAG(Pdn->dn_type), Pdn->dn_linenum);
1384
// NOTE(review): the condition guarding the next error is not visible here;
// from its message it reports a '(' with no page name in front of it.
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
1385
"line %d, has a child \"text\" node whose text starts with "
1386
"'(' and lacks a leading pagename",
1387
TAG(Pdn->dn_type), Pdn->dn_linenum);
1390
// Validate the "()" suffix, such as "(1)" or "(3X)":
1392
// Pch3 is set to the ')' as a side effect of the second test; at most two
// characters may sit between the parentheses, and a second one must be A-Z.
if ((! ISDIGIT(Pch2[1])) // not "(<digit>".
1393
|| ((Pch3 = strchr(Pch2, ')')) == PCNULL) // no ")".
1394
|| (Pch2 + 3 < Pch3) // too far away.
1395
|| ((Pch2 + 3 == Pch3) // <digit><suffix>.
1396
&& (! ISUPPER(Pch2[2])))) // not <A-Z>.
1398
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
1399
"line %d, has a child \"text\" node whose text lacks a "
1400
"standard UNIX manual entry suffix in the form "
1401
"\"(<digit>[<A-Z>])\", such as \"(1)\" or \"(3X)\"",
1402
TAG(Pdn->dn_type), Pdn->dn_linenum);
1407
// Cut the caller's string into two pieces in place, then copy each piece:
*Pch2 = *Pch3 = CHNULL; // terminate at '(' and ')'.
1408
*PPageName = StrSave(Pch);
1409
*PPageSection = StrSave(Pch2 + 1);
1411
// Look for *PLcLetter:
1413
// The letter is the page name's first character, which must be uppercase:
if (! ISUPPER(**PPageName))
1415
Error(ERREXIT, NOERRNO, "Node for HTML tag \"%s\", found at "
1416
"line %d, has a child \"text\" node whose text does not "
1417
"start with an uppercase letter",
1418
TAG(Pdn->dn_type), Pdn->dn_linenum);
1421
*PLcLetter = tolower((int) (**PPageName));
1423
} // ExtractPageInfo()
1426
// ****************************************************************************
1429
// Given a non-null string that should be an HTML tag type, a pointer to a
1430
// bool_t to return whether this is a closing tag, and a filename and line
1431
// number for error reporting, look up the tag type in g_dntype[] and return
1432
// its index. Error out if not found.
1434
// As a special case, if presented with "!---" with any number of dashes, look
1437
// Look up the tag name Tag in the tag table (via TAG()) and return the index
// of the matching entry; *Pisclosing is set to whether Tag begins with '/'.
// An unknown tag is reported through Error() with ERREXIT.
FUNCTION int TagType(
1438
char * Tag, // to look up.
1439
bool_t * Pisclosing, // return flag: is a closing tag.
1440
char * Filename, // for error reporting.
1441
int Linenum) // for error reporting.
1443
int dn_type; // to return.
1444
char * mytag; // local variation.
1446
assert( Tag != PCNULL);
1447
assert(*Tag != CHNULL);
1449
// Check for closing tag (yes, even for types that don't really allow it):
1451
// The assignment inside the condition both stores the flag and tests it.
// NOTE(review): the statements guarding the Error() call below (presumably
// skipping the '/' and checking for an empty name) are not visible here.
if ((*Pisclosing = (*Tag == '/'))) // (()) for gcc.
1457
Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);
1460
// Translate comment tag to known type:
1462
// strncmp() returns 0 on a match, so any tag starting with "!--" is looked
// up as exactly "!--"; every other tag is looked up unchanged.
mytag = (strncmp(Tag, "!--", 3) ? Tag : "!--"); // see above.
1466
// Note: Main code already asserted dnt_type == dn_type for each entry.
1468
// Linear search; the table ends at the first entry whose TAG() is null:
for (dn_type = 0; TAG(dn_type) != PCNULL; ++dn_type)
1469
if (strcmp(mytag, TAG(dn_type)) == 0) return(dn_type);
1471
Error(ERREXIT, NOERRNO, FILELINE "Unrecognized HTML tag \"%s\"; "
1472
"see program source file for recognized types; this is a "
1473
"limited, special-purpose translator", Filename, Linenum, Tag);
1476
return(0); // make some compilers happy.
1481
// ****************************************************************************
1482
// A P P D O C N O D E
1484
// Given a current docnode tree node, the input file line number, and
1485
// g_Pdnhead, create a new docnode, append it to the tree in the right place,
1486
// and return a pointer to it, with g_Pdnhead updated if required:
1488
// * If empty tree, insert new as head of tree.
1490
// * Otherwise if current node nests and is not closed, insert as its child.
1492
// * Otherwise insert as a sibling of the current node.
1494
// Note: Most HTML tags are non-singletons and hence nest, but if the nesting
1495
// doesn't make sense, too bad, it's not detected, at least not here.
1497
// Create a docnode for input line Linenum, link it into the tree relative to
// the current node Pdn, and return it:
//
// * Empty tree (g_Pdnhead is null):  the new node becomes g_Pdnhead, and Pdn
//   is not examined.
// * Pdn's type nests and Pdn is not closed:  the new node becomes Pdn's child.
// * Otherwise:  the new node becomes Pdn's next sibling, with Pdn's parent.

FUNCTION Pdn_t AppDocNode(
    Pdn_t Pdn,          // current docnode tree node.
    int   Linenum)      // in input file.
{
    Pdn_t Pdnnew;       // node being added.

// No current tree, insert first node:

    if (g_Pdnhead == PDNNULL)
    {
        g_Pdnhead = NewDocNode(PDNNULL, Linenum);
        return(g_Pdnhead);
    }

// Insert new node as child, with parent set to current node:

    if (NEST(Pdn->dn_type) && (! (Pdn->dn_closed)))
    {
        Pdnnew = NewDocNode(Pdn, Linenum);
        (Pdn->dn_Pchild) = Pdnnew;
        return(Pdnnew);
    }

// Insert new node as sibling with same parent, linked in both directions:

    Pdnnew = NewDocNode(Pdn->dn_Pparent, Linenum);
    (Pdnnew->dn_Pprev) = Pdn;
    (Pdn->dn_Pnext)    = Pdnnew;
    return(Pdnnew);

} // AppDocNode()
1520
// ****************************************************************************
1521
// N E W D O C N O D E
1523
// Malloc() a new docnode and initialize its fields except dn_type, with error
1524
// checking. Set its parent to the given value.
1526
// Allocate a docnode with Malloc() (this file's checking wrapper), set every
// field except dn_type to its initial value, record the given parent and line
// number, and return the node.  dn_type is deliberately left for the caller
// to fill in.

FUNCTION Pdn_t NewDocNode(
    Pdn_t dn_Pparent,   // parent to record.
    int   Linenum)      // in input file.
{
    Pdn_t Pdn = (Pdn_t) Malloc(sizeof(struct docnode));

    (Pdn->dn_linenum) = Linenum;
    (Pdn->dn_closed)  = FALSE;
    (Pdn->dn_noemit)  = FALSE;
    (Pdn->dn_bold)    = FALSE;
    (Pdn->dn_text)    = PCNULL;

    (Pdn->dn_Pprev)   = PDNNULL;        // no links yet except to the parent.
    (Pdn->dn_Pnext)   = PDNNULL;
    (Pdn->dn_Pparent) = dn_Pparent;
    (Pdn->dn_Pchild)  = PDNNULL;

    return(Pdn);

} // NewDocNode()
1547
// ****************************************************************************
1548
// S A V E D O C N O D E
1550
// Given a pointer to a docnode, the docnode type, a string for the current
1551
// location (past tag name at whitespace or ">"), and a filename and line
1552
// number for error reporting, save the docnode type in the node, and also save
1553
// the tag text if appropriate; then find the end of the tag (">") and return
1554
// past that location (possibly before more whitespace). Error out in case of
1557
// Store DN_Type in the docnode, locate the '>' that ends the tag starting at
// Pch, and, for tag types where SAVETAG() is true, save a copy of the tag
// text that precedes the '>' in dn_text.
// NOTE(review): the statements that restore the '>' and return the position
// past it are not visible in this copy of the file.
FUNCTION char * SaveDocNode(
1558
Pdn_t Pdn, // docnode to modify.
1559
int DN_Type, // new type to save.
1560
char * Pch, // current location past tagname.
1561
char * Filename, // for error reporting.
1562
int Linenum) // for error reporting.
1564
char * Pch2 = PCNULL; // second location; init for gcc -Wall.
1566
assert( Pch != PCNULL);
1567
assert(*Pch != CHNULL);
1571
(Pdn->dn_type) = DN_Type;
1573
// Pass whitespace and then find the end of the tag:
1577
// Pch2 is set to the closing '>' as a side effect of the second test; running
// out of text before finding it is reported as a line-ends error.
if ((*Pch == CHNULL) || ((Pch2 = strchr(Pch, '>')) == PCNULL))
1578
Error(ERREXIT, NOERRNO, FmtErrLineEnds, Filename, Linenum);
1580
// Optionally save tag text:
1582
if (SAVETAG(DN_Type))
1584
// Overwriting the '>' makes Pch a string holding just the tag text, which
// StrSave() then copies into separately allocated memory:
*Pch2 = CHNULL; // temporarily terminate.
1585
(Pdn->dn_text) = StrSave(Pch);
1594
// ****************************************************************************
1595
// P A R E N T P R E
1597
// Given a docnode (can be null) and a flag whether only bold <PRE> is of
1598
// interest, return TRUE if any of its parents is a <PRE> (marked for bold
1599
// text), that is, DN_TYPE_PRE (with dn_bold set); otherwise return FALSE.
1601
// Return TRUE if any ancestor of Pdn is a DN_TYPE_PRE node; when BoldOnly is
// set, only a DN_TYPE_PRE ancestor with dn_bold set counts.  Return FALSE for
// a null Pdn or when no such ancestor exists.  Pdn itself is never tested,
// only its chain of parents.

FUNCTION bool_t ParentPre(
    Pdn_t  Pdn,         // starting node.
    bool_t BoldOnly)    // flag:  only care about bold <PRE>.
{
    Pdn_t Pdnup;        // ancestor being examined.

    if (Pdn == PDNNULL) return(FALSE);          // no parent.

    for (Pdnup = Pdn->dn_Pparent;
         Pdnup != PDNNULL;
         Pdnup = Pdnup->dn_Pparent)
    {
        if (((Pdnup->dn_type) == DN_TYPE_PRE)
         && ((! BoldOnly) || (Pdnup->dn_bold)))
        {
            return(TRUE);
        }
    }

    return(FALSE);                              // reached top of tree.

} // ParentPre()
1621
// ****************************************************************************
1622
// M A R K N O E M I T
1624
// Given a docnode (can be null), and a flag, recursively mark the node and all
1625
// children and siblings as do-not-emit, unless the flag is set, only mark font
1628
FUNCTION void MarkNoEmit(
1629
Pdn_t Pdn, // top node to mark.
1630
bool_t Font) // flag: only mark font docnodes.
1632
if (Pdn == PDNNULL) return;
1635
|| ((Pdn->dn_type) == DN_TYPE_B)
1636
|| ((Pdn->dn_type) == DN_TYPE_I))
1638
(Pdn->dn_noemit) = TRUE;
1641
if ((Pdn->dn_Pchild) != PDNNULL) MarkNoEmit(Pdn->dn_Pchild, Font);
1642
if ((Pdn->dn_Pnext) != PDNNULL) MarkNoEmit(Pdn->dn_Pnext, Font);
1647
// ****************************************************************************
1650
// Given a text string, a bitflag for <PRE> status, and an input line number
1651
// for error reporting, copy the text string to stdout with no added newlines,
1652
// but translating selected HTML escape codes to simple characters, doubling
1653
// any backslashes, and if InPRE, inserting .IP (if INPRE_INDENT) or .PP at
1654
// blank lines (between successive newlines), and if INPRE_BOLD, putting back
1655
// bold font since .IP/.PP seems to reset the font. Warn about unrecognized
1659
// Fields of one escape-code table entry, followed by visible table rows.
// EmitText() compares the text just past an '&' against et_escape for et_len
// bytes and, on a match, emits et_emit in place of the escape code.
// NOTE(review): the struct header and possibly further table rows are not
// visible in this copy of the file.
char * et_escape; // expected text.
1660
size_t et_len; // of expected text.
1661
char et_emit; // equivalent char.
1663
{ "amp;", 4, '&', },
1666
// A null et_escape is the sentinel that stops the lookup loop in EmitText():
{ PCNULL, 0, ' ', }, // end of list.
1669
// Emit the text at Pch, handling each '&' escape code in turn: the text ahead
// of the '&' goes out through EmitTextPRE(), then the escape code is looked
// up in et_list[] and its replacement character is emitted.  An unknown code
// draws a warning and its '&' is emitted as is.  Pch is modified while this
// runs (a null is written over each '&').
FUNCTION void EmitText(
1670
char * Pch, // text to emit.
1671
int InPRE, // bitflag for <PRE> status.
1672
int Linenum) // for error reporting.
1674
char * Pch2; // place in text.
1675
struct et_list * Pet; // place in et_list[].
1677
while ((Pch2 = strchr(Pch, '&')) != PCNULL) // another escape code.
1679
*Pch2 = CHNULL; // briefly terminate.
1680
EmitTextPRE(Pch, InPRE); // emit preceding part.
1682
Pch = Pch2 + 1; // past '&'.
1684
// Search the table up to its null-et_escape sentinel entry:
for (Pet = et_list; Pet->et_escape != PCNULL; ++Pet)
1686
if (strncmp(Pch, Pet->et_escape, Pet->et_len) == 0)
1688
PUTC(Pet->et_emit); // translate.
1689
Pch += Pet->et_len; // skip escapecode.
1694
// Stopping on the sentinel means no table entry matched:
if (Pet->et_escape == PCNULL) // no match found.
1696
Error(NOEXIT, NOERRNO, "Unrecognized HTML escape code in "
1697
"line %d (or text beginning on that line): \"%.4s...\", "
1698
"passed through unaltered", Linenum, Pch2);
1700
PUTC('&'); // emit start of escape code.
1701
// continue with Pch just after the '&'.
1705
EmitTextPRE(Pch, InPRE); // emit remaining part.
1710
// ****************************************************************************
1711
// E M I T T E X T P R E
1713
// Given a text string with no HTML escape codes in it and a bitflag for <PRE>
1714
// status (see EmitText()), emit the string with <PRE> handling, and with any
1715
// backslashes doubled.
1717
// Emit the string at Pch through EmitTextBS().  With InPRE clear the string
// is emitted in one piece.  Otherwise it is emitted one line at a time, and
// where a newline is immediately followed by another newline, a ".IP" (when
// INPRE_INDENT is set) or ".PP" directive is emitted, plus "\n.ft B" when
// INPRE_BOLD is set.  Pch is modified while this runs (a null is written
// over each newline).
FUNCTION void EmitTextPRE(
1718
char * Pch, // string to emit.
1719
int InPRE) // bitflag for <PRE> status.
1721
char * Pch2; // place in string.
1723
if (! InPRE) { EmitTextBS(Pch); return; }
1725
while ((Pch2 = strchr(Pch, '\n')) != PCNULL) // another newline.
1727
*Pch2 = CHNULL; // briefly terminate.
1728
EmitTextBS(Pch); // emit preceding part.
1730
PUTC('\n'); // emit current newline.
1732
// Advance Pch past the newline and test the character that follows it:
if (*(Pch = Pch2 + 1) == '\n') // successive newline.
1734
// emit before next newline:
1735
PUTS((InPRE & INPRE_INDENT) ? ".IP" : ".PP");
1738
if (InPRE & INPRE_BOLD) PUTS("\n.ft B");
1742
EmitTextBS(Pch); // emit trailing part.
1747
// ****************************************************************************
1748
// E M I T T E X T B S
1750
// Given a text string with no HTML escape codes in it, emit the string with
1751
// any backslashes doubled.
1753
FUNCTION void EmitTextBS(
1754
char * Pch) // string to emit.
1756
while (*Pch != CHNULL)
1758
PUTC(*Pch); if (*Pch == '\\') PUTC('\\');
1765
// ****************************************************************************
1766
// N O W H I T E S P A C E
1768
// Given a string, return TRUE if it contains no whitespace, otherwise FALSE.
1770
// Return FALSE as soon as a character for which ISSPACE() is true is found in
// the non-null string at Pch; return TRUE if the end of the string is reached
// first (which includes the empty string).

FUNCTION bool_t NoWhiteSpace(
    char * Pch)         // string to check.
{
    assert(Pch != PCNULL);

    for ( ; *Pch != CHNULL; ++Pch)
    {
        if (ISSPACE(*Pch)) return(FALSE);
    }

    return(TRUE);

} // NoWhiteSpace()
1781
// ****************************************************************************
1782
// C O U N T N E W L I N E S
1784
// Return the number of newline chars in a string.
1786
FUNCTION int CountNewlines(
1787
char * Pch) // in which to count newlines.
1791
assert(Pch != PCNULL);
1793
while (*Pch != CHNULL) count += ((*Pch++) == '\n');
1796
} // CountNewlines()
1799
// ****************************************************************************
1802
// Given a string, copy the string into malloc'd space and return a pointer to
1803
// the new copy. Error out if malloc() fails.
1805
// Copy the given string, terminator included, into memory obtained from
// Malloc() (this file's checking wrapper) and return the copy.

FUNCTION char * StrSave(
    char * string)      // string to copy.
{
    size_t size = strlen(string) + 1;   // bytes including the terminator.

    return((char *) memcpy(Malloc(size), string, size));

} // StrSave()
1813
// ****************************************************************************
1816
// Given one or more strings, terminated by a null pointer, allocate space for
1817
// the concatenation of the strings, concatenate them, and return a pointer to
1818
// the result. Also free() all but the last string.
1820
FUNCTION char * StrSaveN(
1823
va_list Parg; // place in arg list.
1824
char * stringN; // string from arg list.
1825
char * stringN1 = PCNULL; // previous string.
1826
size_t size = 0; // total bytes needed.
1827
char * result; // string to return.
1830
// DETERMINE SPACE NEEDED:
1832
va_start(Parg, String);
1834
for (stringN = String;
1836
stringN = va_arg(Parg, char *))
1838
size += strlen(stringN);
1844
// ALLOCATE SPACE, CONCATENATE STRINGS:
1846
*(result = (char *) Malloc((size_t) (size + 1))) = CHNULL;
1848
va_start(Parg, String);
1850
for (stringN = String;
1852
stringN = va_arg(Parg, char *))
1854
if (stringN1 != PCNULL) free((void *) stringN1);
1855
stringN1 = stringN; // all but last string.
1856
(void) strcat(result, stringN);
1865
// ****************************************************************************
1868
// Do a malloc() with error checking.
1870
FUNCTION void * Malloc(
1871
size_t Size) // bytes to get.
1873
void * Pm; // pointer to memory.
1875
if ((Pm = malloc(Size)) == (void *) NULL)
1876
Error(ERREXIT, errno, "Cannot malloc %d bytes", Size);
1883
// ****************************************************************************
1886
// Print usage messages (char *gc_usage[]) to stderr and exit with ERREXIT.
1887
// Follow each message line by a newline.
1889
FUNCTION void Usage(void)
1891
int which = 0; // current line.
1893
while (gc_usage[which] != PCNULL)
1895
(void) fprintf(stderr, gc_usage[which++], gc_myname);
1896
(void) putc('\n', stderr);
1904
// ****************************************************************************
1907
// Given an exit value (NOEXIT, ERREXIT, or USAGE), an errno value (NOERRNO if
1908
// none), a message (printf) string, and zero or more argument strings, print
1909
// an error message to stderr and, if exitvalue is NOEXIT, return; if USAGE,
1910
// print a pointer to the program's usage message; otherwise exit with the
1913
// Message is preceded by "<myname>: " using global gc_myname, and by
1914
// "Warning: " for NOEXIT, and followed by a period and newline. If myerrno
1915
// (system error number) is not NOERRNO, a relevant message is appended before
1918
// Print "<gc_myname>: ", then "Warning: " when Exitvalue is NOEXIT, then the
// printf-style Message with its arguments, then (when MyErrno is not NOERRNO)
// the strerror() text for MyErrno, then a period and a newline, all to
// stderr.  For USAGE a hint on how to get the usage summary follows.
// NOTE(review): the statement executed when Exitvalue != NOEXIT is not
// visible in this copy of the file; presumably it exits with Exitvalue.
FUNCTION void Error(
1919
int Exitvalue, // or NOEXIT for warning.
1920
int MyErrno, // system errno if relevant.
1921
char * Message, ...)
1923
va_list Parg; // place in arg list.
1925
(void) fprintf(stderr, "%s: ", gc_myname);
1926
if (Exitvalue == NOEXIT) (void) fputs("Warning: ", stderr);
1928
// Message is used as the format string for the variable arguments:
va_start(Parg, Message);
1929
(void) vfprintf(stderr, Message, Parg);
1932
if (MyErrno != NOERRNO)
1934
(void) fprintf(stderr, ": %s (errno = %d)", strerror(MyErrno),
1938
(void) putc('.', stderr);
1939
(void) putc('\n', stderr);
1941
if (Exitvalue == USAGE)
1943
(void) fprintf(stderr, "For a usage summary, run %s -?\n",
1947
// Wrapped in DBGCODE(), so the tree dump is conditional on that macro:
DBGCODE(DumpTree(g_Pdnhead, /* Depth = */ 0, /* Separator = */ FALSE);)
1949
if (Exitvalue != NOEXIT)
1957
// ****************************************************************************
1960
// Dump to stdout a representation of the docnode tree under g_Pdnhead.
1961
// Recursively traverse the tree in pre-order (parent then child then sibling).
1963
// Print one line per docnode to stdout for the sibling list starting at Pdn,
// descending into each node's children with Depth + 1, and finish a top-level
// dump (Depth == 0) with a separator line when Separator is set.  Nothing is
// dumped unless DUMP is set in the environment.
// NOTE(review): several printf() arguments and the early returns are not
// visible in this copy of the file.
FUNCTION void DumpTree(
1964
Pdn_t Pdn, // first node of current sibling list.
1965
int Depth, // current depth.
1966
bool_t Separator) // print a separator line after a long dump.
1968
int indent; // for counting to Depth.
1970
// Check if enabled:
1972
// The notice is written as an nroff comment line (it starts with .\"):
if (getenv("DUMP") == PCNULL)
1974
PUTS(".\\\" $DUMP not set; DumpTree() disabled.\n");
1978
// Check for empty tree:
1980
if ((Depth == 0) && (Pdn == PDNNULL))
1982
PUTS("Head pointer is null.\n");
1986
// Print siblings and each of their children, indented to Depth after the node
1989
while (Pdn != PDNNULL)
1991
// The node's address is printed first, as hex:
(void) printf("%lx ", (unsigned long) Pdn);
1993
// One '.' per level, so Depth 0 prints a single '.':
for (indent = 0; indent <= Depth; ++indent) PUTC('.');
1995
(void) printf(" %-5s %3d %c %lx %lx \"%s\"\n",
1996
((Pdn -> dn_type) == DN_TYPE_TEXT) ?
1997
"text" : TAG(Pdn -> dn_type),
1999
(Pdn->dn_closed) ? 'c' : 'o',
2004
if ((Pdn -> dn_Pchild) != PDNNULL)
2005
DumpTree(Pdn -> dn_Pchild, Depth + 1, Separator);
2007
Pdn = Pdn -> dn_Pnext;
2010
// Print separator line:
2012
if ((Depth == 0) && Separator)
2013
PUTS("=======================================================\n");