3
"""Generate ESIS events based on a LaTeX source document and
6
The conversion is not strong enough to work with arbitrary LaTeX
7
documents; it has only been designed to work with the highly stylized
8
markup used in the standard Python documentation. A lot of
9
information about specific markup is encoded in the control table
10
passed to the convert() function; changing this table can allow this
11
tool to support additional LaTeX markups.
13
The format of the table is largely undocumented; see the commented
14
headers where the table is specified in main(). There is no provision
15
to load an alternate table from an external file.
25
import xml.sax.saxutils
27
from types import ListType, StringType, TupleType
30
from xml.parsers.xmllib import XMLParser
32
from xmllib import XMLParser
35
from esistools import encode
41
class LaTeXFormatError(Exception):
45
class LaTeXStackError(LaTeXFormatError):
46
def __init__(self, found, stack):
47
msg = "environment close for %s doesn't match;\n stack = %s" \
51
LaTeXFormatError.__init__(self, msg)
54
# Compiled patterns used to tokenize the LaTeX input.  Each one is applied
# with .match() at the start of the remaining input text.
#
# Literal brackets and braces are written with explicit backslash escapes
# rather than one-character sets such as "[[]": a set that starts with a
# literal "[" is flagged as a possible nested set by newer versions of the
# re module.  All patterns are raw strings so that the regex engine, not the
# Python string parser, interprets \n, \t and \s.
_begin_env_rx = re.compile(r"\\begin\{([^}]*)\}")
_end_env_rx = re.compile(r"\\end\{([^}]*)\}")
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?) ?(\{|\s*\n?)")
_comment_rx = re.compile(r"%+ ?(.*)\n[ \t]*")
_text_rx = re.compile(r"[^\]~%\\{}]+")
_optional_rx = re.compile(r"\s*\[([^\]]*)\]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
# (only one level of nested braces is accepted).
_parameter_rx = re.compile(r"[ \n]*\{(([^{}]|\{[^}]*\})*)\}")
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
_start_group_rx = re.compile(r"[ \n]*\{")
_start_optional_rx = re.compile(r"[ \n]*\[")
68
ESCAPED_CHARS = "$%#^ {}&~"
73
sys.stderr.write(msg + "\n")
75
def pushing(name, point, depth):
    """Emit a debug trace line recording that element *name* opens at *point*.

    *depth* is accepted but is not used in the body.
    """
    trace = "pushing <%s> at %s" % (name, point)
    dbgmsg(trace)
78
def popping(name, point, depth):
    """Emit a debug trace line recording that element *name* closes at *point*.

    *depth* is accepted but is not used in the body.
    """
    trace = "popping </%s> at %s" % (name, point)
    dbgmsg(trace)
82
class _Stack(UserList.UserList):
83
def append(self, entry):
84
if type(entry) is not StringType:
85
raise LaTeXFormatError("cannot push non-string on stack: "
87
#dbgmsg("%s<%s>" % (" "*len(self.data), entry))
88
self.data.append(entry)
90
def pop(self, index=-1):
91
entry = self.data[index]
93
#dbgmsg("%s</%s>" % (" "*len(self.data), entry))
95
def __delitem__(self, index):
96
entry = self.data[index]
98
#dbgmsg("%s</%s>" % (" "*len(self.data), entry))
108
def __init__(self, ifp, ofp, table):
109
self.write = ofp.write
112
self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
118
def subconvert(self, endchar=None, depth=0):
120
# Parses content, including sub-structures, until the character
121
# 'endchar' is found (with no open structures), or until the end
122
# of the input data if endchar is None.
127
if line[0] == endchar and not stack:
130
m = _comment_rx.match(line)
134
self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
136
line = line[m.end():]
138
m = _begin_env_rx.match(line)
141
entry = self.get_env_entry(name)
142
# re-write to use the macro handler
143
line = r"\%s %s" % (name, line[m.end():])
145
m = _end_env_rx.match(line)
149
entry = self.get_entry(envname)
150
while stack and envname != stack[-1] \
151
and stack[-1] in entry.endcloses:
152
self.write(")%s\n" % stack.pop())
153
if stack and envname == stack[-1]:
154
self.write(")%s\n" % entry.outputname)
157
raise LaTeXStackError(envname, stack)
158
line = line[m.end():]
160
m = _begin_macro_rx.match(line)
163
macroname = m.group(1)
165
# Ugh! This is a combining character...
167
self.combining_char("c", line[endpos])
168
line = line[endpos + 1:]
170
entry = self.get_entry(macroname)
173
pos = string.find(line, "\\end{%s}" % macroname)
174
text = line[m.end(1):pos]
175
stack.append(entry.name)
176
self.write("(%s\n" % entry.outputname)
177
self.write("-%s\n" % encode(text))
178
self.write(")%s\n" % entry.outputname)
180
line = line[pos + len("\\end{%s}" % macroname):]
182
while stack and stack[-1] in entry.closes:
184
topentry = self.get_entry(top)
185
if topentry.outputname:
186
self.write(")%s\n-\\n\n" % topentry.outputname)
192
params, optional, empty, environ = self.start_macro(macroname)
193
# rip off the macroname
195
line = line[m.end(1):]
197
line = line[m.end(1):]
199
line = line[m.end():]
203
# handle attribute mappings here:
204
for pentry in params:
205
if pentry.type == "attribute":
207
m = _optional_rx.match(line)
208
if m and entry.outputname:
209
line = line[m.end():]
210
self.dump_attr(pentry, m.group(1))
211
elif pentry.text and entry.outputname:
212
# value supplied by conversion spec:
213
self.dump_attr(pentry, pentry.text)
215
m = _parameter_rx.match(line)
217
raise LaTeXFormatError(
218
"could not extract parameter %s for %s: %s"
219
% (pentry.name, macroname, `line[:100]`))
221
self.dump_attr(pentry, m.group(1))
222
line = line[m.end():]
223
elif pentry.type == "child":
225
m = _optional_rx.match(line)
227
line = line[m.end():]
228
if entry.outputname and not opened:
230
self.write("(%s\n" % entry.outputname)
231
stack.append(macroname)
232
stack.append(pentry.name)
233
self.write("(%s\n" % pentry.name)
234
self.write("-%s\n" % encode(m.group(1)))
235
self.write(")%s\n" % pentry.name)
238
if entry.outputname and not opened:
240
self.write("(%s\n" % entry.outputname)
241
stack.append(entry.name)
242
self.write("(%s\n" % pentry.name)
243
stack.append(pentry.name)
244
self.line = skip_white(line)[1:]
245
line = self.subconvert(
246
"}", len(stack) + depth + 1)[1:]
247
self.write(")%s\n" % stack.pop())
248
elif pentry.type == "content":
252
if entry.outputname and not opened:
254
self.write("(%s\n" % entry.outputname)
255
stack.append(entry.name)
256
line = skip_white(line)
258
raise LaTeXFormatError(
259
"missing content for " + macroname)
261
line = self.subconvert("}", len(stack) + depth + 1)
262
if line and line[0] == "}":
264
elif pentry.type == "text" and pentry.text:
265
if entry.outputname and not opened:
267
stack.append(entry.name)
268
self.write("(%s\n" % entry.outputname)
269
#dbgmsg("--- text: %s" % `pentry.text`)
270
self.write("-%s\n" % encode(pentry.text))
271
elif pentry.type == "entityref":
272
self.write("&%s\n" % pentry.name)
275
self.write("(%s\n" % entry.outputname)
276
stack.append(entry.name)
277
if not implied_content:
278
self.write(")%s\n" % entry.outputname)
281
if line[0] == endchar and not stack:
285
# end of macro or group
286
macroname = stack[-1]
288
conversion = self.table[macroname]
289
if conversion.outputname:
290
# otherwise, it was just a bare group
291
self.write(")%s\n" % conversion.outputname)
296
# don't worry about the "tie" aspect of this command
304
if line[0] == "\\" and line[1] in ESCAPED_CHARS:
305
self.write("-%s\n" % encode(line[1]))
308
if line[:2] == r"\\":
309
self.write("(BREAK\n)BREAK\n")
312
if line[:2] == r"\_":
313
line = "_" + line[2:]
315
if line[:2] in (r"\'", r'\"'):
316
# combining characters...
317
self.combining_char(line[1], line[2])
320
m = _text_rx.match(line)
322
text = encode(m.group())
323
self.write("-%s\n" % text)
324
line = line[m.end():]
326
# special case because of \item[]
327
# XXX can we axe this???
332
# avoid infinite loops
336
raise LaTeXFormatError("could not identify markup: %s%s"
337
% (`line[:100]`, extra))
339
entry = self.get_entry(stack[-1])
341
self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
346
raise LaTeXFormatError("elements remain on stack: "
347
+ string.join(stack, ", "))
348
# otherwise we just ran out of input here...
350
# This is a really limited table of combinations, but it will have
358
def combining_char(self, prefix, char):
    """Write the numeric reference for a combining-character pair.

    The ordinal is looked up in ``self._combinations`` under the key
    ``(prefix, char)``; a pair that is not in that table raises KeyError.
    The result is passed to ``self.write`` as a single data record.
    """
    key = (prefix, char)
    ordinal = self._combinations[key]
    record = "-\\%%%d;\n" % ordinal
    self.write(record)
362
def start_macro(self, name):
    """Fetch the table entry for *name* and return its parsing summary.

    Returns the 4-tuple ``(parameters, optional, empty, environment)``
    taken from the entry returned by ``self.get_entry(name)``.  *optional*
    is the ``optional`` flag of the first parameter; when the parameter
    list is empty (or otherwise false) that same false value is returned
    in its place.
    """
    entry = self.get_entry(name)
    params = entry.parameters
    if params:
        first_optional = params[0].optional
    else:
        # Same result as ``params and params[0].optional`` for a false value.
        first_optional = params
    return params, first_optional, entry.empty, entry.environment
368
def get_entry(self, name):
369
entry = self.table.get(name)
371
dbgmsg("get_entry(%s) failing; building default entry!" % `name`)
372
# not defined; build a default entry:
373
entry = TableEntry(name)
374
entry.has_content = 1
375
entry.parameters.append(Parameter("content"))
376
self.table[name] = entry
379
def get_env_entry(self, name):
380
entry = self.table.get(name)
382
# not defined; build a default entry:
383
entry = TableEntry(name, 1)
384
entry.has_content = 1
385
entry.parameters.append(Parameter("content"))
386
entry.parameters[-1].implied = 1
387
self.table[name] = entry
388
elif not entry.environment:
389
raise LaTeXFormatError(
390
name + " is defined as a macro; expected environment")
393
def dump_attr(self, pentry, value):
394
if not (pentry.name and value):
396
if _token_rx.match(value):
400
self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
403
def convert(ifp, ofp, table):
404
c = Conversion(ifp, ofp, table)
407
except IOError, (err, msg):
408
if err != errno.EPIPE:
412
def skip_white(line):
413
while line and line[0] in " %\n\t\r":
414
line = string.lstrip(line[1:])
420
def __init__(self, name, environment=0):
422
self.outputname = name
423
self.environment = environment
424
self.empty = not environment
433
def __init__(self, type, name=None, optional=0):
436
self.optional = optional
441
class TableParser(XMLParser):
442
def __init__(self, table=None):
446
self.__current = None
448
XMLParser.__init__(self)
451
for entry in self.__table.values():
452
if entry.environment and not entry.has_content:
453
p = Parameter("content")
455
entry.parameters.append(p)
456
entry.has_content = 1
459
def start_environment(self, attrs):
461
self.__current = TableEntry(name, environment=1)
462
self.__current.verbatim = attrs.get("verbatim") == "yes"
463
if attrs.has_key("outputname"):
464
self.__current.outputname = attrs.get("outputname")
465
self.__current.endcloses = string.split(attrs.get("endcloses", ""))
466
def end_environment(self):
469
def start_macro(self, attrs):
471
self.__current = TableEntry(name)
472
self.__current.closes = string.split(attrs.get("closes", ""))
473
if attrs.has_key("outputname"):
474
self.__current.outputname = attrs.get("outputname")
476
self.__table[self.__current.name] = self.__current
477
self.__current = None
479
def start_attribute(self, attrs):
480
name = attrs.get("name")
481
optional = attrs.get("optional") == "yes"
483
p = Parameter("attribute", name, optional=optional)
485
p = Parameter("attribute", optional=optional)
486
self.__current.parameters.append(p)
488
def end_attribute(self):
    """Store the buffered text on the most recently added parameter.

    The value of ``self.__buffer`` is assigned to the ``text`` attribute of
    the last item in the current entry's ``parameters`` list.
    """
    collected = self.__buffer
    self.__current.parameters[-1].text = collected
491
def start_entityref(self, attrs):
493
p = Parameter("entityref", name)
494
self.__current.parameters.append(p)
496
def start_child(self, attrs):
498
p = Parameter("child", name, attrs.get("optional") == "yes")
499
self.__current.parameters.append(p)
500
self.__current.empty = 0
502
def start_content(self, attrs):
503
p = Parameter("content")
504
p.implied = attrs.get("implied") == "yes"
505
if self.__current.environment:
507
self.__current.parameters.append(p)
508
self.__current.has_content = 1
509
self.__current.empty = 0
511
def start_text(self, attrs):
512
self.__current.empty = 0
515
p = Parameter("text")
516
p.text = self.__buffer
517
self.__current.parameters.append(p)
519
def handle_data(self, data):
    """Append the character *data* to the text accumulated in the buffer."""
    self.__buffer += data
523
def load_table(fp, table=None):
524
parser = TableParser(table=table)
525
parser.feed(fp.read())
527
return parser.get_table()
533
opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
534
for opt, arg in opts:
535
if opt in ("-D", "--debug"):
545
ofp = open(args[1], "w")
550
table = load_table(open(os.path.join(sys.path[0], 'conversion.xml')))
551
convert(ifp, ofp, table)
554
if __name__ == "__main__":