2
An experimental module for reading single-page dvi files output by
TeX. Several limitations currently keep it from being useful as a
general-purpose dvi preprocessor. The idea is that the file has a
single page containing only a single formula or other piece of text.
11
text, boxes = dvi.output(72)
12
for x,y,font,glyph in text:
13
fontname, pointsize = dvi.fontinfo(font)
15
for x,y,height,width in boxes:
19
from matplotlib.cbook import Bunch
23
# Parser states. read() starts in `pre`; pre() moves to `outer`, bop() moves
# to `inpage`, and the end-of-page handler moves back to `outer`. Most command
# handlers compare self.state against one of these values before acting.
dvistate = Bunch(pre=0, outer=1, inpage=2, post_post=3, finale=4)
27
# Remember the path of the dvi file and start with empty result lists.
# The file itself is not opened here; read() does that.
def __init__(self, filename):
28
self.filename = filename
29
self.text = [] # list of (x,y,fontnum,glyphnum), appended to by put_char
30
self.boxes = [] # list of (x,y,height,width): output() unpacks each entry as (x,y,h,w)
33
def output(self, dpi):
34
"""Return lists of text and box objects transformed into a standard
35
Cartesian coordinate system at the given dpi value. The coordinates
36
are floating point numbers, but otherwise precision is not lost and
37
coordinate values are not clipped to integers."""
39
minx, miny, maxx, maxy = t0[0], t0[1], t0[0], t0[1]
40
for x,y,_,_ in self.text + self.boxes:
45
d = dpi / (72.27 * 2**16) # from TeX's "scaled points" to dpi units
46
text = [ ((x-minx)*d, (maxy-y)*d, f, g) for (x,y,f,g) in self.text ]
47
boxes = [ ((x-minx)*d, (maxy-y)*d, h*d, w*d) for (x,y,h,w) in self.boxes ]
50
def fontinfo(self, f):
    """Return the name and size, in (Adobe) points, of font number *f*.

    Looks up ``self.fonts[f]`` and returns a ``(name, size)`` tuple, where
    ``size`` is the entry's ``scale`` converted from TeX scaled points
    (72.27 * 2**16 to the inch) to Adobe points (72 to the inch).

    An unknown font number propagates whatever ``self.fonts[f]`` raises
    (``KeyError`` if ``self.fonts`` is a dict).
    """
    font = self.fonts[f]
    # scaled points -> inches -> Adobe points
    return font.name, font.scale * (72.0 / (72.27 * 2**16))
54
def read(self, debug=False):
55
self.file = open(self.filename, 'rb')
57
self.state = dvistate.pre
59
byte = ord(self.file.read(1))
63
if debug and self.state == dvistate.inpage:
65
if byte == 140: break # end of page; we only read a single page for now
69
def arg(self, nbytes, signed=False):
70
str = self.file.read(nbytes)
72
if signed and value >= 0x80:
74
for i in range(1, nbytes):
75
value = 0x100*value + ord(str[i])
78
def dispatch(self, byte):
79
if 0 <= byte <= 127: self.set_char(byte)
80
elif byte == 128: self.set_char(self.arg(1))
81
elif byte == 129: self.set_char(self.arg(2))
82
elif byte == 130: self.set_char(self.arg(3))
83
elif byte == 131: self.set_char(self.arg(4, True))
84
elif byte == 132: self.set_rule(self.arg(4, True), self.arg(4, True))
85
elif byte == 133: self.put_char(self.arg(1))
86
elif byte == 134: self.put_char(self.arg(2))
87
elif byte == 135: self.put_char(self.arg(3))
88
elif byte == 136: self.put_char(self.arg(4, True))
89
elif byte == 137: self.put_rule(self.arg(4, True), self.arg(4, True))
90
elif byte == 138: self.nop()
91
elif byte == 139: self.bop(*[self.arg(4, True) for i in range(11)])
92
elif byte == 140: self.eop()
93
elif byte == 141: self.push()
94
elif byte == 142: self.pop()
95
elif byte == 143: self.right(self.arg(1, True))
96
elif byte == 144: self.right(self.arg(2, True))
97
elif byte == 145: self.right(self.arg(3, True))
98
elif byte == 146: self.right(self.arg(4, True))
99
elif byte == 147: self.right_w(None)
100
elif byte == 148: self.right_w(self.arg(1, True))
101
elif byte == 149: self.right_w(self.arg(2, True))
102
elif byte == 150: self.right_w(self.arg(3, True))
103
elif byte == 151: self.right_w(self.arg(4, True))
104
elif byte == 152: self.right_x(None)
105
elif byte == 153: self.right_x(self.arg(1, True))
106
elif byte == 154: self.right_x(self.arg(2, True))
107
elif byte == 155: self.right_x(self.arg(3, True))
108
elif byte == 156: self.right_x(self.arg(4, True))
109
elif byte == 157: self.down(self.arg(1, True))
110
elif byte == 158: self.down(self.arg(2, True))
111
elif byte == 159: self.down(self.arg(3, True))
112
elif byte == 160: self.down(self.arg(4, True))
113
elif byte == 161: self.down_y(None)
114
elif byte == 162: self.down_y(self.arg(1, True))
115
elif byte == 163: self.down_y(self.arg(2, True))
116
elif byte == 164: self.down_y(self.arg(3, True))
117
elif byte == 165: self.down_y(self.arg(4, True))
118
elif byte == 166: self.down_z(None)
119
elif byte == 167: self.down_z(self.arg(1, True))
120
elif byte == 168: self.down_z(self.arg(2, True))
121
elif byte == 169: self.down_z(self.arg(3, True))
122
elif byte == 170: self.down_z(self.arg(4, True))
123
elif 171 <= byte <= 234: self.fnt_num(byte-171)
124
elif byte == 235: self.fnt_num(self.arg(1))
125
elif byte == 236: self.fnt_num(self.arg(2))
126
elif byte == 237: self.fnt_num(self.arg(3))
127
elif byte == 238: self.fnt_num(self.arg(4, True))
128
elif 239 <= byte <= 242:
129
len = self.arg(byte-238)
130
special = self.file.read(len)
132
elif 243 <= byte <= 246:
133
k = self.arg(byte-242, byte==246)
134
c, s, d, a, l = [ self.arg(x) for x in (4, 4, 4, 1, 1) ]
135
n = self.file.read(a+l)
136
self.fnt_def(k, c, s, d, a, l, n)
138
i, num, den, mag, k = [ self.arg(x) for x in (1, 4, 4, 4, 1) ]
139
x = self.file.read(k)
140
self.pre(i, num, den, mag, x)
141
elif byte == 248: self.post()
142
elif byte == 249: self.post_post()
144
raise ValueError, "unknown command: byte %d"%byte
146
def pre(self, i, num, den, mag, comment):
147
if self.state != dvistate.pre:
148
raise ValueError, "pre command in middle of dvi file"
150
raise ValueError, "Unknown dvi format %d"%i
151
if num != 25400000 or den != 7227 * 2**16:
152
raise ValueError, "nonstandard units in dvi file"
153
# meaning: TeX always uses those exact values, so it
154
# should be enough for us to support those
155
# (There are 72.27 pt to an inch so 7227 pt =
156
# 7227 * 2**16 sp to 100 in. The numerator is multiplied
157
# by 10^5 to get units of 10**-7 meters.)
159
raise ValueError, "nonstandard magnification in dvi file"
160
# meaning: LaTeX seems to frown on setting \mag, so
161
# I think we can assume this is constant
162
self.state = dvistate.outer
164
def set_char(self, char):
165
if self.state != dvistate.inpage:
166
raise ValueError, "misplaced set_char in dvi file"
168
font = self.fonts[self.f]
169
width = font.tfm.width[char]
170
width = (width * font.scale) >> 20
173
def set_rule(self, a, b):
174
if self.state != dvistate.inpage:
175
raise ValueError, "misplaced set_rule in dvi file"
179
def put_char(self, char):
    """Record glyph *char* at the current position.

    Appends ``(self.h, self.v, self.f, char)`` to ``self.text``; the
    current position (``self.h``, ``self.v``) is left unchanged.

    Raises ValueError if the reader is not currently inside a page.
    """
    if self.state != dvistate.inpage:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("misplaced put_char in dvi file")
    self.text.append((self.h, self.v, self.f, char))
184
def put_rule(self, a, b):
185
if self.state != dvistate.inpage:
186
raise ValueError, "misplaced put_rule in dvi file"
188
self.boxes.append((self.h, self.v, a, b))
193
def bop(self, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, p):
194
if self.state != dvistate.outer:
195
raise ValueError, "misplaced bop in dvi file"
196
self.state = dvistate.inpage
197
self.h, self.v, self.w, self.x, self.y, self.z = 0, 0, 0, 0, 0, 0
201
if self.state != dvistate.inpage:
202
raise ValueError, "misplaced eop in dvi file"
203
self.state = dvistate.outer
204
del self.h, self.v, self.w, self.x, self.y, self.z, self.stack
207
if self.state != dvistate.inpage:
208
raise ValueError, "misplaced push in dvi file"
209
self.stack.append((self.h, self.v, self.w, self.x, self.y, self.z))
212
if self.state != dvistate.inpage:
213
raise ValueError, "misplaced pop in dvi file"
214
self.h, self.v, self.w, self.x, self.y, self.z = self.stack.pop()
217
if self.state != dvistate.inpage:
218
raise ValueError, "misplaced right in dvi file"
221
def right_w(self, new_w):
222
if self.state != dvistate.inpage:
223
raise ValueError, "misplaced w in dvi file"
224
if new_w is not None:
228
def right_x(self, new_x):
229
if self.state != dvistate.inpage:
230
raise ValueError, "misplaced x in dvi file"
231
if new_x is not None:
236
if self.state != dvistate.inpage:
237
raise ValueError, "misplaced down in dvi file"
240
def down_y(self, new_y):
241
if self.state != dvistate.inpage:
242
raise ValueError, "misplaced y in dvi file"
243
if new_y is not None:
247
def down_z(self, new_z):
248
if self.state != dvistate.inpage:
249
raise ValueError, "misplaced z in dvi file"
250
if new_z is not None:
254
def fnt_num(self, k):
255
if self.state != dvistate.inpage:
256
raise ValueError, "misplaced fnt_num in dvi file"
259
def xxx(self, special):
262
def fnt_def(self, k, c, s, d, a, l, n):
263
filename = n[-l:] + '.tfm'
264
pipe = os.popen('kpsewhich ' + filename, 'r')
265
filename = pipe.readline().rstrip()
268
if c != 0 and tfm.checksum != 0 and c != tfm.checksum:
269
raise ValueError, 'tfm checksum mismatch: %s'%n
270
# It seems that the assumption behind the following check is incorrect:
271
#if d != tfm.design_size:
272
# raise ValueError, 'tfm design size mismatch: %d in dvi, %d in %s'%\
273
# (d, tfm.design_size, n)
274
self.fonts[k] = Bunch(scale=s, tfm=tfm, name=n)
277
raise NotImplementedError
280
raise NotImplementedError
284
def __init__(self, filename):
285
file = open(filename, 'rb')
287
header1 = file.read(24)
289
struct.unpack('!4H', header1[2:10])
290
header2 = file.read(4*lh)
291
self.checksum, self.design_size = \
292
struct.unpack('!2I', header2[:8])
293
# plus encoding information etc.
295
char_info = file.read(4*(ec-bc+1))
296
widths = file.read(4*nw)
300
widths = struct.unpack('!%dI' % nw, widths)
302
for i in range(ec-bc):
303
self.width[bc+i] = widths[ord(char_info[4*i])]
305
if __name__ == '__main__':
308
for x,y,f,c in dvi.text:
309
print x,y,c,chr(c),dvi.fonts[f].__dict__