1
# Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009
2
# David Turner <david@freetype.org>
5
# this file contains definitions of classes needed to decompose
6
# C source files into a series of multi-line "blocks". There are
9
# - normal blocks, which contain source code or ordinary comments
11
# - documentation blocks, which have restricted formatting, and
12
# whose text always starts with a documentation markup tag like
13
# "<Function>", "<Type>", etc..
15
# the routines used to process the content of documentation blocks
16
# are not contained here, but in "content.py"
18
# the classes and methods found here only deal with text parsing
19
# and basic documentation block extraction
22
import fileinput, re, sys, os, string
26
################################################################
28
## BLOCK FORMAT PATTERN
30
## A simple class containing compiled regular expressions used
31
## to detect potential documentation format block comments within
34
## note that the 'column' pattern must contain a group that will
35
## be used to "unbox" the content of documentation comment blocks
37
class SourceBlockFormat:
    """compiled regular expressions describing one documentation block format

    attributes:
      id     -- the identifier given at construction time
      start  -- compiled pattern for the line that opens a block
      column -- compiled pattern for a line inside a block; it must contain
                a group, used to "unbox" the content of the comment block
      end    -- compiled pattern for the line that closes a block
    """

    def __init__( self, id, start, column, end ):
        """create a block pattern, used to recognize special documentation blocks

        'start', 'column' and 'end' are regular expression sources; all
        three are compiled in re.VERBOSE mode, so whitespace and '#'
        comments inside them are ignored
        """
        # the identifier must be kept: other code in this file reads
        # 'format.id' when dumping blocks for debugging
        self.id     = id
        self.start  = re.compile( start, re.VERBOSE )
        self.column = re.compile( column, re.VERBOSE )
        self.end    = re.compile( end, re.VERBOSE )
49
# format 1 documentation comment blocks look like the following:
51
# /************************************/
55
# /************************************/
57
# we define a few regular expressions here to detect them
61
\s* # any number of whitespace
62
/\*{2,}/ # followed by '/' and at least two asterisks then '/'
63
\s*$ # probably followed by whitespace
67
\s* # any number of whitespace
68
/\*{1} # followed by '/' and precisely one asterisk
69
([^*].*) # followed by anything (group 1)
70
\*{1}/ # followed by one asterisk and a '/'
71
\s*$ # probably followed by whitespace
74
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
78
# format 2 documentation comment blocks look like the following:
80
# /************************************ (at least 2 asterisks)
85
# **/ (1 or more asterisks at the end)
87
# we define a few regular expressions here to detect them
90
\s* # any number of whitespace
91
/\*{2,} # followed by '/' and at least two asterisks
92
\s*$ # probably followed by whitespace
96
\s* # any number of whitespace
97
\*{1}(?!/) # followed by precisely one asterisk not followed by `/'
98
(.*) # then anything (group1)
102
\s* # any number of whitespace
103
\*+/ # followed by at least one asterisk, then '/'
106
re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
110
# the list of supported documentation block formats, we could add new ones
113
re_source_block_formats = [re_source_block_format1, re_source_block_format2]
117
# the following regular expressions correspond to markup tags
118
# within the documentation comment blocks. they're equivalent
119
# despite their different syntax
121
# notice how each markup tag _must_ begin a new line
123
# a markup tag _must_ begin a new line; both syntaxes below are
# equivalent, and each one captures the tag name in group 1
re_markup_tags = [
    re.compile( r'''\s*<(\w*)>''' ),    # <xxxx> format
    re.compile( r'''\s*@(\w*):''' ),    # @xxxx: format
]

# the individual patterns stay reachable under their own names
re_markup_tag1, re_markup_tag2 = re_markup_tags

# cross-reference detection, applied once markup tags have been stripped
re_crossref = re.compile( r'@(\w*)(.*)' )

# italic and bold styles in paragraph text; group 1 holds the styled
# word and group 3 the text that follows it
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )    # *bold*
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )      # _italic_

# detects the end of commented source lines
re_source_sep = re.compile( r'\s*/\*\s*\*/' )

# used to perform cross-referencing within source output
re_source_crossref = re.compile( r'(\W*)(\w*)' )
154
# a list of reserved source keywords
156
re_source_keywords = re.compile( '''\\b ( typedef |
175
\#endif ) \\b''', re.VERBOSE )
178
################################################################
180
## SOURCE BLOCK CLASS
182
## A SourceProcessor is in charge of reading a C source file
183
## and decomposing it into a series of different "SourceBlocks".
184
## each one of these blocks can be made of the following data:
186
## - A documentation comment block that starts with "/**" and
187
## whose exact format will be discussed later
189
## - normal source lines, including comments
191
## the important fields in a text block are the following ones:
193
## self.lines : a list of text lines for the corresponding block
195
## self.content : for documentation comment blocks only, this is the
196
## block content that has been "unboxed" from its
197
## decoration. This is None for all other blocks
198
## (i.e. sources or ordinary comments with no starting
203
def __init__( self, processor, filename, lineno, lines ):
204
self.processor = processor
205
self.filename = filename
207
self.lines = lines[:]
208
self.format = processor.format
211
if self.format == None:
216
# extract comment lines
219
for line0 in self.lines:
220
m = self.format.column.match( line0 )
222
lines.append( m.group( 1 ) )
224
# now, look for a markup tag
226
l = string.strip( l )
228
for tag in re_markup_tags:
233
def location( self ):
    """return the block's position as a "(filename:lineno)" string"""
    pieces = ["(", self.filename, ":", repr( self.lineno ), ")"]
    return "".join( pieces )
236
# debugging only - not used in normal operations
239
print "{{{content start---"
240
for l in self.content:
242
print "---content end}}}"
247
fmt = repr( self.format.id ) + " "
249
for line in self.lines:
254
################################################################
256
## SOURCE PROCESSOR CLASS
258
## The SourceProcessor is in charge of reading a C source file
259
## and decomposing it into a series of different "SourceBlock"
262
## each one of these blocks can be made of the following data:
264
## - A documentation comment block that starts with "/**" and
265
## whose exact format will be discussed later
267
## - normal source lines, including comments
270
class SourceProcessor:
272
def __init__( self ):
273
"""initialize a source processor"""
280
"""reset a block processor, clean all its blocks"""
284
def parse_file( self, filename ):
285
"""parse a C source file, and add its blocks to the processor's list"""
288
self.filename = filename
295
for line in fileinput.input( filename ):
296
# strip trailing newlines, important on Windows machines!
297
if line[-1] == '\012':
300
if self.format == None:
301
self.process_normal_line( line )
303
if self.format.end.match( line ):
304
# that's a normal block end, add it to 'lines' and
306
self.lines.append( line )
307
self.add_block_lines()
308
elif self.format.column.match( line ):
309
# that's a normal column line, add it to 'lines'
310
self.lines.append( line )
312
# humm.. this is an unexpected block end,
313
# create a new block, but don't process the line
314
self.add_block_lines()
316
# we need to process the line again
317
self.process_normal_line( line )
319
# record the last lines
320
self.add_block_lines()
322
def process_normal_line( self, line ):
    """process a normal line and check whether it is the start of a new block

    when 'line' matches the 'start' pattern of one of the formats in
    're_source_block_formats', the lines accumulated so far are flushed
    with add_block_lines(), the matching format becomes the current one
    and the current input line number is recorded

    'line' is appended to 'self.lines' in every case
    """
    for f in re_source_block_formats:
        if f.start.match( line ):
            # close the block that precedes this documentation comment
            self.add_block_lines()

            # remember which format we are in: parse_file() tests
            # 'self.format' to decide how to handle the next lines
            self.format = f
            self.lineno = fileinput.filelineno()

    self.lines.append( line )
332
def add_block_lines( self ):
    """add the current accumulated lines and create a new block

    nothing is created when no line has been accumulated; in every
    case the processor is left without a current format and with an
    empty line buffer, ready for the next block
    """
    if self.lines:
        block = SourceBlock( self, self.filename, self.lineno, self.lines )
        self.blocks.append( block )

    # start afresh, otherwise the next block would also contain the
    # lines of this one and would inherit its format
    self.format = None
    self.lines  = []
341
# debugging only, not used in normal operations
343
"""print all blocks in a processor"""
344
for b in self.blocks: