1
require 'kpeg/grammar_renderer'
6
# Constructor taking a name, a grammar object (`gram`) and an optional
# `debug` flag that defaults to false.
# NOTE(review): presumably name and gram end up in @name / @grammar, which
# other methods in this file read -- confirm against the constructor body.
def initialize(name, gram, debug=false)
15
attr_accessor :standalone
18
name = name.gsub("-","_hyphen_")
26
str = "_save#{@saves}"
37
# Appends the Ruby source of one AST node class to `code`.
#
# short       - not referenced by the statements below
# code        - buffer that receives the generated source via <<
# description - text handed to FormatParser
#
# Returns early, emitting nothing, when the description does not parse
# from the "ast_root" rule.
def output_ast(short, code, description)
38
parser = FormatParser.new description
40
# A description that fails to parse is silently skipped.
41
return unless parser.parse "ast_root"
43
# parser.result is destructured into the class name and its attribute list.
name, attrs = parser.result
45
# The generated class subclasses Node and its initializer takes every
# attribute as a positional parameter.
code << " class #{name} < Node\n"
46
code << " def initialize(#{attrs.join(', ')})\n"
48
# NOTE(review): `at` is presumably the current element of attrs -- confirm.
# Each attribute gets an instance-variable assignment and an attr_reader.
code << " @#{at} = #{at}\n"
52
code << " attr_reader :#{at}\n"
62
root = @grammar.variables["ast-location"] || "AST"
66
vars = @grammar.variables.keys.sort
69
val = @grammar.variables[name]
71
if val.index("ast ") == 0
74
code << " module #{root}\n"
75
code << " class Node; end\n"
78
if m = output_ast(name, code, val[4..-1])
86
code << " module #{root}Construction\n"
87
methods.each do |short, name, attrs|
88
code << " def #{short}(#{attrs.join(', ')})\n"
89
code << " #{root}::#{name}.new(#{attrs.join(', ')})\n"
93
code << " include #{root}Construction\n"
97
# Returns `code` prefixed with the indent unit repeated `indent` times.
# NOTE(review): the comment above output_op says indent=2 means 4 spaces,
# which implies a two-space unit; the literal below reads as one space --
# confirm the whitespace inside the literal is intact.
def indentify(code, indent)
98
"#{" " * indent}#{code}"
101
# Emits Ruby parsing code for `op` into `code`, calling itself for nested ops.
# Default indent is 4 spaces (indent=2)
#
# code   - buffer that receives the generated source via <<
# op     - operator node to translate
# indent - nesting level handed to indentify; nested ops are emitted at
#          indent+1 or indent+2 when they sit inside a generated loop
#
# Raises RuntimeError for an unsupported char range or an unknown op class.
#
# NOTE(review): the section comments below are derived from what each group
# of statements emits; `ss` presumably holds the name of a generated local
# used to save and restore the parse position -- confirm.
102
def output_op(code, op, indent=2)
105
# Emit a read of one byte into _tmp.
code << indentify("_tmp = get_byte\n", indent)
107
# Emit a literal match; String#dump supplies the quoted, escaped literal.
code << indentify("_tmp = match_string(#{op.string.dump})\n", indent)
109
# When the regexp responds to kcode, the first character of its kcode is
# appended as a flag after the emitted regexp literal.
if op.regexp.respond_to?(:kcode)
110
lang = op.regexp.kcode.to_s[0,1]
112
# Let default Ruby string handling figure it out
115
# The emitted scan is anchored with \A.
code << indentify("_tmp = scan(/\\A#{op.regexp}/#{lang})\n", indent)
118
# Character range whose start and fin are each exactly one byte long.
if op.start.bytesize == 1 and op.fin.bytesize == 1
119
code << indentify("#{ss} = self.pos\n", indent)
120
code << indentify("_tmp = get_byte\n", indent)
121
code << indentify("if _tmp\n", indent)
123
# getbyte 0 yields the numeric byte values compared against in the
# emitted guard.
if op.start.respond_to? :getbyte
124
left = op.start.getbyte 0
125
right = op.fin.getbyte 0
131
# A byte outside [left, right] rewinds the position and fails the match.
code << indentify(" unless _tmp >= #{left} and _tmp <= #{right}\n", indent)
132
code << indentify(" self.pos = #{ss}\n", indent)
133
code << indentify(" _tmp = nil\n", indent)
134
code << indentify(" end\n", indent)
135
code << indentify("end\n", indent)
137
raise "Unsupported char range - #{op.inspect}"
142
# Choice: the emitted `while true` serves as a breakable block. Each
# alternative is tried in order; the loop is left on the first success and
# the position is rewound after a failed alternative.
code << indentify("#{ss} = self.pos\n", indent)
143
code << indentify("while true # choice\n", indent)
144
op.ops.each_with_index do |n,idx|
145
output_op code, n, (indent+1)
147
code << indentify(" break if _tmp\n", indent)
148
code << indentify(" self.pos = #{ss}\n", indent)
149
# After the last alternative an unconditional break is emitted.
if idx == op.ops.size - 1
150
code << indentify(" break\n", indent)
153
code << indentify("end # end choice\n\n", indent)
156
# Repetition, dispatched on op.min / op.max.
# min 0, max 1: a failed inner match is converted to success (_tmp = true)
# with the position rewound.
if op.min == 0 and op.max == 1
157
code << indentify("#{ss} = self.pos\n", indent)
158
output_op code, op.op, indent
160
code << indentify("@result = nil unless _tmp\n", indent)
162
code << indentify("unless _tmp\n", indent)
163
code << indentify(" _tmp = true\n", indent)
164
code << indentify(" self.pos = #{ss}\n", indent)
165
code << indentify("end\n", indent)
166
# min 0, no max: loop until the inner op fails, collecting @result values
# into _ary; the construct as a whole always succeeds.
elsif op.min == 0 and !op.max
168
code << indentify("_ary = []\n", indent)
171
code << indentify("while true\n", indent)
172
output_op code, op.op, (indent+1)
174
code << indentify(" _ary << @result if _tmp\n", indent)
176
code << indentify(" break unless _tmp\n", indent)
177
code << indentify("end\n", indent)
178
code << indentify("_tmp = true\n", indent)
181
code << indentify("@result = _ary\n", indent)
184
# min 1, no max: the first match is emitted outside the loop; if it fails
# the position is rewound, otherwise further matches are collected as above.
elsif op.min == 1 and !op.max
185
code << indentify("#{ss} = self.pos\n", indent)
187
code << indentify("_ary = []\n", indent)
189
output_op code, op.op, indent
190
code << indentify("if _tmp\n", indent)
192
code << indentify(" _ary << @result\n", indent)
194
code << indentify(" while true\n", indent)
195
output_op code, op.op, (indent+2)
197
code << indentify(" _ary << @result if _tmp\n", indent)
199
code << indentify(" break unless _tmp\n", indent)
200
code << indentify(" end\n", indent)
201
code << indentify(" _tmp = true\n", indent)
203
code << indentify(" @result = _ary\n", indent)
205
code << indentify("else\n", indent)
206
code << indentify(" self.pos = #{ss}\n", indent)
207
code << indentify("end\n", indent)
209
# Counted repetition: matches are counted, the loop stops once op.max is
# reached or a match fails, and the result succeeds only when the count
# is at least op.min (otherwise the position is rewound).
code << indentify("#{ss} = self.pos\n", indent)
210
code << indentify("_count = 0\n", indent)
211
code << indentify("while true\n", indent)
212
output_op code, op.op, (indent+1)
213
code << indentify(" if _tmp\n", indent)
214
code << indentify(" _count += 1\n", indent)
215
code << indentify(" break if _count == #{op.max}\n", indent)
216
code << indentify(" else\n", indent)
217
code << indentify(" break\n", indent)
218
code << indentify(" end\n", indent)
219
code << indentify("end\n", indent)
220
code << indentify("if _count >= #{op.min}\n", indent)
221
code << indentify(" _tmp = true\n", indent)
222
code << indentify("else\n", indent)
223
code << indentify(" self.pos = #{ss}\n", indent)
224
code << indentify(" _tmp = nil\n", indent)
225
code << indentify("end\n", indent)
231
# Sequence: again a `while true` used as a breakable block. Any failing
# element rewinds the position; the loop is left after the last element
# or on the first failure.
code << indentify("#{ss} = self.pos\n", indent)
232
code << indentify("while true # sequence\n", indent)
233
op.ops.each_with_index do |n, idx|
234
output_op code, n, (indent+1)
236
if idx == op.ops.size - 1
237
code << indentify(" unless _tmp\n", indent)
238
code << indentify(" self.pos = #{ss}\n", indent)
239
code << indentify(" end\n", indent)
240
code << indentify(" break\n", indent)
242
code << indentify(" unless _tmp\n", indent)
243
code << indentify(" self.pos = #{ss}\n", indent)
244
code << indentify(" break\n", indent)
245
code << indentify(" end\n", indent)
248
code << indentify("end # end sequence\n\n", indent)
251
# Test without consuming: the position is saved, the operand is evaluated
# (an Action operand is inlined inside begin/end), and the position is
# restored regardless of the outcome.
code << indentify("#{ss} = self.pos\n", indent)
252
if op.op.kind_of? Action
253
code << indentify("_tmp = begin; #{op.op.action}; end\n", indent)
255
output_op code, op.op, indent
257
code << indentify("self.pos = #{ss}\n", indent)
260
# Same shape as above, but the outcome is inverted before the position
# is restored.
code << indentify("#{ss} = self.pos\n", indent)
261
if op.op.kind_of? Action
262
code << indentify("_tmp = begin; #{op.op.action}; end\n", indent)
264
output_op code, op.op, indent
266
code << indentify("_tmp = _tmp ? nil : true\n", indent)
267
code << indentify("self.pos = #{ss}\n", indent)
270
# Rule application through apply_with_args / apply. arguments[1..-2]
# drops the first and last characters of the argument text (presumably
# the surrounding parentheses -- confirm).
code << indentify("_tmp = apply_with_args(:#{method_name op.rule_name}, #{op.arguments[1..-2]})\n", indent)
272
code << indentify("_tmp = apply(:#{method_name op.rule_name})\n", indent)
276
# Rule invoked as a direct method call, with or without argument text.
code << indentify("_tmp = #{method_name op.rule_name}#{op.arguments}\n", indent)
278
code << indentify("_tmp = #{method_name op.rule_name}()\n", indent)
280
# Rule living in another grammar: dispatched through the @_grammar_<name>
# instance variable of the generated parser.
when ForeignInvokeRule
282
code << indentify("_tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name}, #{op.arguments[1..-2]})\n", indent)
284
code << indentify("_tmp = @_grammar_#{op.grammar_name}.external_invoke(self, :#{method_name op.rule_name})\n", indent)
287
# Tagged op: with a non-empty tag name, @result is copied into a local
# named after the tag once the inner op has been emitted.
if op.tag_name and !op.tag_name.empty?
288
output_op code, op.op, indent
289
code << indentify("#{op.tag_name} = @result\n", indent)
291
output_op code, op.op, indent
294
# Action: the action source is inlined in begin/end and its value stored
# in @result; _tmp is then set to true.
code << indentify("@result = begin; ", indent)
295
code << op.action << "; end\n"
297
# Emits a puts that prints the action source and the inspected @result.
code << indentify("puts \" => \" #{op.action.dump} \" => \#{@result.inspect} \\n\"\n", indent)
299
code << indentify("_tmp = true\n", indent)
301
# Text capture: on success the generated code assigns
# get_text(_text_start) to a local named `text`.
code << indentify("_text_start = self.pos\n", indent)
302
output_op code, op.op, indent
303
code << indentify("if _tmp\n", indent)
304
code << indentify(" text = get_text(_text_start)\n", indent)
305
code << indentify("end\n", indent)
307
# Bounds capture: on success the generated code assigns the
# [start, current] position pair to a local named `bounds`.
code << indentify("_bounds_start = self.pos\n", indent)
308
output_op code, op.op, indent
309
code << indentify("if _tmp\n", indent)
310
code << indentify(" bounds = [_bounds_start, self.pos]\n", indent)
311
code << indentify("end\n", indent)
313
raise "Unknown op - #{op.class}"
317
# Reads the file at `path`, resolved relative to the directory containing
# this source file, and locates the region delimited by the
# "# <marker> START" and "# <marker> END" comment lines.
#
# path   - file name relative to this file's directory
# marker - label used in the START/END comments (default "STANDALONE")
#
# Calls abort with a message naming the marker and path when the boundaries
# are reported missing.
def standalone_region(path, marker = "STANDALONE")
318
expanded_path = File.expand_path("../#{path}", __FILE__)
319
cp = File.read(expanded_path)
321
start_marker = "# #{marker} START"
322
# The END marker may be preceded by whitespace on its line.
end_marker = /^\s*# #{Regexp.escape marker} END/
324
# NOTE(review): String#index returns nil when start_marker is absent, so
# the addition below raises NoMethodError before any later check can
# report the missing boundary -- consider testing for nil first.
start = cp.index(start_marker) + start_marker.length + 1 # \n
325
fin = cp.index(end_marker)
328
abort("#{marker} boundaries in #{path} missing " \
329
"for standalone generation")
336
return @output if @output
348
# Output of class end and footer.
#
# code - buffer that receives the generated source via <<
#
# When the grammar defines a 'footer' directive, its action text is
# appended to `code` verbatim.
349
350
def output_footer(code)
353
if footer = @grammar.directives['footer']
354
code << footer.action
359
# Output of grammar and rules.
#
# code - buffer that receives the generated source via <<
#
# Emits, between RDoc :stopdoc: / :startdoc: markers, a
# setup_foreign_grammar method, one method per rule in
# @grammar.rule_order, and a Rules constant describing each rule.
361
def output_grammar(code)
362
code << " # :stopdoc:\n"
365
fg = @grammar.foreign_grammars
369
# An empty setup_foreign_grammar is emitted on one path; on the other, one
# that instantiates every foreign grammar with new(nil) and stores it in
# @_grammar_<name>.
code << " def setup_foreign_grammar; end\n"
372
code << " def setup_foreign_grammar\n"
373
@grammar.foreign_grammars.each do |name, gram|
374
code << " @_grammar_#{name} = #{gram}.new(nil)\n"
379
render = GrammarRenderer.new(@grammar)
383
@grammar.rule_order.each do |name|
386
rule = @grammar.rules[name]
388
# The rule body is rendered back to grammar text; the rendering is kept
# for the Rules table below and emitted as a comment above the method.
render.render_op io, rule.op
393
renderings[name] = rend
396
code << " # #{name} = #{rend}\n"
399
# The method header is emitted with or without a parameter list.
code << " def #{method_name name}(#{rule.arguments.join(',')})\n"
401
code << " def #{method_name name}\n"
405
# START / OK / FAIL trace lines printed by the generated parser.
code << " puts \"START #{name} @ \#{show_pos}\\n\"\n"
408
# The rule body is emitted at output_op's default indent.
output_op code, rule.op
411
code << " puts \" OK #{name} @ \#{show_pos}\\n\"\n"
413
code << " puts \" FAIL #{name} @ \#{show_pos}\\n\"\n"
417
# Every generated rule method records itself as the failed rule when _tmp
# is falsy and returns _tmp.
code << " set_failed_rule :#{method_name name} unless _tmp\n"
418
code << " return _tmp\n"
422
# Rules maps each generated method name to rule_info(name, rendering).
code << "\n Rules = {}\n"
423
@grammar.rule_order.each do |name|
424
rend = GrammarRenderer.escape renderings[name], true
425
code << " Rules[:#{method_name name}] = rule_info(\"#{name}\", \"#{rend}\")\n"
428
code << " # :startdoc:\n"
432
# Output up to the user-defined setup actions.
#
# code - buffer that receives the generated source via <<
#
# Emits the 'header' and 'pre-class' directive text, the class opening line
# (either self-contained with the support code inlined, or subclassing
# KPeg::CompiledParser after a require), and then every setup action.
434
def output_header(code)
435
if header = @grammar.directives['header']
436
code << header.action.strip
440
pre_class = @grammar.directives['pre-class']
444
code << pre_class.action.strip
447
# Class with no superclass: the support code is copied in from the marked
# regions of compiled_parser.rb and position.rb.
code << "class #{@name}\n"
449
cp = standalone_region("compiled_parser.rb")
450
cpi = standalone_region("compiled_parser.rb", "INITIALIZE")
451
# NOTE(review): this local shadows Kernel#pp for the rest of the method.
pp = standalone_region("position.rb")
453
# The `include Position` line is replaced by the position.rb region text.
# NOTE(review): with a String replacement, gsub! interprets backslash
# sequences such as \1 or \\ in `pp`; the block form would insert the
# text literally -- confirm position.rb contains none.
cp.gsub!(/^\s*include Position/, pp)
454
code << " # :stopdoc:\n"
455
# The INITIALIZE region is omitted when the grammar sets the
# 'custom_initialize' variable.
code << cpi << "\n" unless @grammar.variables['custom_initialize']
457
code << " # :startdoc:\n"
459
# Class that loads the runtime with require and subclasses
# KPeg::CompiledParser.
code << "require 'kpeg/compiled_parser'\n\n"
461
code << pre_class.action.strip
464
code << "class #{@name} < KPeg::CompiledParser\n"
467
# Each setup action is emitted surrounded by blank lines.
@grammar.setup_actions.each do |act|
468
code << "\n#{act.action}\n\n"
474
m.module_eval output, "(kpeg parser #{@name})"
476
cls = m.const_get(@name)