~ubuntu-branches/ubuntu/trusty/menhir/trusty

Viewing changes to lr0.ml

Committer: Bazaar Package Importer
Author(s): Mehdi Dogguy
Date: 2009-02-22 23:41:17 UTC
mfrom: (1.1.5 upstream) (2.1.2 squeeze)
Revision ID: james.westby@ubuntu.com-20090222234117-yxk115kvzv634utx

Tags: 20090204.dfsg-2

http://bugs.debian.org/516134

* New binary package libmenhir-ocaml-dev, Closes: #516134.
* Use dh-ocaml predefined variables.
* Use predefined variable OCAML_BEST (dh-ocaml >= 0.4).
* debian/svn-deblayout: remove no longer needed SVN setting

files added:
debian/libmenhir-ocaml-dev.dirs.in

debian/libmenhir-ocaml-dev.install.in

debian/menhir.clean

debian/menhir.dirs

debian/menhir.examples

debian/menhir.install.in

debian/menhir.manpages

debian/patches/src_Makefile.dpatch

demos/calc/.omakedb

demos/calc/.omakedb.lock

demos/calc/OMakefile.omc

demos/calc/OMakeroot.omc

src/IL.mli

src/IO.ml

src/IO.mli

src/META

src/Makefile

src/action.ml

src/action.mli

src/back.ml

src/back.mli

src/breadth.ml

src/breadth.mli

src/checkOCamlVersion.ml

src/codeBackend.ml

src/codeBackend.mli

src/codeBits.ml

src/codeBits.mli

src/codePieces.ml

src/codePieces.mli

src/compressedBitSet.ml

src/compressedBitSet.mli

src/concreteSyntax.mli

src/conflict.ml

src/conflict.mli

src/convert.ml

src/convert.mli

src/cst.ml

src/cst.mli

src/derivation.ml

src/derivation.mli

src/dot.ml

src/dot.mli

src/engine.ml

src/engine.mli

src/engineTypes.ml

src/error.ml

src/error.mli

src/fancy-parser.mly

src/front.ml

src/front.mli

src/gMap.ml

src/gSet.ml

src/grammar.ml

src/grammar.mli

src/infer.ml

src/infer.mli

src/infiniteArray.ml

src/infiniteArray.mli

src/inliner.ml

src/inliner.mli

src/installation.mli

src/interface.ml

src/interface.mli

src/internalSyntax.mli

src/interpret.ml

src/interpret.mli

src/invariant.ml

src/invariant.mli

src/item.ml

src/item.mli

src/keyword.ml

src/keyword.mli

src/lexdep.mll

src/lexer.mll

src/lexmli.mll

src/lineCount.mll

src/listMonad.ml

src/listMonad.mli

src/lookahead.mli

src/lr0.ml

src/lr0.mli

src/lr1.ml

src/lr1.mli

src/lr1partial.ml

src/lr1partial.mli

src/mark.ml

src/mark.mli

src/misc.ml

src/misc.mli

src/nonTerminalDefinitionInlining.ml

src/nonTerminalDefinitionInlining.mli

src/option.ml

src/option.mli

src/packedIntArray.ml

src/packedIntArray.mli

src/parameterizedGrammar.ml

src/parameterizedGrammar.mli

src/parameters.ml

src/parser.mly

src/parserAux.ml

src/parserAux.mli

src/partialGrammar.ml

src/partialGrammar.mli

src/patricia.ml

src/patricia.mli

src/positions.ml

src/positions.mli

src/pprint.ml

src/pprint.mli

src/preFront.ml

src/preFront.mli

src/printer.ml

src/printer.mli

src/rawPrinter.ml

src/rawPrinter.mli

src/reachability.ml

src/reachability.mli

src/referenceInterpreter.ml

src/referenceInterpreter.mli

src/rowDisplacement.ml

src/rowDisplacement.mli

src/sentenceLexer.mll

src/sentenceParser.mly

src/settings.ml

src/settings.mli

src/standard.mly

src/stretch.mli

src/stringMap.ml

src/stringMap.mli

src/stringSet.ml

src/stringSet.mli

src/syntax.mli

src/tableBackend.ml

src/tableBackend.mli

src/tableFormat.ml

src/tableInterpreter.ml

src/tableInterpreter.mli

src/tarjan.ml

src/tarjan.mli

src/time.ml

src/time.mli

src/tokenType.ml

src/tokenType.mli

src/traverse.ml

src/unionFind.ml

src/unionFind.mli

src/unparameterizedPrinter.ml

src/unparameterizedPrinter.mli

src/unparameterizedSyntax.mli

src/version.ml

files removed:
IL.mli

IO.ml

IO.mli

Makefile.common

action.ml

action.mli

back.ml

back.mli

breadth.ml

breadth.mli

check-ocaml-version.ml

code.ml

code.mli

codeBits.ml

codeBits.mli

codeSharing.ml

codeSharing.mli

compressedBitSet.ml

compressedBitSet.mli

concreteSyntax.mli

conflict.ml

conflict.mli

conflictGraph.ml

conflictGraph.mli

debian/dirs

debian/examples

debian/svn-deblayout

derivation.ml

derivation.mli

dot.ml

dot.mli

error.ml

error.mli

fancy-parser.mly

front.ml

front.mli

gMap.ml

gSet.ml

grammar.ml

grammar.mli

infer.ml

infer.mli

infiniteArray.ml

infiniteArray.mli

inliner.ml

inliner.mli

interface.ml

interface.mli

internalSyntax.mli

invariant.ml

invariant.mli

item.ml

item.mli

keyword.ml

keyword.mli

lexdep.mll

lexer.mll

lexmli.mll

lineCount.mll

listMonad.ml

listMonad.mli

lookahead.mli

lr0.ml

lr0.mli

lr1.ml

lr1.mli

lr1partial.ml

lr1partial.mli

mark.ml

mark.mli

misc.ml

misc.mli

nonTerminalDefinitionInlining.ml

nonTerminalDefinitionInlining.mli

option.ml

option.mli

parameterizedGrammar.ml

parameterizedGrammar.mli

parameters.ml

parser.mly

parserAux.ml

parserAux.mli

partialGrammar.ml

partialGrammar.mli

patricia.ml

patricia.mli

positions.ml

positions.mli

preFront.ml

preFront.mli

printer.ml

printer.mli

rawPrinter.ml

rawPrinter.mli

reachability.ml

reachability.mli

settings.ml

settings.mli

standard.mly

stretch.mli

stringMap.ml

stringMap.mli

stringSet.ml

stringSet.mli

syntax.mli

tarjan.ml

tarjan.mli

time.ml

time.mli

tokenType.ml

tokenType.mli

traverse.ml

unionFind.ml

unionFind.mli

unparameterizedPrinter.ml

unparameterizedPrinter.mli

unparameterizedSyntax.mli

version.ml

files modified:
CHANGES

INSTALL

LICENSE

Makefile

debian/changelog

debian/compat

debian/control

debian/patches/00list

debian/rules

debian/watch

demos/Makefile

demos/Makefile.auto

demos/Makefile.shared

demos/OMakefile.shared

demos/calc-param/Makefile

demos/calc-param/OMakefile

demos/calc-param/README

demos/calc-param/calc.ml

demos/calc-param/lexer.mll

demos/calc-param/parser.mly

demos/calc-param/tokens.mly

demos/calc-two/Makefile

demos/calc-two/OMakefile

demos/calc-two/README

demos/calc-two/algebraic.mly

demos/calc-two/calc.ml

demos/calc-two/common.mly

demos/calc-two/lexer.mll

demos/calc-two/reverse.mly

demos/calc-two/tokens.mly

demos/calc/Makefile

demos/calc/OMakefile

demos/calc/README

demos/calc/calc.ml

demos/calc/lexer.mll

demos/calc/parser.mly

menhir.1

Show diffs side-by-side

added added

removed removed

lr0.ml

(**************************************************************************)

(* *)

(* Menhir *)

(* *)

(* Fran�ois Pottier and Yann R�gis-Gianas, INRIA Rocquencourt *)

(* *)

(* under the terms of the Q Public License version 1.0, with the *)

(* change described in file LICENSE. *)

(* *)

(**************************************************************************)

open Grammar

(* ------------------------------------------------------------------------ *)

(* Symbolic lookahead information. *)

(* A symbolic lookahead set consists of an actual concrete set of

terminal symbols and of a number of set variables. Set variables as

encoded as integers. *)

module SymbolicLookahead = struct

type t =

TerminalSet.t * CompressedBitSet.t

let constant toks =

(toks, CompressedBitSet.empty)

let empty =

constant TerminalSet.empty

let union (toks1, vars1) ((toks2, vars2) as s2) =

let toks = TerminalSet.union toks1 toks2

and vars = CompressedBitSet.union vars1 vars2 in

if toks2 == toks && vars2 == vars then

else

(toks, vars)

let variable (var : int) : t =

(TerminalSet.empty, CompressedBitSet.singleton var)

let project (toks, vars) =

assert (CompressedBitSet.is_empty vars);

toks

end

(* We will perform closure operations over symbolic lookahead sets.

This allows us to later represent LR(1) states as pairs of an

LR(0) node number and an array of concrete lookahead sets. *)

module SymbolicClosure =

Item.Closure(SymbolicLookahead)

(* Closure operations over concrete lookahead sets are also used (when

explaining conflicts). One could take another instance of the

functor. The approach below is somewhat less elegant and makes each

call to [closure] somewhat slower, but saves the cost of

instantiating the functor again -- which is linear in the size of

the grammar. *)

type concretelr1state =

TerminalSet.t Item.Map.t

let closure (state : concretelr1state) : concretelr1state =

Item.Map.map SymbolicLookahead.project

(SymbolicClosure.closure

(Item.Map.map SymbolicLookahead.constant state))

(* ------------------------------------------------------------------------ *)

(* Finding which non-epsilon transitions leave a set of items. This

code is parametric in the nature of lookahead sets. *)

let transitions (state : 'a Item.Map.t) : 'a Item.Map.t SymbolMap.t =

Item.Map.fold (fun item toks transitions ->

match Item.classify item with

| Item.Shift (symbol, item') ->

let items : 'a Item.Map.t =

try

SymbolMap.find symbol transitions

with Not_found ->

Item.Map.empty

SymbolMap.add symbol (Item.Map.add item' toks items) transitions

| Item.Reduce _ ->

transitions

) state SymbolMap.empty

(* ------------------------------------------------------------------------ *)

(* Determining the reduction opportunities at a (closed) state. They

are represented as a list of pairs of a lookahead set and a

production index. This code is again parametric in the nature of

lookahead sets. *)

let reductions (state : 'a Item.Map.t) : ('a * Production.index) list =

100

Item.Map.fold (fun item toks accu ->

101

match Item.classify item with

102

| Item.Reduce prod ->

103

(toks, prod) :: accu

104

| Item.Shift _ ->

105

accu

106

) state []

107

108

(* ------------------------------------------------------------------------ *)

109

(* Construction of the the LR(0) automaton. *)

110

111

(* Nodes are numbered sequentially. *)

112

113

type node =

114

int

115

116

(* A symbolic transition is a pair of the target state number and an

117

array of symbolic lookahead sets. The variables in these sets are

118

numbered in [0,g) where g is the number of items in the source

119

LR(0) state. Items are numbered in the order of presentation by

120

[Item.Set.fold]. *)

121

122

type symbolic_transition_target =

123

node * SymbolicLookahead.t array

124

125

(* The automaton is represented by (growing) arrays of states (sets of

126

items), symbolic transition information, and symbolic reduction

127

information, indexed by node numbers. Conversely, a hash table maps

128

states (sets of items) to node numbers. *)

129

130

let n =

131

ref 0

132

133

let states : Item.Set.t InfiniteArray.t =

134

InfiniteArray.make Item.Set.empty

135

136

let _transitions : symbolic_transition_target SymbolMap.t InfiniteArray.t =

137

InfiniteArray.make SymbolMap.empty

138

139

let _reductions : (SymbolicLookahead.t * Production.index) list InfiniteArray.t =

140

InfiniteArray.make []

141

142

let map : (Item.Set.t, node) Hashtbl.t =

143

Hashtbl.create 50021

144

145

(* The automaton is built depth-first. *)

146

147

let rec explore (state : Item.Set.t) : node =

148

149

(* Find out whether this state was already explored. *)

150

151

try

152

Hashtbl.find map state

153

with Not_found ->

154

155

(* If not, create a new node. *)

156

157

let k = !n in

158

n := k + 1;

159

InfiniteArray.set states k state;

160

Hashtbl.add map state k;

161

162

(* Build a symbolic version of the current state, where each item

163

is associated with a distinct lookahead set variable, numbered

164

consecutively. *)

165

166

let (_ : int), (symbolic_state : SymbolicClosure.state) =

167

Item.Set.fold (fun item (i, symbolic_state) ->

168

i+1, Item.Map.add item (SymbolicLookahead.variable i) symbolic_state

169

) state (0, Item.Map.empty) in

170

171

(* Compute the symbolic closure. *)

172

173

let closure = SymbolicClosure.closure symbolic_state in

174

175

(* Compute symbolic information about reductions. *)

176

177

InfiniteArray.set _reductions k (reductions closure);

178

179

(* Compute symbolic information about the transitions, and, by

180

dropping the symbolic lookahead information, explore the

181

transitions to further LR(0) states. *)

182

183

InfiniteArray.set _transitions k (SymbolMap.map (fun symbolic_state ->

184

let (k : node) = explore (Item.Map.domain symbolic_state) in

185

let lookahead : SymbolicLookahead.t array =

186

Array.create (Item.Map.cardinal symbolic_state) SymbolicLookahead.empty in

187

let (_ : int) = Item.Map.fold (fun _ s i ->

188

lookahead.(i) <- s;

189

i+1

190

) symbolic_state 0 in

191

((k, lookahead) : symbolic_transition_target)

192

) (transitions closure));

193

194

195

196

(* Creating a start state out of a start production. It contains a

197

single item, consisting of the start production, at position 0. *)

198

199

let start prod : Item.Set.t =

200

Item.Set.singleton (Item.import (prod, 0))

201

202

(* This starts the construction of the automaton and records the

203

entry nodes in an array. *)

204

205

let entry : node array =

206

Production.maps (fun prod ->

207

explore (start prod)

208

)

209

210

let () =

211

Hashtbl.clear map

212

213

let n =

214

215

216

let () =

217

Error.logA 1 (fun f -> Printf.fprintf f "Built an LR(0) automaton with %d states.\n" n);

218

Time.tick "Construction of the LR(0) automaton"

219

220

(* ------------------------------------------------------------------------ *)

221

(* Accessors. *)

222

223

let items node : Item.Set.t =

224

InfiniteArray.get states node

225

226

(* ------------------------------------------------------------------------ *)

227

(* Help for building the LR(1) automaton. *)

228

229

(* An LR(1) state is represented as a pair of an LR(0) state number

230

and an array of concrete lookahead sets (whose length depends on

231

the LR(0) state). *)

232

233

type lr1state =

234

node * TerminalSet.t array

235

236

(* An encoded LR(1) state can be turned into a concrete representation,

237

that is, a mapping of items to concrete lookahead sets. *)

238

239

let export (k, toksr) =

240

let (_ : int), items = Item.Set.fold (fun item (i, items) ->

241

i+1, Item.Map.add item toksr.(i) items

242

) (InfiniteArray.get states k) (0, Item.Map.empty) in

243

items

244

245

(* Displaying a concrete state. *)

246

247

let print_concrete (state : concretelr1state) =

248

let buffer = Buffer.create 1024 in

249

Item.Map.iter (fun item toks ->

250

Printf.bprintf buffer "%s[ %s ]\n" (Item.print item) (TerminalSet.print toks)

251

) state;

252

Buffer.contents buffer

253

254

(* Displaying a state. By default, only the kernel is displayed, not

255

the closure. *)

256

257

let print state =

258

print_concrete (export state)

259

260

let print_closure state =

261

print_concrete (closure (export state))

262

263

(* The core of an LR(1) state is the underlying LR(0) state. *)

264

265

let core (k, _) =

266

267

268

(* A sanity check. *)

269

270

let well_formed (k, toksr) =

271

Array.length toksr = Item.Set.cardinal (InfiniteArray.get states k)

272

273

(* An LR(1) start state is the combination of an LR(0) start state

274

(which consists of a single item) with a singleton lookahead set

275

that consists of the end-of-file pseudo-token. *)

276

277

let start k =

278

let state = (k, [| TerminalSet.singleton Terminal.sharp |]) in

279

assert (well_formed state);

280

state

281

282

(* Interpreting a symbolic lookahead set with respect to a source

283

state. The variables in the symbolic lookahead set (which are

284

integers) are interpreted as indices into the state's array of

285

concrete lookahead sets. The result is a concrete lookahead set. *)

286

287

let interpret

288

((_, toksr) as state : lr1state)

289

((toks, vars) : SymbolicLookahead.t)

290

: TerminalSet.t =

291

292

assert (well_formed state);

293

CompressedBitSet.fold (fun var toks ->

294

assert (var >= 0 && var < Array.length toksr);

295

TerminalSet.union toksr.(var) toks

296

) vars toks

297

298

(* Out of an LR(1) state, one produces information about reductions

299

and transitions. This is done in an efficient way by interpreting

300

the precomputed symbolic information with respect to that state. *)

301

302

let reductions

303

((k, _) as state : lr1state)

304

: (TerminalSet.t * Production.index) list =

305

306

List.map (fun (s, prod) ->

307

interpret state s, prod

308

) (InfiniteArray.get _reductions k)

309

310

let transitions

311

((k, _) as state : lr1state)

312

: lr1state SymbolMap.t =

313

314

SymbolMap.map (fun ((k, sr) : symbolic_transition_target) ->

315

((k, Array.map (interpret state) sr) : lr1state)

316

) (InfiniteArray.get _transitions k)

317

318

let outgoing_symbols

319

(k : node)

320

: Symbol.t list =

321

322

SymbolMap.domain (InfiniteArray.get _transitions k)

323

324

let transition

325

symbol

326

((k, _) as state : lr1state)

327

: lr1state =

328

329

let ((k, sr) : symbolic_transition_target) =

330

try

331

SymbolMap.find symbol (InfiniteArray.get _transitions k)

332

with Not_found ->

333

assert false (* no transition along this symbol *)

334

335

(k, Array.map (interpret state) sr)

336

337

(* Equality of states. *)

338

339

let equal ((k1, toksr1) as state1) ((k2, toksr2) as state2) =

340

assert (k1 = k2 && well_formed state1 && well_formed state2);

341

let rec loop i =

342

if i = 0 then

343

true

344

else

345

let i = i - 1 in

346

(TerminalSet.equal toksr1.(i) toksr2.(i)) && (loop i)

347

348

loop (Array.length toksr1)

349

350

(* Subsumption between states. *)

351

352

let subsume ((k1, toksr1) as state1) ((k2, toksr2) as state2) =

353

assert (k1 = k2 && well_formed state1 && well_formed state2);

354

let rec loop i =

355

if i = 0 then

356

true

357

else

358

let i = i - 1 in

359

(TerminalSet.subset toksr1.(i) toksr2.(i)) && (loop i)

360

361

loop (Array.length toksr1)

362

363

(* This function determines whether two (core-equivalent) states are

364

compatible, according to a criterion that is close to Pager's weak

365

compatibility criterion.

366

367

Pager's criterion guarantees that if a merged state has a potential

368

conflict at [(i, j)] -- that is, some token [t] appears within the

369

lookahead sets of both item [i] and item [j] -- then there exists a

370

state in the canonical automaton that also has a potential conflict

371

at [(i, j)] -- that is, some token [u] appears within the lookahead

372

sets of both item [i] and item [j]. Note that [t] and [u] can be

373

distinct.

374

375

Pager has shown that his weak compatibility criterion is stable,

376

that is, preserved by transitions and closure. This means that, if

377

two states can be merged, then so can their successors. This is

378

important, because merging two states means committing to merging

379

their successors, even though we have not even built these

380

successors yet.

381

382

The criterion used here is a slightly more restrictive version of

383

Pager's criterion, which guarantees equality of the tokens [t] and

384

[u]. This is done essentially by applying Pager's original

385

criterion on a token-wise basis. Pager's original criterion states

386

that two states can be merged if the new state has no conflict or

387

one of the original states has a conflict. Our more restrictive

388

criterion states that two states can be merged if, for every token

389

[t], the new state has no conflict at [t] or one of the original

390

states has a conflict at [t].

391

392

This modified criterion is also stable. My experiments show that it

393

is almost as effective in practice: out of more than a hundred

394

real-world sample grammars, only one automaton was affected, and

395

only one extra state appeared as a result of using the modified

396

criterion. Its advantage is to potentially make conflict

397

explanations easier: if there appears to be a conflict at [t], then

398

some conflict at [t] can be explained. This was not true when using

399

Pager's original criterion. *)

400

401

let compatible (k1, toksr1) (k2, toksr2) =

402

assert (k1 = k2);

403

let n = Array.length toksr1 in

404

(* Two states are compatible if and only if they are compatible

405

at every pair (i, j), where i and j are distinct. *)

406

let rec loopi i =

407

if i = n then

408

true

409

else

410

let toksr1i = toksr1.(i)

411

and toksr2i = toksr2.(i) in

412

let rec loopj j =

413

if j = i then

414

true

415

else

416

let toksr1j = toksr1.(j)

417

and toksr2j = toksr2.(j) in

418

419

(* The two states are compatible at (i, j) if every conflict

420

token in the merged state already was a conflict token in

421

one of the two original states. This could be written as

422

follows:

423

424

TerminalSet.subset

425

(TerminalSet.inter (TerminalSet.union toksr1i toksr2i) (TerminalSet.union toksr1j toksr2j))

426

(TerminalSet.union (TerminalSet.inter toksr1i toksr1j) (TerminalSet.inter toksr2i toksr2j))

427

428

but is easily seen (on paper) to be equivalent to:

429

430

431

432

TerminalSet.subset

433

(TerminalSet.inter toksr2i toksr1j)

434

(TerminalSet.union toksr1i toksr2j)

435

436

TerminalSet.subset

437

(TerminalSet.inter toksr1i toksr2j)

438

(TerminalSet.union toksr2i toksr1j)

439

440

loopj (j+1)

441

442

loopj 0 && loopi (i+1)

443

444

loopi 0

445

446

(* This function determines whether two (core-equivalent) states can

447

be merged without creating an end-of-stream conflict, now or in the

448

future.

449

450

The rule is, if an item appears in one state with the singleton "#"

451

as its lookahead set, then its lookahead set in the other state

452

must contain "#".

453

454

So, either the second lookahead set is also the singleton "#", and

455

no end-of-stream conflict exists, or it is larger, and the second

456

state already contains an end-of-stream conflict.

457

458

Put another way, we do not to merge two lookahead sets when one

459

contains "#" alone and the other does not contain "#".

460

461

I invented this rule to complement Pager's criterion. I believe,

462

but I am not 100% sure, that it does indeed prevent end-of-stream

463

conflicts and that it is stable.

464

465

Thanks to S�bastien Hinderer for reporting the bug caused by the

466

absence of this extra criterion. *)

467

468

let eos_compatible (k1, toksr1) (k2, toksr2) =

469

assert (k1 = k2);

470

let n = Array.length toksr1 in

471

let rec loop i =

472

if i = n then

473

true

474

else

475

let toks1 = toksr1.(i)

476

and toks2 = toksr2.(i) in

477

begin

478

if TerminalSet.mem Terminal.sharp toks1 && TerminalSet.cardinal toks1 = 1 then

479

(* "#" is alone in one set: it must be a member of the other set. *)

480

TerminalSet.mem Terminal.sharp toks2

481

else if TerminalSet.mem Terminal.sharp toks2 && TerminalSet.cardinal toks2 = 1 then

482

(* Symmetric condition. *)

483

TerminalSet.mem Terminal.sharp toks1

484

else

485

true

486

end

487

&& loop (i+1)

488

489

loop 0

490

491

(* Union of two states. The two states must have the same core. The

492

new state is obtained by pointwise union of the lookahead sets. *)

493

494

let union (k1, toksr1) (k2, toksr2) =

495

assert (k1 = k2);

496

k1, Array.init (Array.length toksr1) (fun i ->

497

TerminalSet.union toksr1.(i) toksr2.(i)

498

)

499

500

(* Restriction of a state to a set of tokens of interest. Every

501

lookahead set is intersected with that set. *)

502

503

let restrict toks (k, toksr) =

504

k, Array.map (fun toksri ->

505

TerminalSet.inter toksri toks

506

) toksr

507

Older »