~ubuntu-branches/ubuntu/vivid/menhir/vivid

« back to all changes in this revision

Viewing changes to patricia.ml

Committer: Bazaar Package Importer
Author(s): Samuel Mimram
Date: 2006-07-11 12:26:18 UTC
Revision ID: james.westby@ubuntu.com-20060711122618-dea56bmjs3qlmsd8

Tags: upstream-20060615.dfsg

Import upstream version 20060615.dfsg

files added:

AUTHORS

CHANGES

IL.mli

INSTALL

IO.ml

IO.mli

LICENSE

Makefile

Makefile.common

action.ml

action.mli

back.ml

back.mli

breadth.ml

breadth.mli

check-ocaml-version.ml

code.ml

code.mli

codeBits.ml

codeBits.mli

compressedBitSet.ml

compressedBitSet.mli

concreteSyntax.mli

conflict.ml

conflict.mli

demos

demos/Makefile

demos/Makefile.auto

demos/Makefile.shared

demos/OMakefile.shared

demos/calc

demos/calc-two

demos/calc-two/Makefile

demos/calc-two/OMakefile

demos/calc-two/OMakeroot

demos/calc-two/README

demos/calc-two/algebraic.mly

demos/calc-two/calc.ml

demos/calc-two/lexer.mll

demos/calc-two/reverse.mly

demos/calc-two/tokens.mly

demos/calc/Makefile

demos/calc/OMakefile

demos/calc/OMakeroot

demos/calc/README

demos/calc/calc.ml

demos/calc/lexer.mll

demos/calc/parser.mly

demos/ocamldep.wrapper

derivation.ml

derivation.mli

dot.ml

dot.mli

error.ml

error.mli

fancy-parser.mly

front.ml

front.mli

gMap.ml

gSet.ml

grammar.ml

grammar.mli

infer.ml

infer.mli

infiniteArray.ml

infiniteArray.mli

inliner.ml

inliner.mli

interface.ml

interface.mli

internalSyntax.mli

invariant.ml

invariant.mli

item.ml

item.mli

keyword.ml

keyword.mli

lexdep.mll

lexer.mll

lexmli.mll

lineCount.mll

listMonad.ml

listMonad.mli

lookahead.mli

lr0.ml

lr0.mli

lr1.ml

lr1.mli

lr1partial.ml

lr1partial.mli

mark.ml

mark.mli

menhir.1

misc.ml

misc.mli

nonTerminalDefinitionInlining.ml

nonTerminalDefinitionInlining.mli

option.ml

option.mli

parameterizedGrammar.ml

parameterizedGrammar.mli

parameters.ml

parser.mly

parserAux.ml

parserAux.mli

partialGrammar.ml

partialGrammar.mli

patricia.ml

patricia.mli

positions.ml

positions.mli

preFront.ml

preFront.mli

printer.ml

printer.mli

reachability.ml

reachability.mli

settings.ml

settings.mli

standard.mly

stretch.mli

stringMap.ml

stringMap.mli

stringSet.ml

stringSet.mli

syntax.mli

tarjan.ml

tarjan.mli

time.ml

time.mli

tokenType.ml

tokenType.mli

traverse.ml

unionFind.ml

unionFind.mli

unparameterizedPrinter.ml

unparameterizedPrinter.mli

unparameterizedSyntax.mli

version.ml

Show diffs side-by-side

added added

removed removed

patricia.ml

(**************************************************************************)

(* *)

(* Menhir *)

(* *)

(* Fran�ois Pottier and Yann R�gis-Gianas, INRIA Rocquencourt *)

(* *)

(* under the terms of the Q Public License version 1.0, with the *)

(* change described in file LICENSE. *)

(* *)

(**************************************************************************)

(* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore.

Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia

trees. *)

(*i --------------------------------------------------------------------------------------------------------------- i*)

(*s \mysection{Little-endian vs big-endian trees} *)

(* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is

big-endian if it expects the key's most significant bits to be tested first.

Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small

structure which defines endianness. Here is the interface which must be adhered to by such a structure. *)

module Endianness = struct

module type S = sig

(* A mask is an integer with a single one bit (i.e. a power of 2). *)

type mask = int

(* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they

differ. The meaning of ``first'' varies according to the endianness being implemented. *)

val branching_bit: int -> int -> mask

(* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i],

and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given

mask varies according to the endianness being implemented. *)

val mask: int -> mask -> int

(* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer

bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being

implemented. *)

val shorter: mask -> mask -> bool

end

(* Now, let us define [Little] and [Big], two possible [Endiannness] choices. *)

module Little = struct

type mask = int

let lowest_bit x =

x land (-x)

(* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1]

show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is

the lowest bit in this bit field, since we are checking least significant bits first. *)

let branching_bit i0 i1 =

lowest_bit (i0 lxor i1)

(* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit

in the mask [m]. We keep these bits, and set all others to 0. *)

let mask i m =

i land (m-1)

(* The smaller [m] is, the fewer bits are relevant. *)

let shorter =

(<)

end

module Big = struct

type mask = int

let lowest_bit x =

x land (-x)

let rec highest_bit x =

let m = lowest_bit x in

if x = m then

else

highest_bit (x - m)

(* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1]

show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is

the highest bit in this bit field, since we are checking most significant bits first.

100

101

In Okasaki's paper, this loop is sped up by computing a conservative initial guess. Indeed, the bit at which

102

the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the

103

least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the

104

main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This

105

``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *)

106

107

let branching_bit i0 i1 =

108

highest_bit (i0 lxor i1)

109

110

(* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit

111

in the mask [m]. We keep these bits, and set all others to 0. Okasaki uses a different convention, which allows

112

big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *)

113

114

let mask i m =

115

i land (lnot (2*m-1))

116

117

(* The smaller [m] is, the more bits are relevant. *)

118

119

let shorter =

120

(>)

121

122

end

123

124

end

125

126

(*i --------------------------------------------------------------------------------------------------------------- i*)

127

(*s \mysection{Patricia-tree-based maps} *)

128

129

module Make (X : Endianness.S) = struct

130

131

(* Patricia trees are maps whose keys are integers. *)

132

133

type key = int

134

135

(* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node.

136

Each binary node carries two integers. The first one is the longest common prefix of all keys in this

137

sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2),

138

which describes the bit being tested at this node. *)

139

140

type 'a t =

141

| Empty

142

| Leaf of int * 'a

143

| Branch of int * X.mask * 'a t * 'a t

144

145

(* The empty map. *)

146

147

let empty =

148

Empty

149

150

(* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is

151

bound to [k].

152

153

This implementation takes branches \emph{without} checking whether the key matches the prefix found at the

154

current node. This means that a query for a non-existent key shall be detected only when finally reaching

155

a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be

156

successful. *)

157

158

let rec lookup key = function

159

| Empty ->

160

raise Not_found

161

| Leaf (key', data) ->

162

if key = key' then

163

data

164

else

165

raise Not_found

166

| Branch (_, mask, tree0, tree1) ->

167

lookup key (if (key land mask) = 0 then tree0 else tree1)

168

169

(* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree.

170

171

Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further,

172

suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how

173

large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$

174

as children! *)

175

176

let join p0 t0 p1 t1 =

177

let m = X.branching_bit p0 p1 in

178

let p = X.mask p0 (* for instance *) m in

179

if (p0 land m) = 0 then

180

Branch(p, m, t0, t1)

181

else

182

Branch(p, m, t1, t0)

183

184

(* The auxiliary function [match_prefix] tells whether a given key has a given prefix. More specifically,

185

[match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m].

186

187

Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their

188

irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p]

189

is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to

190

[X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where

191

[X.mask p0 m] has to be used instead of [p0]. *)

192

193

let match_prefix k p m =

194

X.mask k m = p

195

196

(* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to

197

the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from

198

[k] to [decide d0 d]. *)

199

200

type 'a decision = 'a -> 'a -> 'a

201

202

exception Unchanged

203

204

let basic_add decide k d m =

205

206

let rec add t =

207

match t with

208

| Empty ->

209

Leaf (k, d)

210

| Leaf (k0, d0) ->

211

if k = k0 then

212

let d' = decide d0 d in

213

if d' == d0 then

214

raise Unchanged

215

else

216

Leaf (k, d')

217

else

218

join k (Leaf (k, d)) k0 t

219

| Branch (p, m, t0, t1) ->

220

if match_prefix k p m then

221

if (k land m) = 0 then Branch (p, m, add t0, t1)

222

else Branch (p, m, t0, add t1)

223

else

224

join k (Leaf (k, d)) p t in

225

226

add m

227

228

let strict_add k d m =

229

basic_add (fun _ _ -> raise Unchanged) k d m

230

231

let fine_add decide k d m =

232

try

233

basic_add decide k d m

234

with Unchanged ->

235

236

237

(* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum

238

[d]. If a binding already exists for [k], it is overridden. *)

239

240

let add k d m =

241

fine_add (fun old_binding new_binding -> new_binding) k d m

242

243

(* [singleton k d] returns a map whose only binding is from [k] to [d]. *)

244

245

let singleton k d =

246

Leaf (k, d)

247

248

(* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map

249

that maps [k] to [d]. Otherwise, it returns [None]. *)

250

251

let is_singleton = function

252

| Leaf (k, d) ->

253

Some (k, d)

254

| Empty

255

| Branch _ ->

256

None

257

258

(* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *)

259

260

let is_empty = function

261

| Empty ->

262

true

263

| Leaf _

264

| Branch _ ->

265

false

266

267

(* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's

268

cardinal. *)

269

270

let rec cardinal = function

271

| Empty ->

272

273

| Leaf _ ->

274

275

| Branch (_, _, t0, t1) ->

276

cardinal t0 + cardinal t1

277

278

(* [remove k m] returns the map [m] deprived from any binding involving [k]. *)

279

280

let remove key m =

281

282

let rec remove = function

283

| Empty ->

284

raise Not_found

285

| Leaf (key', _) ->

286

if key = key' then

287

Empty

288

else

289

raise Not_found

290

| Branch (prefix, mask, tree0, tree1) ->

291

if (key land mask) = 0 then

292

match remove tree0 with

293

| Empty ->

294

tree1

295

| tree0 ->

296

Branch (prefix, mask, tree0, tree1)

297

else

298

match remove tree1 with

299

| Empty ->

300

tree0

301

| tree1 ->

302

Branch (prefix, mask, tree0, tree1) in

303

304

try

305

remove m

306

with Not_found ->

307

308

309

(* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found]

310

if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding

311

from [k] to [v]. *)

312

313

let rec lookup_and_remove key = function

314

| Empty ->

315

raise Not_found

316

| Leaf (key', data) ->

317

if key = key' then

318

data, Empty

319

else

320

raise Not_found

321

| Branch (prefix, mask, tree0, tree1) ->

322

if (key land mask) = 0 then

323

match lookup_and_remove key tree0 with

324

| data, Empty ->

325

data, tree1

326

| data, tree0 ->

327

data, Branch (prefix, mask, tree0, tree1)

328

else

329

match lookup_and_remove key tree1 with

330

| data, Empty ->

331

data, tree0

332

| data, tree1 ->

333

data, Branch (prefix, mask, tree0, tree1)

334

335

(* [fine_union decide m1 m2] returns the union of the maps [m1] and

336

[m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1]

337

(resp. [m2]), then [decide] is called. It is passed [x1] and

338

[x2], and must return the value which shall be bound to [k] in

339

the final map. The operation returns [m2] itself (as opposed to a

340

copy of it) when its result is equal to [m2]. *)

341

342

let reverse decision elem1 elem2 =

343

decision elem2 elem1

344

345

let fine_union decide m1 m2 =

346

347

let rec union s t =

348

match s, t with

349

350

| Empty, _ ->

351

352

| (Leaf _ | Branch _), Empty ->

353

354

355

| Leaf(key, value), _ ->

356

fine_add (reverse decide) key value t

357

| Branch _, Leaf(key, value) ->

358

fine_add decide key value s

359

360

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

361

if (p = q) & (m = n) then

362

363

(* The trees have the same prefix. Merge their sub-trees. *)

364

365

let u0 = union s0 t0

366

and u1 = union s1 t1 in

367

if t0 == u0 && t1 == u1 then t

368

else Branch(p, m, u0, u1)

369

370

else if (X.shorter m n) & (match_prefix q p m) then

371

372

(* [q] contains [p]. Merge [t] with a sub-tree of [s]. *)

373

374

if (q land m) = 0 then

375

Branch(p, m, union s0 t, s1)

376

else

377

Branch(p, m, s0, union s1 t)

378

379

else if (X.shorter n m) & (match_prefix p q n) then

380

381

(* [p] contains [q]. Merge [s] with a sub-tree of [t]. *)

382

383

if (p land n) = 0 then

384

let u0 = union s t0 in

385

if t0 == u0 then t

386

else Branch(q, n, u0, t1)

387

else

388

let u1 = union s t1 in

389

if t1 == u1 then t

390

else Branch(q, n, t0, u1)

391

392

else

393

394

(* The prefixes disagree. *)

395

396

join p s q t in

397

398

union m1 m2

399

400

(* [union m1 m2] returns the union of the maps [m1] and

401

[m2]. Bindings in [m2] take precedence over those in [m1]. The

402

operation returns [m2] itself (as opposed to a copy of it) when

403

its result is equal to [m2]. *)

404

405

let union m1 m2 =

406

fine_union (fun d d' -> d') m1 m2

407

408

(* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are

409

presented to [f] according to some unspecified, but fixed, order. *)

410

411

let rec iter f = function

412

| Empty ->

413

()

414

| Leaf (key, data) ->

415

f key data

416

| Branch (_, _, tree0, tree1) ->

417

iter f tree0;

418

iter f tree1

419

420

(* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map

421

[m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of

422

[accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The

423

value returned by [fold] is the final value of [accu]. *)

424

425

let rec fold f m accu =

426

match m with

427

| Empty ->

428

accu

429

| Leaf (key, data) ->

430

f key data accu

431

| Branch (_, _, tree0, tree1) ->

432

fold f tree1 (fold f tree0 accu)

433

434

(* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *)

435

436

let rec fold_rev f m accu =

437

match m with

438

| Empty ->

439

accu

440

| Leaf (key, data) ->

441

f key data accu

442

| Branch (_, _, tree0, tree1) ->

443

fold_rev f tree0 (fold_rev f tree1 accu)

444

445

(* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes

446

[f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f]

447

according to some unspecified, but fixed, order. *)

448

449

let rec iter2 f t1 t2 =

450

match t1, t2 with

451

| Empty, Empty ->

452

()

453

| Leaf (key1, data1), Leaf (key2, data2) ->

454

assert (key1 = key2);

455

f key1 (* for instance *) data1 data2

456

| Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) ->

457

assert (p1 = p2);

458

assert (m1 = m2);

459

iter2 f left1 left2;

460

iter2 f right1 right2

461

| _, _ ->

462

assert false

463

464

(* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map

465

$k\mapsto f(m(k))$. *)

466

467

let rec map f = function

468

| Empty ->

469

Empty

470

| Leaf (key, data) ->

471

Leaf(key, f data)

472

| Branch (p, m, tree0, tree1) ->

473

Branch (p, m, map f tree0, map f tree1)

474

475

(* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves

476

memory when [f] is the identity function. *)

477

478

let rec endo_map f tree =

479

match tree with

480

| Empty ->

481

tree

482

| Leaf (key, data) ->

483

let data' = f data in

484

if data == data' then

485

tree

486

else

487

Leaf(key, data')

488

| Branch (p, m, tree0, tree1) ->

489

let tree0' = endo_map f tree0 in

490

let tree1' = endo_map f tree1 in

491

if (tree0' == tree0) & (tree1' == tree1) then

492

tree

493

else

494

Branch (p, m, tree0', tree1')

495

496

(*i --------------------------------------------------------------------------------------------------------------- i*)

497

(*s \mysection{Patricia-tree-based sets} *)

498

499

(* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement

500

sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To

501

avoid it, we define each structure separately. *)

502

503

module Domain = struct

504

505

type element = int

506

507

type t =

508

| Empty

509

| Leaf of int

510

| Branch of int * X.mask * t * t

511

512

(* The empty set. *)

513

514

let empty =

515

Empty

516

517

(* [is_empty s] returns [true] if and only if the set [s] is empty. *)

518

519

let is_empty = function

520

| Empty ->

521

true

522

| Leaf _

523

| Branch _ ->

524

false

525

526

(* [singleton x] returns a set whose only element is [x]. *)

527

528

let singleton x =

529

Leaf x

530

531

(* [is_singleton s] returns [Some x] if [s] is a singleton

532

containing [x] as its only element; otherwise, it returns

533

[None]. *)

534

535

let is_singleton = function

536

| Leaf x ->

537

Some x

538

| Empty

539

| Branch _ ->

540

None

541

542

(* [choose s] returns an element of [s] if [s] is nonempty and

543

raises [Not_found] otherwise. *)

544

545

let rec choose = function

546

| Leaf x ->

547

548

| Empty ->

549

raise Not_found

550

| Branch (_, _, tree0, _) ->

551

choose tree0

552

553

(* [cardinal s] returns [s]'s cardinal. *)

554

555

let rec cardinal = function

556

| Empty ->

557

558

| Leaf _ ->

559

560

| Branch (_, _, t0, t1) ->

561

cardinal t0 + cardinal t1

562

563

(* [mem x s] returns [true] if and only if [x] appears in the set [s]. *)

564

565

let rec mem x = function

566

| Empty ->

567

false

568

| Leaf x' ->

569

x = x'

570

| Branch (_, mask, tree0, tree1) ->

571

mem x (if (x land mask) = 0 then tree0 else tree1)

572

573

(* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *)

574

575

let join p0 t0 p1 t1 =

576

let m = X.branching_bit p0 p1 in

577

let p = X.mask p0 (* for instance *) m in

578

if (p0 land m) = 0 then

579

Branch(p, m, t0, t1)

580

else

581

Branch(p, m, t1, t0)

582

583

(* [add x s] returns a set whose elements are all elements of [s], plus [x]. *)

584

585

exception Unchanged

586

587

let rec strict_add x t =

588

match t with

589

| Empty ->

590

Leaf x

591

| Leaf x0 ->

592

if x = x0 then

593

raise Unchanged

594

else

595

join x (Leaf x) x0 t

596

| Branch (p, m, t0, t1) ->

597

if match_prefix x p m then

598

if (x land m) = 0 then Branch (p, m, strict_add x t0, t1)

599

else Branch (p, m, t0, strict_add x t1)

600

else

601

join x (Leaf x) p t

602

603

let add x s =

604

try

605

strict_add x s

606

with Unchanged ->

607

608

609

(* [make2 x y] creates a set whose elements are [x] and [y]. [x] and [y] need not be distinct. *)

610

611

let make2 x y =

612

add x (Leaf y)

613

614

(* [fine_add] does not make much sense for sets of integers. Better warn the user. *)

615

616

type decision = int -> int -> int

617

618

let fine_add decision x s =

619

assert false

620

621

(* [remove x s] returns a set whose elements are all elements of [s], except [x]. *)

622

623

let remove x s =

624

625

let rec strict_remove = function

626

| Empty ->

627

raise Not_found

628

| Leaf x' ->

629

if x = x' then

630

Empty

631

else

632

raise Not_found

633

| Branch (prefix, mask, tree0, tree1) ->

634

if (x land mask) = 0 then

635

match strict_remove tree0 with

636

| Empty ->

637

tree1

638

| tree0 ->

639

Branch (prefix, mask, tree0, tree1)

640

else

641

match strict_remove tree1 with

642

| Empty ->

643

tree0

644

| tree1 ->

645

Branch (prefix, mask, tree0, tree1) in

646

647

try

648

strict_remove s

649

with Not_found ->

650

651

652

(* [union s1 s2] returns the union of the sets [s1] and [s2]. *)

653

654

let rec union s t =

655

match s, t with

656

657

| Empty, _ ->

658

659

| (Leaf _ | Branch _), Empty ->

660

661

662

| Leaf x, _ ->

663

add x t

664

| Branch _, Leaf x ->

665

add x s

666

667

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

668

if (p = q) & (m = n) then

669

670

(* The trees have the same prefix. Merge their sub-trees. *)

671

672

let u0 = union s0 t0

673

and u1 = union s1 t1 in

674

if t0 == u0 && t1 == u1 then t

675

else Branch(p, m, u0, u1)

676

677

else if (X.shorter m n) & (match_prefix q p m) then

678

679

(* [q] contains [p]. Merge [t] with a sub-tree of [s]. *)

680

681

if (q land m) = 0 then

682

Branch(p, m, union s0 t, s1)

683

else

684

Branch(p, m, s0, union s1 t)

685

686

else if (X.shorter n m) & (match_prefix p q n) then

687

688

(* [p] contains [q]. Merge [s] with a sub-tree of [t]. *)

689

690

if (p land n) = 0 then

691

let u0 = union s t0 in

692

if t0 == u0 then t

693

else Branch(q, n, u0, t1)

694

else

695

let u1 = union s t1 in

696

if t1 == u1 then t

697

else Branch(q, n, t0, u1)

698

699

else

700

701

(* The prefixes disagree. *)

702

703

join p s q t

704

705

(* [fine_union] does not make much sense for sets of integers. Better warn the user. *)

706

707

let fine_union decision s1 s2 =

708

assert false

709

710

(* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures

711

that the newly created node does not have an [Empty] child. *)

712

713

let build p m t0 t1 =

714

match t0, t1 with

715

| Empty, Empty ->

716

Empty

717

| Empty, _ ->

718

719

| _, Empty ->

720

721

| _, _ ->

722

Branch(p, m, t0, t1)

723

724

(* [diff s t] returns the set difference of [s] and [t], that is, $s\setminus t$. *)

725

726

let rec diff s t =

727

match s, t with

728

729

| Empty, _

730

| _, Empty ->

731

732

733

| Leaf x, _ ->

734

if mem x t then Empty else s

735

| _, Leaf x ->

736

remove x s

737

738

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

739

if (p = q) & (m = n) then

740

741

(* The trees have the same prefix. Compute the differences of their sub-trees. *)

742

743

build p m (diff s0 t0) (diff s1 t1)

744

745

else if (X.shorter m n) & (match_prefix q p m) then

746

747

(* [q] contains [p]. Subtract [t] off a sub-tree of [s]. *)

748

749

if (q land m) = 0 then

750

build p m (diff s0 t) s1

751

else

752

build p m s0 (diff s1 t)

753

754

else if (X.shorter n m) & (match_prefix p q n) then

755

756

(* [p] contains [q]. Subtract a sub-tree of [t] off [s]. *)

757

758

diff s (if (p land n) = 0 then t0 else t1)

759

760

else

761

762

(* The prefixes disagree. *)

763

764

765

766

(* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *)

767

768

let rec inter s t =

769

match s, t with

770

771

| Empty, _

772

| _, Empty ->

773

Empty

774

775

| Leaf x, _ ->

776

if mem x t then s else Empty

777

| _, Leaf x ->

778

if mem x s then t else Empty

779

780

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

781

if (p = q) & (m = n) then

782

783

(* The trees have the same prefix. Compute the intersections of their sub-trees. *)

784

785

build p m (inter s0 t0) (inter s1 t1)

786

787

else if (X.shorter m n) & (match_prefix q p m) then

788

789

(* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *)

790

791

inter (if (q land m) = 0 then s0 else s1) t

792

793

else if (X.shorter n m) & (match_prefix p q n) then

794

795

(* [p] contains [q]. Intersect [s] with a sub-tree of [t]. *)

796

797

inter s (if (p land n) = 0 then t0 else t1)

798

799

else

800

801

(* The prefixes disagree. *)

802

803

Empty

804

805

(* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection

806

is empty. It is a specialized version of [inter], which uses less space. *)

807

808

exception NotDisjoint

809

810

let disjoint s t =

811

812

let rec inter s t =

813

match s, t with

814

815

| Empty, _

816

| _, Empty ->

817

()

818

819

| Leaf x, _ ->

820

if mem x t then

821

raise NotDisjoint

822

| _, Leaf x ->

823

if mem x s then

824

raise NotDisjoint

825

826

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

827

if (p = q) & (m = n) then begin

828

inter s0 t0;

829

inter s1 t1

830

end

831

else if (X.shorter m n) & (match_prefix q p m) then

832

inter (if (q land m) = 0 then s0 else s1) t

833

else if (X.shorter n m) & (match_prefix p q n) then

834

inter s (if (p land n) = 0 then t0 else t1)

835

else

836

() in

837

838

try

839

inter s t;

840

true

841

with NotDisjoint ->

842

false

843

844

(* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according

845

to some unspecified, but fixed, order. *)

846

847

let rec iter f = function

848

| Empty ->

849

()

850

| Leaf x ->

851

f x

852

| Branch (_, _, tree0, tree1) ->

853

iter f tree0;

854

iter f tree1

855

856

(* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f]

857

according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call,

858

its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final

859

value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then

860

[fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *)

861

862

let rec fold f s accu =

863

match s with

864

| Empty ->

865

accu

866

| Leaf x ->

867

f x accu

868

| Branch (_, _, s0, s1) ->

869

fold f s1 (fold f s0 accu)

870

871

(* [fold_rev] performs exactly the same job as [fold], but presents elements to [f] in the opposite order. *)

872

873

let rec fold_rev f s accu =

874

match s with

875

| Empty ->

876

accu

877

| Leaf x ->

878

f x accu

879

| Branch (_, _, s0, s1) ->

880

fold_rev f s0 (fold_rev f s1 accu)

881

882

(* [iter2] does not make much sense for sets of integers. Better warn the user. *)

883

884

let rec iter2 f t1 t2 =

885

assert false

886

887

(* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where

888

$x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time,

889

returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to

890

iterate over a set's elements, without being restricted by the call stack's discipline.

891

892

For more comments about this algorithm, please see module [Baltree], which defines a similar one. *)

893

894

let iterator s =

895

896

let remainder = ref [ s ] in

897

898

let rec next () =

899

match !remainder with

900

| [] ->

901

None

902

| Empty :: parent ->

903

remainder := parent;

904

next()

905

| (Leaf x) :: parent ->

906

remainder := parent;

907

Some x

908

| (Branch(_, _, s0, s1)) :: parent ->

909

remainder := s0 :: s1 :: parent;

910

next () in

911

912

913

914

(* [exists p s] returns [true] if and only if some element of [s] matches the predicate [p]. *)

915

916

exception Exists

917

918

let exists p s =

919

try

920

iter (fun x ->

921

if p x then

922

raise Exists

923

) s;

924

false

925

with Exists ->

926

true

927

928

(* [compare] is an ordering over sets. *)

929

930

exception Got of int

931

932

let compare s1 s2 =

933

let iterator2 = iterator s2 in

934

try

935

iter (fun x1 ->

936

match iterator2() with

937

| None ->

938

raise (Got 1)

939

| Some x2 ->

940

let c = x1 - x2 in

941

if c <> 0 then

942

raise (Got c)

943

) s1;

944

match iterator2() with

945

| None ->

946

947

| Some _ ->

948

-1

949

with Got c ->

950

951

952

(* [equal] implements equality over sets. *)

953

954

let equal s1 s2 =

955

compare s1 s2 = 0

956

957

(* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if

958

$s\subseteq t$. It is a specialized version of [diff]. *)

959

960

exception NotSubset

961

962

let subset s t =

963

964

let rec diff s t =

965

match s, t with

966

967

| Empty, _ ->

968

()

969

| _, Empty

970

971

| Branch _, Leaf _ ->

972

raise NotSubset

973

| Leaf x, _ ->

974

if not (mem x t) then

975

raise NotSubset

976

977

| Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->

978

979

if (p = q) & (m = n) then begin

980

981

diff s0 t0;

982

diff s1 t1

983

984

end

985

else if (X.shorter n m) & (match_prefix p q n) then

986

987

diff s (if (p land n) = 0 then t0 else t1)

988

989

else

990

991

(* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t],

992

or the prefixes disagree. In either case, the subset relationship cannot possibly hold. *)

993

994

raise NotSubset in

995

996

try

997

diff s t;

998

true

999

with NotSubset ->

1000

false

1001

1002

(* [filter p s] returns the subset of [s] formed by all elements which satisfy the predicate [p]. *)

1003

1004

let filter predicate s =

1005

let modified = ref false in

1006

1007

let subset = fold (fun element subset ->

1008

if predicate element then

1009

add element subset

1010

else begin

1011

modified := true;

1012

subset

1013

end

1014

) s Empty in

1015

1016

if !modified then

1017

subset

1018

else

1019

1020

1021

(* [map f s] computes the image of [s] through [f]. *)

1022

1023

let map f s =

1024

fold (fun element accu ->

1025

add (f element) accu

1026

) s Empty

1027

1028

(* [monotone_map] and [endo_map] do not make much sense for sets of integers. Better warn the user. *)

1029

1030

let monotone_map f s =

1031

assert false

1032

1033

let endo_map f s =

1034

assert false

1035

1036

end

1037

1038

(*i --------------------------------------------------------------------------------------------------------------- i*)

1039

(*s \mysection{Relating sets and maps} *)

1040

1041

(* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *)

1042

1043

(* [domain m] returns [m]'s domain. *)

1044

1045

let rec domain = function

1046

| Empty ->

1047

Domain.Empty

1048

| Leaf (k, _) ->

1049

Domain.Leaf k

1050

| Branch (p, m, t0, t1) ->

1051

Domain.Branch (p, m, domain t0, domain t1)

1052

1053

(* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *)

1054

1055

let rec lift f = function

1056

| Domain.Empty ->

1057

Empty

1058

| Domain.Leaf k ->

1059

Leaf (k, f k)

1060

| Domain.Branch (p, m, t0, t1) ->

1061

Branch(p, m, lift f t0, lift f t1)

1062

1063

(* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures

1064

that the newly created node does not have an [Empty] child. *)

1065

1066

let build p m t0 t1 =

1067

match t0, t1 with

1068

| Empty, Empty ->

1069

Empty

1070

| Empty, _ ->

1071

1072

| _, Empty ->

1073

1074

| _, _ ->

1075

Branch(p, m, t0, t1)

1076

1077

(* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map

1078

$k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles

1079

[diff]'s. *)

1080

1081

let rec corestrict s t =

1082

match s, t with

1083

1084

| Empty, _

1085

| _, Domain.Empty ->

1086

1087

1088

| Leaf (k, _), _ ->

1089

if Domain.mem k t then Empty else s

1090

| _, Domain.Leaf k ->

1091

remove k s

1092

1093

| Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) ->

1094

if (p = q) & (m = n) then

1095

1096

build p m (corestrict s0 t0) (corestrict s1 t1)

1097

1098

else if (X.shorter m n) & (match_prefix q p m) then

1099

1100

if (q land m) = 0 then

1101

build p m (corestrict s0 t) s1

1102

else

1103

build p m s0 (corestrict s1 t)

1104

1105

else if (X.shorter n m) & (match_prefix p q n) then

1106

1107

corestrict s (if (p land n) = 0 then t0 else t1)

1108

1109

else

1110

1111

1112

1113

end

1114

1115

(*i --------------------------------------------------------------------------------------------------------------- i*)

1116

(*s \mysection{Instantiating the functor} *)

1117

1118

module Little = Make(Endianness.Little)

1119

1120

module Big = Make(Endianness.Big)

1121

Older »