2
; Copyright (C) 2013 Battelle Memorial Institute
5
; David Rager, ragerdl@cs.utexas.edu
7
; This program is free software; you can redistribute it and/or modify it under
8
; the terms of the GNU General Public License as published by the Free Software
9
; Foundation; either version 2 of the License, or (at your option) any later
10
; version. This program is distributed in the hope that it will be useful but
11
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13
; more details. You should have received a copy of the GNU General Public
14
; License along with this program; if not, write to the Free Software
15
; Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
17
; Original author: David Rager <ragerdl@cs.utexas.edu>
21
;(include-book "std/util/defaggregate" :dir :system)
22
(include-book "std/util/deflist" :dir :system)
23
;(include-book "tools/bstar" :dir :system)
24
(include-book "std/strings/top" :dir :system)
25
(include-book "std/io/base" :dir :system)
26
(include-book "std/util/define" :dir :system)
27
(include-book "misc/with-waterfall-parallelism" :dir :system)
29
(include-book "object-representations")
30
(include-book "strings")
31
(include-book "read-line")
32
;(include-book "read-file-to-char-list")
33
(include-book "read-file-characters-no-error")
35
;(include-book "unicode/read-file-characters" :dir :system)
37
;; The context free grammar representation is a hashtable keyed on non-terminal
38
;; symbols. For each symbol there is a list of productions/rules for that
39
;; symbol. The productions take the form of a ordered list of symbols.
41
;; The non-terminal symbols are represented as strings.
44
;; The lexicon is the representation of all terminal symbols (words in the
45
;; language). It is a hashtable keyed on word. For each word there is a list
46
;; of "terminal" structures that are different semantic mappings of the word.
49
;;;; Backus-Naur Form grammar reader functions
50
;;;;------------------------------------------
52
(with-waterfall-parallelism
53
(define read-next-bnf-lexeme1 ((file-input character-listp)
54
(keep-newline booleanp)
55
(whitespace character-listp)
57
:returns (mv (lexeme stringp :hyp :fguard)
58
(eof-p booleanp :hyp :fguard)
59
(new-file-input character-listp :hyp :fguard))
60
;:assert (stringp booleanp character-listp)
62
:parents (earley-parser)
66
(mv-let (char new-file-input)
67
(mv (car file-input) (cdr file-input))
68
(cond ;; If the char is an '=' and the rest of the lexeme is already
69
;; "::" -> Return lexeme
70
((and (equal char #\=) (equal lexeme "::"))
71
(mv (string-append lexeme (string char)) nil new-file-input))
72
;; If the char is an '>' and lexeme starts with an '<' ->
74
((and (equal char #\>)
75
(< 0 (length lexeme)) ; for guard of following car THISMAKESNOSENSE
76
(equal (car (str::firstn-chars 1 lexeme)) #\<))
77
(mv (string-append lexeme (string char)) nil new-file-input))
78
;; If the char is '|' it is in it self a complete lexeme
79
((and (equal char #\|) (equal (length lexeme) 0))
80
(mv (string-append lexeme (string char)) nil new-file-input))
81
;; Newlines are also (or may be) complete lexemes
82
((and (equal char #\Newline)
83
(equal (length lexeme) 0)
85
(mv (string-append lexeme (string char)) nil new-file-input))
86
;; If the char is whitespace, and lexeme is empty ->
87
;; Do nothing, just continue with the next char
88
((and (member-equal char whitespace)
89
(equal (length lexeme) 0))
90
(read-next-bnf-lexeme1 new-file-input keep-newline whitespace lexeme))
91
(t (read-next-bnf-lexeme1 new-file-input keep-newline whitespace
92
(string-append lexeme (string char)))))))))
94
(with-waterfall-parallelism
95
(defthm read-next-bnf-lexeme1-termination-weak
96
(<= (acl2-count (mv-nth 2 (read-next-bnf-lexeme1 file-input keep-newline
98
(acl2-count file-input))
99
:hints (("Goal" :in-theory (enable read-next-bnf-lexeme1)))
100
:rule-classes (:rewrite :linear)))
102
(with-waterfall-parallelism
103
(defthm read-next-bnf-lexeme1-termination-strong
104
(implies (not (mv-nth 1 (read-next-bnf-lexeme1 file-input keep-newline
106
(< (acl2-count (mv-nth 2 (read-next-bnf-lexeme1 file-input keep-newline
108
(acl2-count file-input)))
109
:hints (("Goal" :in-theory (enable read-next-bnf-lexeme1)))
110
:rule-classes (:rewrite :linear)))
113
;; (defthm file-measure-of-read-next-bnf-lexeme1-weak
115
;; (and (character-listp file-input)
116
;; (booleanp keep-newline)
117
;; (character-listp whitespace)
119
;; (<= (file-measure channel
120
;; (mv-nth 2 (read-next-bnf-lexeme1 channel keep-newline
121
;; whitespace lexeme state)))
122
;; (file-measure channel state)))
123
;; :hints (("Goal" :induct t :in-theory (e/d
124
;; (read-next-bnf-lexeme1)
125
;; (file-measure mv-nth))))
128
(define read-next-bnf-lexeme ((file-input character-listp)
129
(keep-newline booleanp)) ; keep-newline should be nil if not specified
130
; "Reads and returns the next Backus-Naur lexeme from file."
131
:returns (mv (lexeme stringp ; (if lexeme (stringp lexeme) t)
134
(eof-p booleanp :hyp :fguard
135
"Whether read-next-bnf-lexeme has reached the end of ~
137
(new-file-input character-listp :hyp :fguard
138
"Remaining portion of file input"))
139
:parents (earley-parser)
140
;:assert (stringp booleanp character-listp)
141
(let ((whitespace (list #\Space #\Tab))
142
;; (lexeme "") ; not used
145
(lexeme eof-p new-file-input)
146
(read-next-bnf-lexeme1 file-input
150
(cons #\Newline whitespace))
152
(mv (str-trim (list #\< #\>) lexeme)
156
(defthm read-next-bnf-lexeme-termination-weak
157
(<= (acl2-count (mv-nth 2 (read-next-bnf-lexeme file-input keep-newline)))
158
(acl2-count file-input))
159
:hints (("Goal" :in-theory (enable read-next-bnf-lexeme)))
160
:rule-classes (:rewrite :linear))
162
(with-waterfall-parallelism
163
(defthm read-next-bnf-lexeme-termination-strong
164
(implies (not (mv-nth 1 (read-next-bnf-lexeme file-input keep-newline)))
165
(< (acl2-count (mv-nth 2 (read-next-bnf-lexeme file-input keep-newline)))
166
(acl2-count file-input)))
167
:hints (("Goal" :in-theory (enable read-next-bnf-lexeme)))
168
:rule-classes (:rewrite :linear)))
171
(define read-next-bnf-production1 ((file-input character-listp)
172
(keep-newline booleanp)
173
(production string-list-p)
174
(lexeme-cache string-list-p))
175
:returns (mv (productions string-list-p :hyp :fguard)
176
(remaining-file-input character-listp :hyp :fguard)
177
(lexeme-cache string-list-p :hyp :fguard))
178
:parents (earley-parser)
179
(mv-let (lexeme eof-p new-file-input)
180
(read-next-bnf-lexeme file-input keep-newline) ; make sure I account for this in translation
181
(cond ((or (equal lexeme "") eof-p) ; check used to be for (null lexeme) - SOURCE OF RUNTIME BUG
182
(mv (reverse production) new-file-input lexeme-cache))
183
;; If we just read a "::=" and there already is one in the
184
;; production -> Push it and last lexeme onto cache instead of
186
((and (equal lexeme "::=")
187
(member-equal lexeme production))
188
(let* ((lexeme-cache (cons lexeme lexeme-cache))
189
(lexeme-cache (cons (car production) lexeme-cache))
190
(production (cdr production)))
191
(mv (rev production) new-file-input lexeme-cache)))
192
(t (let ((production (cons lexeme production)))
193
(read-next-bnf-production1 new-file-input keep-newline
194
production lexeme-cache))))))
196
(defthm read-next-bnf-production1-weak
197
(<= (acl2-count (mv-nth 1 (read-next-bnf-production1 file-input keep-newline
198
production lexeme-cache)))
199
(acl2-count file-input))
200
:hints (("Goal" :in-theory (enable read-next-bnf-production1)))
201
:rule-classes (:rewrite :linear))
204
(defthm read-next-bnf-production1-strong
205
(implies (and (character-listp file-input)
206
(booleanp keep-newline)
207
(string-list-p production)
209
(string-list-p lexeme-cache)
211
(len (mv-nth 0 (read-next-bnf-production1 file-input keep-newline
212
production lexeme-cache)))))
213
(< (acl2-count (mv-nth 1 (read-next-bnf-production1 file-input keep-newline
214
production lexeme-cache)))
215
(acl2-count file-input)))
216
:hints (("Goal" :in-theory (enable read-next-bnf-production1)))
217
:rule-classes (:rewrite :linear))
219
(define fix-production ((lexeme-cache string-list-p)
220
(production string-list-p))
221
:returns (productions string-list-p "New list of production strings" :hyp
223
:parents (earley-parser)
224
;:assert string-list-p
225
(append (rev lexeme-cache) production)
229
(defthm fix-production-preserves-string-list-p
230
(implies (and (string-listp x)
232
(string-listp (fix-production x y))))
233
(defthm fix-production-length
234
(implies (string-listp x)
235
(equal (len (fix-production x nil))
237
(defthm fix-production-of-nil
238
(equal (fix-production x nil)
241
; To figure out how important the lexeme-cache is, I could call
242
; my-f-put-global/my-f-get-global and trace those. Would need to run on more
243
; complicated examples. Having a functional model that doesn't involve state
244
; would be a nice improvement.
246
; I did the above and found that it was used a small amount, even in our small
249
(defmacro string-list-p-alias (x)
252
(define read-next-bnf-production ((file-input character-listp)
253
(keep-newline booleanp)
255
(lexeme-cache string-list-p)
257
"Reads and returns the next Backus-Naur production from file."
258
; :guard (and (boundp-global 'lexeme-cache state)
259
; (string-list-p (get-global 'lexeme-cache state)))
260
:returns (mv (productions string-list-p :hyp :fguard)
261
(remaining-file-input character-listp :hyp :fguard)
262
; (state state-p :hyp :fguard)
263
(lexeme-cache string-list-p :hyp :fguard))
264
:parents (earley-parser)
265
;:assert (string-list-p character-listp string-list-p-alias)
266
; Seems to be working when compared with CL trace output.
267
(let (;(lexeme-cache (f-get-global 'lexeme-cache state)) ; silly
268
(production nil)) ; silly
269
;; If there is anything in the cache, pop it off and add it to production
270
;; (read-next-bnf-production-cache lexeme-cache lexeme-cache production)
271
(let* ((production (fix-production lexeme-cache production)) ; silly but it's part of the original code
272
;(state (f-put-global 'lexeme-cache nil state))
273
(lexeme-cache nil) ; just integrated its value above
275
(read-next-bnf-production1 file-input keep-newline production lexeme-cache))))
277
(defthm read-next-bnf-production-weak
278
(<= (acl2-count (mv-nth 1 (read-next-bnf-production file-input keep-newline
280
(acl2-count file-input))
281
:hints (("Goal" :in-theory (enable read-next-bnf-production)))
282
:rule-classes (:rewrite :linear))
284
(defthm read-next-bnf-production-strong-works
285
(implies (and (character-listp file-input)
286
(booleanp keep-newline)
287
(string-list-p production)
288
(string-list-p lexeme-cache)
289
(< (len (rev lexeme-cache)) ; production
290
(len (mv-nth 0 (read-next-bnf-production1
291
file-input keep-newline
294
(< (acl2-count (mv-nth 1 (read-next-bnf-production file-input keep-newline
296
(acl2-count file-input)))
297
:hints (("Goal" :do-not-induct t
298
:in-theory (e/d (read-next-bnf-production)
299
(read-next-bnf-production1-strong)))
300
("Goal'" :use ((:instance read-next-bnf-production1-strong
301
(production (rev lexeme-cache))
302
(lexeme-cache nil)))))
303
:rule-classes (:rewrite :linear))
308
(defthm read-next-bnf-production-strong
309
(implies (and (character-listp file-input)
310
(booleanp keep-newline)
311
(string-list-p production)
312
(string-list-p lexeme-cache)
313
(mv-nth 0 (read-next-bnf-production file-input keep-newline lexeme-cache)))
314
(< (acl2-count (mv-nth 1 (read-next-bnf-production file-input keep-newline
316
(acl2-count file-input)))
317
:hints (("Goal" :do-not-induct t
318
:in-theory (e/d (read-next-bnf-production)
319
(read-next-bnf-production1-strong
320
read-next-bnf-production1)))
321
("Goal'" :use ((:instance read-next-bnf-production1-strong
322
(production (rev lexeme-cache))
323
(lexeme-cache nil)))))
325
:rule-classes (:rewrite :linear))
328
(define inject-expansions! ((target-list string-list-p)
329
(expansion string-list-p)
331
(production (and (string-list-p production)
332
(consp production))))
333
:returns (rules rule-list-p :hyp :fguard)
334
:parents (earley-parser)
337
;; No more expansions -> Inject last one
338
((atom target-list) ; was null in cl impl
339
(let ((curr-val (cdr (hons-get (car production) rules))))
341
(hons-acons (car production)
342
(cons expansion curr-val)
345
;; More expansion -> Inject current and continue
346
((equal (first target-list) "|")
347
(let* ((curr-val (cdr (hons-get (car production) rules)))
348
(rules (hons-shrink-alist
349
(hons-acons (car production)
350
(cons expansion curr-val)
354
(inject-expansions! (cdr target-list) nil #|expansion|# rules production)))
355
;; Expansion not ended -> Collect rest of expansion
357
(inject-expansions! (cdr target-list)
358
(append expansion (list (first target-list)))
362
((skip-proofs ; unsound but doing for now. Commented hyps will be needed eventually
363
(defthm read-next-bnf-production-strong
364
(implies (and ;; (character-listp file-input)
365
;; (booleanp keep-newline)
366
;; (string-list-p production)
367
;; (string-list-p lexeme-cache)
368
(mv-nth 0 (read-next-bnf-production file-input keep-newline lexeme-cache)))
369
(< (acl2-count (mv-nth 1 (read-next-bnf-production file-input keep-newline
371
(acl2-count file-input)))
372
:hints (("Goal" :do-not-induct t
373
:in-theory (e/d (read-next-bnf-production)
374
(read-next-bnf-production1-strong)))
375
("Goal'" :use ((:instance read-next-bnf-production1-strong
376
(production (rev lexeme-cache))
377
(lexeme-cache nil)))))
378
:rule-classes (:rewrite :linear))))
382
(define load-bnf-grammar2 ((file-input character-listp)
384
(lexeme-cache string-list-p))
385
:returns (mv (rules rule-list-p :hyp :fguard)
386
(lexeme-cache string-list-p :hyp :fguard))
387
;:assert (rule-list-p string-list-p)
388
:parents (earley-parser)
389
(mv-let (production new-file-input lexeme-cache)
391
; could use a flag other than production to indicate termination
393
(read-next-bnf-production file-input nil lexeme-cache)
394
(cond ((atom production) ; used to be null but should be fine
395
(mv rules lexeme-cache))
396
((atom (cdr production))
397
(prog2$ (er hard? 'load-bnf-grammar2
399
; Ee could try changing the above "hard?" to "hard", and after proving a lemma
400
; about read-next-bnf-production, prove that this branch is unreachable.
402
"Programming error: production should have been either ~
403
nil or a list of at least length two, but was ~x0"
405
(mv rules lexeme-cache)))
406
(t (let ((rules (inject-expansions! (cddr production) nil rules production)))
407
(load-bnf-grammar2 new-file-input rules lexeme-cache))))))
409
(define load-bnf-grammar1 ((file-input character-listp))
410
:returns (grammar grammar-p :hyp :fguard)
412
:parents (earley-parser)
413
(mv-let (rules lexeme-cache)
415
; For each production in the BNF file, create a list of possible ordered symbol
416
; sequences (list of strings) that is a legal expression for the symbol.
418
(load-bnf-grammar2 file-input nil nil)
419
(declare (ignore lexeme-cache))
420
(let ((rules (hons-shrink-alist rules nil)))
421
(make-grammar :rules rules))))
423
(define load-bnf-grammar ((filename stringp)
425
"Reads a grammar on Backus-Naur form into the representation of the context
427
:returns (mv (grammar grammar-p :hyp :fguard)
428
(state state-p :hyp :fguard))
429
:parents (earley-parser)
430
(mv-let (file-input state)
431
(read-file-characters-no-error filename state)
432
(let ((grammar (load-bnf-grammar1 file-input)))
433
(mv grammar state))))
435
;;;; Lexicon functions
436
;;;;------------------
437
;;;; Reads a dictionary on the form:
439
;;;; <word> :class <class> :gender <gender>
441
;;;; into a hashtable of lists of word instances and
442
;;;; a list of word classes (part of speech).
444
;; (defn keyword-string-pair-p (x)
446
;; (keywordp (car x))
447
;; (stringp (cdr x))))
449
;; (std::deflist keyword-string-pair-list-p (x)
450
;; (keyword-string-pair-p x)
451
;; :elementp-of-nil nil
454
(std::defalist keyword-string-pair-list-p (x)
458
:parents (earley-parser))
462
(implies (and (force (character-listp chars))
463
(force (stringp str))
464
(not (member-symbol-name str
465
(pkg-imports "KEYWORD"))))
466
(equal (symbol-package-name
467
(intern-in-package-of-symbol str
468
:a-random-symbol-for-intern))
473
(implies (and (force (character-listp chars))
474
(force (stringp str))
475
(not (member-symbol-name str
477
(equal (symbol-package-name
478
(intern-in-package-of-symbol str
479
:a-random-symbol-for-intern))
483
(include-book "std/misc/intern-in-package-of-symbol" :dir :system))
485
(defthm run-pkg-imports-on-keyword
486
(equal (pkg-imports "KEYWORD")
490
(defun double-cdr (x)
493
(double-cdr (cddr x)))))
495
(defn even-string-list-p (x)
498
(t (and (stringp (car x))
501
(even-string-list-p (cddr x))))))
503
(defthm even-string-list-p-implies-string-list-p
504
(implies (even-string-list-p x)
506
:rule-classes :forward-chaining)
510
(define read-lexicon-line-options ((strings even-string-list-p))
511
:measure (len strings)
512
:returns (ans keyword-string-pair-list-p :hyp :fguard)
513
:parents (earley-parser)
514
(cond ((atom strings)
516
((member-symbol-name (str-trim '(#\:)
517
(str::upcase-string (car strings)))
518
(pkg-imports "KEYWORD"))
519
(er hard? 'read-lexicon-line-options
520
"The lexicon string cannot be converted to a keyword symbol ~
521
because it already exists in the keyword package (this would ~
522
not be a problem if this symbol had not been imported from ~
523
another package). Offending string is: ~x0"
525
(str::upcase-string (car strings)))))
526
(t (cons (cons (intern (str-trim '(#\:)
527
(str::upcase-string (car strings)))
530
(read-lexicon-line-options (cddr strings))))))
535
(defthm read-lexicon-line-subgoal-4-a
536
(implies (and (alistp lst)
538
(consp (assoc-equal x lst)))))
541
(defthm read-lexicon-line-subgoal-4-b
542
(implies (even-string-list-p (cdr (str::strtok (mv-nth 0 (read-line! file-input))
545
(alistp (READ-LEXICON-LINE-OPTIONS
546
(CDR (STR::STRTOK (MV-NTH 0 (READ-LINE! FILE-INPUT))
550
;; (defthm read-lexicon-line-subgoal-4
553
;; (character-listp file-input)
554
;; (mv-nth 0 (read-line! file-input))
555
;; (not (equal (mv-nth 0 (read-line! file-input))
557
;; (consp (str::strtok (mv-nth 0 (read-line! file-input))
559
;; (stringp (car (str::strtok (mv-nth 0 (read-line! file-input))
561
;; (even-string-list-p (cdr (str::strtok (mv-nth 0 (read-line! file-input))
566
;; :class (read-lexicon-line-options
567
;; (cdr (str::strtok (mv-nth 0 (read-line! file-input))
570
;; :class (read-lexicon-line-options
571
;; (cdr (str::strtok (mv-nth 0 (read-line! file-input))
572
;; '(#\space)))))))))
575
;; (defthm member-assoc-equivalence-1-A
576
;; (implies (alistp lst)
577
;; (iff (member key (strip-cars lst))
582
;; (defthm member-assoc-equivalence-1-B
583
;; (implies (alistp lst)
584
;; (iff (assoc key lst)
585
;; (member key (strip-cars lst))))))
589
;; (defthm member-assoc-equivalence-2
590
;; (implies (and (alistp lst) (cdr (assoc key lst)))
591
;; (member key (strip-cars lst)))))
594
(defthm read-lexicon-line-subgoal-3-lemma
595
(implies (and (alistp lst)
596
(keyword-string-pair-list-p lst)
597
(assoc-equal key lst))
598
(stringp (cdr (assoc-equal key lst))))))
601
;; (defthm read-lexicon-line-subgoal3
604
;; (character-listp file-input)
605
;; (mv-nth 0 (read-line! file-input))
606
;; (not (equal (mv-nth 0 (read-line! file-input))
608
;; (consp (str::strtok (mv-nth 0 (read-line! file-input))
610
;; (stringp (car (str::strtok (mv-nth 0 (read-line! file-input))
612
;; (even-string-list-p (cdr (str::strtok (mv-nth 0 (read-line! file-input))
615
;; :class (read-lexicon-line-options
616
;; (cdr (str::strtok (mv-nth 0 (read-line! file-input))
620
;; :class (read-lexicon-line-options
621
;; (cdr (str::strtok (mv-nth 0 (read-line! file-input))
622
;; '(#\space))))))))))
624
(define read-lexicon-line ((file-input character-listp))
625
"Read a line from the given file, and return the corresponding terminal."
626
;; Parse a line from the file
627
:returns (mv (terminal (implies terminal (terminal-p terminal)))
628
(remaining-file-input character-listp :hyp :fguard))
629
:parents (earley-parser)
630
(mv-let (line new-file-input)
631
(read-line! file-input)
632
(cond ((or (null line) (equal line ""))
633
(mv nil new-file-input))
635
(b* ((words ;(remove-equal "" ; remove call is unnecessary
636
(str::strtok line '(#\Space))) ;)
637
((unless (and (consp words) (stringp (car words))))
638
(mv (er hard? 'read-lexicon-line
639
"Read-lexicon-line does not accept empty lines")
641
(string (first words))
642
( ;(when (and (consp (cdr words)) (atom (cddr words))))
643
(unless (even-string-list-p (cdr words)))
644
(mv (er hard? 'read-lexicon-line
645
"Read-lexicon-line given lexicon input line that ~
646
doesn't have _pairs_ (not in the lisp sense) of ~
647
options and values. Input line is:~% ~x0"
650
(options (read-lexicon-line-options (cdr words))))
651
;; Create the word object
652
(mv (make-terminal :word string ; the regex would be read in here
653
:class (str-trim '(#\< #\>)
654
(or (cdr (assoc :class
657
:gender (cdr (assoc :gender options)))
663
;; (defthm dumb-lemma
664
;; (implies (and (character-listp x) x)
665
;; (< 0 (length x)))))
668
(defthm read-lexicon-line-weak
669
(<= (acl2-count (mv-nth 1 (read-lexicon-line file-input)))
670
(acl2-count file-input))
671
:rule-classes (:rewrite :linear))
678
; Provides us with the following, necessary for read-lexicon-line-strong
680
;;; (implies (and (stringp x)
681
;;; n (not (equal x "")))
682
;;; (< 0 (length x)))))
684
"str/arithmetic" :dir :system))
686
(defthm read-lexicon-line-strong
688
(mv-nth 0 (read-lexicon-line file-input))
689
(force (character-listp
690
; Could possibly be removed if we did more sophisticated reasoning for
693
(< (acl2-count (mv-nth 1 (read-lexicon-line file-input)))
694
(acl2-count file-input)))
695
:hints (("Goal" :in-theory (enable read-lexicon-line))
696
("Goal'''" :in-theory (e/d (read-lexicon-line)
698
:use ((:instance read-line!-strong))))
699
:rule-classes (:rewrite :linear))))
702
;; (defthm load-lexicon1-lemma
703
;; (implies (and (stringp word)
704
;; (lexicon-p lexicon)
705
;; (terminal-p terminal))
706
;; (lexicon-p (hons-acons word
708
;; (cdr (hons-get word lexicon-p)))
710
;; :hints (("Goal" :in-theory (enable lexicon-p terminal-p)))))
713
;; (defthm load-lexicon1-lemmaaaa
714
;; (implies (and (dictionary-p dictionary)
716
;; (terminal-list-p (hons-assoc-equal word
719
(defthm load-lexicon1-lemma
721
(and (character-listp file-input)
722
(string-list-p part-of-speech-list)
723
(dictionary-p dictionary)
724
(mv-nth 0 (read-lexicon-line file-input)))
725
(terminal-p (mv-nth 0 (read-lexicon-line file-input)))))
727
(define load-lexicon1 ((dictionary dictionary-p)
728
(part-of-speech-list string-list-p)
729
(file-input character-listp))
730
:returns (mv (dictionary dictionary-p :hyp :fguard)
731
(part-of-speech-list string-list-p :hyp :fguard)
732
(remaining-file-input character-listp :hyp :fguard))
733
:parents (earley-parser)
734
(if (mbe :logic (not (character-listp file-input))
736
(mv dictionary part-of-speech-list file-input)
737
(mv-let (terminal new-file-input)
738
(read-lexicon-line file-input)
739
(cond ((null terminal) ; source of a bug, what if the lex file contains a blank line?,
740
(mv dictionary part-of-speech-list new-file-input))
742
(b* ((word (terminal->word terminal))
743
(curr-terminal-list (cdr (hons-get word dictionary)))) ; source of bug?, "working" version doesn't call cdr, but it's "correctly" needed for the proof to go through
744
(load-lexicon1 (hons-acons word
745
(hons terminal curr-terminal-list)
747
(cons (terminal->class terminal)
749
new-file-input)))))))
751
(define load-lexicon ((filename stringp)
753
:returns (mv (lexicon lexicon-p :hyp :fguard)
754
(state state-p :hyp :fguard))
755
:parents (earley-parser)
756
(let ((dictionary nil) ; use a hashtable
757
(part-of-speech-list nil))
758
(mv-let (file-input state)
759
(read-file-characters-no-error filename state)
760
(mv-let (dictionary part-of-speech-list new-file-input)
761
(load-lexicon1 dictionary part-of-speech-list file-input)
762
(declare (ignore new-file-input))
763
(mv (make-lexicon :dictionary dictionary
764
:part-of-speech part-of-speech-list)