5
(define (buf-read-line)
6
(let ((line (read-line (input-port) #t)))
9
(let ((chars (circular-list line)))
12
((eq? (string-size line) 0)
13
(lineno (+ (lineno) 1))
15
((string-incomplete? line)
16
(lineno (+ (lineno) 1))
18
((eq? (string-byte-ref line 0) 35) ;; '#' = 35 (ASCII)
20
((#/^\#\s+(\d+)\s+"(\S+)"/ line) (_ num file)
21
(lineno (- (string->number num) 1))
24
(test (#/^\#\s*define\s(([^ \x28]+)(?:\(([^\x29]+)\))?)/ line)
26
(lineno (+ (lineno) 1))
27
(enqueue! (macro-queue)
28
(list* (match 1) ;; foo(x)
29
(cons (filename) (lineno)) ;; ("foo.h" . 123)
31
(and-let* ((args (match 3)))
33
(string-split args #[\s,]))))) ;; (x)
36
(lineno (+ (lineno) 1))
39
(lineno (+ (lineno) 1))
40
(let ((chars (string->list (string-append line "\n"))))
44
(define (buf-read-char)
45
(let ((rest (rest-chars)))
47
(set! rest (buf-read-line)))
48
(rest-chars (cdr rest))
51
(define (buf-peek-char)
52
(let ((rest (rest-chars)))
54
(set! rest (buf-read-line)))
57
(define (buf-next-char)
58
(rest-chars (cdr (rest-chars))))
90
(__volatile__ VOLATILE)
99
(__restrict__ RESTRICT)
100
(__extension__ EXTENSION)
101
(__attribute__ ATTRIBUTE) ; ignore
102
(@interface AT_INTERFACE)
103
(@implementation AT_IMPLEMENTATION)
105
(@selector AT_SELECTOR)
109
(@private AT_PRIVATE)
110
(@protected AT_PROTECTED)
111
(@protocol AT_PROTOCOL)
113
(@compatibility_alias AT_ALIAS)
117
(@finally AT_FINALLY)
118
(@synchronized AT_SYNCHRONIZED)
161
(define %keyword-tbl (let ((tbl (make-hash-table)))
162
(for-each (lambda (lst)
163
(hash-table-put! tbl (car lst) lst))
167
(define %operator-tbl (let ((tbl (make-hash-table)))
168
(for-each (lambda (lst)
169
(hash-table-put! tbl (car lst) lst))
173
(define (get-keyword-val symbol) (hash-table-get %keyword-tbl symbol #f))
175
(define (get-operator-val symbol) (hash-table-get %operator-tbl symbol #f))
180
;; gcc of GCC allows dollar sign
181
;;(define initial-identifier-charset #[A-Za-z_])
182
;;(define identifier-charset #[A-Za-z_0-9])
183
(define initial-identifier-charset #[A-Za-z_$@])
184
(define identifier-charset #[A-Za-z_$0-9])
186
(define operator-charset #[*~!+\-/\^&%=?<>])
187
(define special-operator-charset #[|.]) ; cant be in scheme symbol
189
(define scheme-token-charset
201
(define (char-digit? c) (char-set-contains? #[0-9] c))
202
(define (char-nonzero-digit? c) (char-set-contains? #[1-9] c))
203
(define (char-hex-digit? c) (char-set-contains? #[0-9A-Fa-f] c))
204
(define (char-octal-digit? c) (char-set-contains? #[0-7] c))
207
(let ((space-char-list '(#\space #\tab #\x0b #\page)))
209
(let loop ((c (buf-peek-char)))
210
(when (memq c space-char-list)
212
(loop (buf-peek-char)))))))
215
(let loop ((c (buf-peek-char)))
216
(if (and (not (eof-object? c))
217
(not (char=? c #\newline)))
220
(loop (buf-peek-char))))))
222
(define (l->symbol l)
223
(string->symbol (list->string (reverse l))))
225
(define (read-identifier l)
226
(let ((c (buf-peek-char)))
227
(if (char-set-contains? identifier-charset c)
230
(read-identifier (cons c l)))
231
(let ((s (l->symbol l)))
233
((get-keyword-val s) => (lambda (k)
234
;; skip __attribute__ keyword
235
(if (eq? (cadr k) 'ATTRIBUTE)
238
((memq s '(int long double char short float))
240
((and (pair? (last-token)) (eq? (car (last-token)) 'TYPENAME))
241
(cons 'IDENTIFIER s))
242
((or (eq? (last-token) 'STRUCT) (eq? (last-token) 'UNION))
243
(cons 'IDENTIFIER s))
244
((eq? (last-token) 'AT_INTERFACE)
246
(cons 'IDENTIFIER s))
250
(cons 'IDENTIFIER s)))))))
252
(define (read-operator c)
254
(get-operator-val (l->symbol l)))
255
(let lp ((cand (l->opval (list c)))
257
(let* ((c (buf-peek-char))
258
(k (l->opval (cons c l))))
269
(define (hexchar->integer c)
270
(cond ((char=? c #\0) 0)
280
((char-ci=? c #\a) 10)
281
((char-ci=? c #\b) 11)
282
((char-ci=? c #\c) 12)
283
((char-ci=? c #\d) 13)
284
((char-ci=? c #\e) 14)
285
((char-ci=? c #\f) 15)
287
(error "can not be, there must be a bug in lexer"))))
289
(define (backslash c)
290
(define (readn c n v)
291
(if (and (> n 0) (char-octal-digit? c))
294
(readn (buf-peek-char)
296
(+ (* v 8) (hexchar->integer c))))
301
(if (char-hex-digit? c)
305
(+ (* v 16) (hexchar->integer c))))
308
(define (range-check v)
309
;; CARM: Sec 2.9, p41
314
(and (<= #xd800 v) (<= v #xdfff))))))
319
(if (char-hex-digit? c)
324
(+ (* v 16) (hexchar->integer c))))
327
((not (or (= n 4) (= n 8)))
328
(error "Universal character name must be 4 or 8 hex-digit"))
329
((not (range-check v))
330
(errorf "\\u~4,'0x is not a valid universal character" v))
335
(let ((c (buf-read-char)))
337
((char=? c #\n) #\newline) ; NL 0A ^J
338
((char=? c #\x) ; not char-ci=?, cf. CARM p. 35
339
(if (char-hex-digit? (buf-peek-char))
340
(readx (buf-peek-char))
341
(error "\\x must be followed by hex-digit" (buf-peek-char))))
342
((char=? c #\t) #\tab) ; HT 09 ^I
344
(if (char-hex-digit? (buf-peek-char))
345
(readu (buf-peek-char))
346
(error "\\u must be followed by hex-digit" (buf-peek-char))))
347
((char-octal-digit? c)
348
(readn (buf-peek-char) 3 (hexchar->integer c)))
349
((char-set-contains? #[?\'\"\\] c) c) ; first two backslash
350
; are placed to fool emacs
352
;; TODO: pascal string
354
((char=? c #\a) #\x07) ; BEL 07 ^G
355
((char=? c #\b) #\x08) ; BS 08 ^H
356
((char=? c #\v) #\x0b) ; VT 0B ^K
357
((char=? c #\f) #\page) ; NP 0C ^L
358
((char=? c #\r) #\return) ; CR 0D ^M
359
((char-set-contains? #[a-z] c)
360
(warning "Unknown lower case escape character is used: ~a" c)
362
((char-set-contains? #[A-Z] c)
363
(warning "No upper case espace character is defined: ~a" c)
369
(define (read-string-literal)
370
(let lp ((c (buf-read-char))
373
(error "missing double quote")
375
(apply string (reverse s))
376
(let ((cc (backslash c)))
377
(lp (buf-read-char) (cons cc s)))))))
379
(define (read-character-constant)
380
(let lp ((c (buf-read-char))
383
(error "missing quote")
386
(let ((cc (backslash c)))
387
;; Meaning of Multicharacter constant is implementation
388
;; dependent. Here we implement a convention with left-to-right
389
;; packing, which is described in CARM pp. 31--32.
392
(char->integer cc))))))))
394
;;; read-hexadecimal, octal.
396
(define (read-octal-or-flonum l)
398
(for-each (lambda (c)
399
(if (not (char-set-contains? #[0-7] c))
400
(error "invalid char in octal" c)))
402
(strtoll (apply string (reverse l))))
404
(let ((c (buf-peek-char)))
406
((char-set-contains? #[0-9] c)
408
(read-octal-or-flonum (cons c l)))
411
(read-flonum (cons c l) #[0-9] 10 #[Ee]))
412
((char-set-contains? #[Ee] c)
414
(read-expnum (cons c l) 10))
415
((char-set-contains? #[ULul] c)
417
(list (integer-suffix c)
420
(list 'int (rl->n l))))))
423
;;; read decimal integer or floating point
425
(define (read-number-constant l ics radix ecs)
428
(strtoll (apply string (reverse l))))
430
(let ((c (buf-peek-char)))
432
((char-set-contains? ics c)
434
(read-number-constant (cons c l) ics radix ecs))
437
(read-flonum (cons c l) ics radix ecs))
438
((char-set-contains? ecs c)
440
(read-expnum (cons c l) radix))
441
((char-set-contains? #[ULul] c)
443
(list (integer-suffix c)
446
(list 'int (rl->n l))))))
448
(define (read-decimal l)
449
(read-number-constant l #[0-9] 10 #[Ee]))
450
(define (read-hexadecimal l)
451
(read-number-constant l #[0-9A-Fa-f] 16 #[Pp]))
453
(define (integer-suffix c)
457
(let ((c (buf-peek-char)))
458
(cond ((char=? c #\l)
460
(let ((c (buf-peek-char)))
468
(let ((c (buf-peek-char)))
478
(let ((c (buf-peek-char)))
479
(cond ((char=? c #\l)
482
(let ((c (buf-peek-char)))
483
(if (char-ci=? c #\u)
484
(begin (buf-next-char)
493
(let ((c (buf-peek-char)))
494
(cond ((char=? c #\L)
497
(let ((c (buf-peek-char)))
498
(if (char-ci=? c #\u)
499
(begin (buf-next-char)
507
(error "there is a bug in lexer"))))
509
(define (read-flonum l ics radix ecs)
511
(strtod (apply string (reverse l))))
513
(define (error-if-hex)
515
(error "hexadecimal floating constants require an exponent")))
517
(let ((c (buf-peek-char)))
519
((char-set-contains? ics c)
521
(read-flonum (cons c l) ics radix ecs))
522
((char-set-contains? ecs c)
524
(read-expnum (cons c l) radix))
528
(list 'float (rl->n l)))
532
(list 'long-double (rl->n l)))
535
(list 'double (rl->n l))))))
540
(define (read-expnum l radix)
542
(define (rl->n l) (strtod (apply string (reverse l))))
548
(exp1 (buf-peek-char) (cons c l)))
551
(list 'float (rl->n l)))
554
(list 'long-double (rl->n l)))
556
(list 'double (rl->n l)))))
558
(if (char-set-contains? #[0-9\-\+] (buf-peek-char))
559
(let ((c (buf-read-char)))
560
(exp1 (buf-peek-char) (cons c l)))
561
(error "malformed floating point expression")))
566
(define (follow expect ifyes ifno)
567
(let ((c (buf-peek-char)))
568
(if (char=? c expect)
569
(begin (buf-next-char)
577
(define sc-ignore skip-line)
578
(define sc-do-sharp-space skip-line)
580
(define sharp-commands
584
(define (do-sharp-command)
585
(if (char=? (buf-peek-char) #\space)
589
(let lp ((c (buf-peek-char))
591
(cond ((char-alphabetic? c)
593
(lp (buf-peek-char) (cons c l)))
595
(let ((cmd (assq (l->symbol l) sharp-commands)))
601
;;; Changes from c-lex.scm:
603
;;; STRING -> STRING_LITERAL
605
;;; (ASSIGN . OR) -> OR_ASSIGN
608
(let ((token (%c-scan)))
614
(let loop ((c (buf-read-char)))
616
((eof-object? c) '*eoi*)
617
((char=? c #\newline)
619
(loop (buf-read-char)))
622
((char-ci=? (buf-peek-char) #\x)
623
(begin (buf-next-char)
624
(cons 'CONSTANT (read-hexadecimal '(#\x #\0)))))
626
(cons 'CONSTANT (read-octal-or-flonum (list c))))))
629
(cons 'CONSTANT (read-decimal (list c))))
631
((and (char=? c #\.) (char-numeric? (buf-peek-char)))
632
(cons 'CONSTANT (read-flonum (list c) #[0-9] 10 #[Ee])))
635
(if (char=? (buf-peek-char) #\.)
638
(if (char=? (buf-peek-char) #\.)
645
((and (char=? c #\L) (char=? (buf-peek-char) #\"))
647
(cons 'STRING (read-string-literal)))
648
((and (char=? c #\L) (char=? (buf-peek-char) #\'))
650
(cons 'CONSTANT (list 'wchar (read-character-constant))))
651
((and (char=? c #\@) (char=? (buf-peek-char) #\"))
653
(cons 'OBJC_STRING (read-string-literal)))
655
(cons 'STRING (read-string-literal)))
657
(cons 'CONSTANT (list 'int (read-character-constant))))
659
((char-set-contains? initial-identifier-charset c)
660
(read-identifier (list c)))
662
((char-set-contains? operator-charset c)
665
((char=? c #\,) 'COMMA)
666
((char=? c #\:) 'COLON)
667
((char=? c #\;) 'SEMICOLON)
668
((char=? c #\() 'LPAREN)
669
((char=? c #\)) 'RPAREN)
670
((char=? c #\{) 'LCBRA)
671
((char=? c #\}) 'RCBRA)
672
((char=? c #\[) 'LSBRA)
673
((char=? c #\]) 'RSBRA)
675
;; special case for c-operator due to Scheme
676
((char=? c #\|) (or (follow #\| 'OR_OP #f) ; ||
677
(follow #\= 'OR_ASSIGN #f) ; |=
680
(error "waring: illegal character: " c)
682
(loop (buf-read-char))))))
684
;;;;;; END OF LEXER PART