sytse: I think that'll do. added by zilti on Sun Jan 7 20:38:40 2018

(import chicken scheme)
(require-extension sequences lalr-driver srfi-13)

(include "eolian.yy.scm")

(define (force-output) #f)
(define (port-line port)
  (let-values (((line _) (port-position port)))
    line))
(define (port-column port)
  (let-values (((_ column) (port-position port)))
    column))

;; State
(define context (list outside:))
(define step #f)
(define (errorp . stuff)
  #f)

;; Readers

(define (skip-spaces)
  (let loop ((c (peek-char)))
    (if (and (not (eof-object? c))
	     (or (char=? c #\space) (char=? c #\tab) (char=? c #\newline)))
	(begin
	  (read-char)
	  (loop (peek-char))))))

(define (read-linecomm l)
  (if (char=? (car l) #\newline)
      (read-char)
      (read-linecomm (list (read-char)))))

(define (read-comm l)
  (if (and (char=? (car l) #\*)
	   (char=? (peek-char) #\/))
      (read-char)
      (read-comm (list (read-char)))))

(define (read-doc l)
  (if (and (char=? (car l) #\])
	   (char=? (peek-char) #\]))
      (read-char)
      (read-doc (list (read-char)))))

(define (valuechar? c)
  (or (char-alphabetic? c)
      (char-numeric? c)
      (char=? c #\_)
      (char=? c #\*)))

(define (read-value l)
  (if (length=1? l)
      (begin
	(and (char-alphabetic? (car l)) (set! context (cons symbol-value: context)))
	(and (char-numeric? (car l))    (set! context (cons number-value: context))))
      (if (valuechar? (peek-char))
	  (read-value (cons (read-char) l))
	  (let ((loc (peek context)))
	    (set! context (pop context))
	    (case loc
	      ((symbol-value:) (string->symbol (apply string (reverse l))))
	      ((number-value:) (string->number (apply string (reverse l)))))))))

(define (read-single-char l)
  (let ((result (read-char)))
    (read-char)
    result))

(define (read-str l)
  (if (and (not (char=? (car l) #\\))
	   (char=? (peek-char) #\"))
      (apply string (reverse l))
      (read-str (cons (read-char) l))))

(define (read-op l)
  (cond
   ((char=? (car l) #\=) (cond ((char=? (peek-char) #\=) (read-char) "==")
			       (else                                 "=")))
   ((char=? (car l) #\!) (cond ((char=? (peek-char) #\=) (read-char) "!=")
			       (else                                 "!")))
   ((char=? (car l) #\<) (cond ((char=? (peek-char) #\=) (read-char) "<=")
			       ((char=? (peek-char) #\<) (read-char) "<<")
			       (else                                 "<")))
   ((char=? (car l) #\>) (cond ((char=? (peek-char) #\=) (read-char) ">=")
			       ((char=? (peek-char) #\>) (read-char) ">>")
			       (else                                 ">")))
   (else                 (apply string l))))

(define (read-number l)
  (let ((c (peek-char)))
    (if (or (char-numeric? c)
	    (char=? c #\.)
	    (char=? c #\x)
	    (char=? c #\U)
	    (char=? c #\L)
	    (char=? c #\F))
	(read-number (cons (read-char) l))
	(apply string (reverse l)))))

(define keywords
  '("abstract"
    "class"
    "mixin"
    "interface"
    "type"
    "const"
    "var"
    "accessor"
    "array"
    "iterator"
    "hash"
    "list"
    "own"
    "free"
    "struct"
    "enum"
    "@extern"
    "@free"
    "true"
    "false"
    "null"
    "legacy_prefix"
    "eo_prefix"
    "methods"
    "events"
    "data"
    "implements"
    "constructors"
    "@auto"
    "@empty"
    "@private"
    "@protected"
    "@beta"
    "@hot"
    "@const"
    "@class"
    "@pure_virtual"
    "get"
    "set"
    "keys"
    "values"
    "@nonull"
    "@nullable"
    "@optional"
    "@in"
    "@out"
    "@inout"
    "legacy"
    "@warn_unused"
    "return"))

(define (else-token location x)
  (make-lexical-token (string->keyword (string-upcase x)) location x))

(define (read-else location l)
  (let ((c (peek-char)))
    (if (not (or (char=? c #\space)
		 (char=? c #\newline)))
	(read-else (cons (read-char) l))
	(let* ((r (apply string (reverse l)))
	       (special (find (lambda (x) (string=? r x)) keywords)))
	  (if special
	      (else-token location special)
	      (let* ((split (intersperse (string-split r ".") ".")))
		(for-each
		 (lambda (x)
		   (if (string=? x ".")
		       (make-lexical-token 'DOT     location #f)
		       (make-lexical-token 'VALUE location x)))
		 split)))))))

;; Category recognizers
(define (docst? c1 c2)
  (and (char=? c1 #\[)
       (char=? c2 #\[)))

(define (commst? c1 c2)
  (and (char=? c1 #\/)
       (char=? c2 #\*)))

(define (lcommst? c1 c2)
  (and (char=? c1 #\/)
       (char=? c2 #\/)))

(define (opchar? c1 c2)
  (or (char=? c1 #\+)
      (char=? c1 #\-)
      (char=? c1 #\!)
      (char=? c1 #\~)
      (char=? c1 #\*)
      (char=? c1 #\/)
      (char=? c1 #\%)
      (char=? c1 #\<)
      (char=? c1 #\>)
      (char=? c1 #\&)
      (char=? c1 #\|)
      (char=? c1 #\^)
      (and (char=? c1 #\=) (char=? c2 #\=))
      (and (char=? c1 #\!) (char=? c2 #\=))
      (and (char=? c1 #\>) (char=? c2 #\=))
      (and (char=? c1 #\<) (char=? c2 #\=))
      (and (char=? c1 #\<) (char=? c2 #\<))
      (and (char=? c1 #\>) (char=? c2 #\>))))

(define (nop-eq? c1 c2)
  (and (char=? c1 #\=)
       (not (char=? c2 #\=))))

(define (nop-lt? c1 c2)
  (and (char=? c1 #\<)
       (not (char=? c2 #\<))
       (not (char=? c2 #\=))))

(define (nop-gt? c1 c2)
  (and (char=? c1 #\>)
       (not (char=? c2 #\>))
       (not (char=? c2 #\=))))

(define (make-lexer) 
  (let loop ()
    (let* ((location (make-source-location "blah" (port-line (current-input-port)) (port-column (current-input-port)) -1 -1))
	   (c (read-char)))
      (skip-spaces) 
      (cond
       ((eof-object? c)                     '*eoi*)
       ((char=? c #\.)                      (make-lexical-token 'DOT       location #f))
       ((char=? c #\:)                      (make-lexical-token ':         location #f))
       ((char=? c #\*)                      (make-lexical-token '*         location #f))
       ((nop-lt? c (peek-char))             (make-lexical-token '<         location #f))
       ((nop-gt? c (peek-char))             (make-lexical-token '>         location #f))
       ((nop-eq? c (peek-char))             (make-lexical-token '=         location #f))
       ((char=? c #\,)                      (make-lexical-token 'COMMA     location #f))
       ((char=? c #\;)                      (make-lexical-token 'SEMICOL   location #f))
       ((char=? c #\{)                      (make-lexical-token 'LBR       location #f))
       ((char=? c #\})                      (make-lexical-token 'RBR       location #f))
       ((char=? c #\()                      (make-lexical-token 'LPAR      location #f))
       ((char=? c #\))                      (make-lexical-token 'RPAR      location #f))
       ((char=? c #\')                      (make-lexical-token 'CHAR      location (read-single-char (list c))))
       ((char=? c #\")                      (make-lexical-token 'STR       location (read-str (list c))))
       ((opchar? c (peek-char))             (make-lexical-token 'OP        location (read-op (list c))))
       ((docst? c (peek-char))              (make-lexical-token 'DOC       location (read-doc (list c))))
       ((commst? c (peek-char))             (make-lexical-token 'COMM      location (read-comm (list c))))
       ((lcommst? c (peek-char))            (make-lexical-token 'COMM      location (read-linecomm (list c))))
       ((char-numeric? c)                   (make-lexical-token 'NUM       location (read-number (list c))))
       (else                                (read-else                     location (list c))))
      (if (not (eof-object? c))
	  (loop)))))