Gemtext->Sexpr reader added by siiky on Tue Jul 5 02:07:49 2022

(import
 chicken.base
 chicken.io
 chicken.irregex
 srfi-13)

;;;
;;; Reading Gemtext into Sexps:
;;;
;;; # Headers
;;; 
;;;   ^(#+)\s+(.*)$
;;;
;;;   `(header ,(string-length $1) ,$2)
;;;
;;; NOTE: Accept non-strictly-confirming Gemtext (e.g. ####)
;;;
;;; # Lists
;;;
;;;   * item 1
;;;   * item 2
;;;
;;;   `(list "item 1"
;;;          "item 2")
;;;
;;; # Code blocks
;;;
;;;   ```some optional alt text
;;;   some
;;;   pre-formatted
;;;   text
;;;   ```
;;;
;;;   `(code "some optional alt text"
;;;          "some"
;;;          "pre-formatted"
;;;          "text")

;; Predicates return either an irregex match object, if the regex matches the
;; line; otherwise, #f
;; "Constructors" accept an 

(define header? (cute irregex-match (irregex "(#+)\\s+(.*)") <>))
(define link? (cute irregex-match (irregex "=>\\s*([^\\s]+)(\\s+(.*))?") <>))
(define list-item? (cute irregex-match (irregex "\\*\\s+(.*)") <>))
(define begin-code-line? (cute irregex-match (irregex "```(.*)") <>))
(define end-code-line? (cute string=? "```" <>))

(define-syntax match-let
  (syntax-rules ()
    ((match-let match ((var idx) ...) body ...)
     (let ((%match match))
       (receive (var ...) (values (irregex-match-substring %match idx) ...)
         body ...)))))

(define (header ret match)
  (match-let match ((level 1) (title 2))
   (let ((level (string-length level)))
    (cons `(header ,level ,title) ret))))

(define (link ret match)
  (match-let match ((url 1) (text 3))
    (cons `(link ,url ,text) ret)))

(define (list-item ret match)
  (match-let match ((text 1))
    (cons text ret)))

(define (end-list items ret)
  (cons `(list . ,(reverse items)) ret))

(define (begin-code ret match)
  (match-let match ((text 1))
    (cons text ret)))

(define (end-code code-lines ret)
  (cons `(code . ,(reverse code-lines)) ret))

(define (text ret txt)
  (cons txt ret))

(define eof? null?)
(define finish-off reverse)


(define ((goto st piece ret tail) match)
  (st (piece ret match) tail))

(define ((st:list ret) items lines)
  (if (eof? lines)
    (finish-off (end-list items ret))
    (let ((head (car lines))
          (tail (cdr lines)))
      (cond
        ((list-item? head)
         => (goto (st:list ret) list-item items tail))
        (else
          (st:text (end-list items ret) lines))))))

(define ((st:code ret) code-lines lines)
  (if (eof? lines)
    (finish-off (end-code code-lines ret))
    (let ((head (car lines))
          (tail (cdr lines)))
      (cond
        ((end-code-line? head)
         (st:text (end-code code-lines ret) tail))
        (else
          ((st:code ret) (cons head code-lines) tail))))))

(define (st:text ret lines)
  (if (eof? lines)
    (finish-off ret)
    (let ((head (car lines))
          (tail (cdr lines)))
      (cond
        ((header? head)
         => (goto st:text header ret tail))
        ; TODO: links
        ((link? head)
         => (goto st:text link ret tail))
        ((list-item? head)
         => (goto (st:list ret) list-item '() tail))
        ((begin-code-line? head)
         => (goto (st:code ret) begin-code '() tail))
        (else
          ((goto st:text text ret tail) head))))))

(define (gmi:read #!optional (port (current-input-port)))
  (st:text '() (read-lines port)))