This way, Mr. Markov... added by c-Keen on Tue Dec 17 16:39:19 2013

(use (srfi 13 69) utf8)

(require-library alist-lib)
(import (only alist-lib alist-ref/default))

(define (new-hash) (make-hash-table string=? string-hash))

;(define *punctuation* (string-append ",.;:!?2`´»«'-…" (string #\newline #\space #\tab #\")))
;; We use   space for token separation below

(define (insert-ngrams ht s #!key (n 2))
  (let ((tokens (string-split s " ")))
    (unless (< n (length tokens))
      (error "String is too short for ngrams with n=" n))
    (let loop ((ngram (take tokens n))
               (rest (drop tokens n)))
      (unless (null? rest)
        (hash-table-update!/default ht (string-intersperse ngram)
                                    (lambda (v)
                                      (alist-update! (car rest)
                                                     (add1
                                                      (alist-ref/default v
                                                                         (car rest)
                                                                         0
                                                                         string-ci=?))
                                                     v
                                                     string-ci=?))
                                    `((,(car rest) . 0)))
        (loop (append (cdr ngram) (list (car rest)))
              (cdr rest)))))
  ht)

(define (fill-hash-from-file file #!key (ht (new-hash)))
  (insert-ngrams ht
                 (string-intersperse (with-input-from-file file read-lines))))

(define (spew ht number-of-sentences)
  (let* ((keys (filter
                (lambda (k)
                  (or
                   (string=? (string-titlecase k) k)
                   (string=? (string-upcase k) k)))
                (hash-table-keys ht)))
         (numkeys (length keys))
         (seed (random numkeys)))
    (let loop ((n number-of-sentences)
               (s (list-ref keys seed))
               (t (list (list-ref keys seed))))
      (if (zero? n)
          (string-intersperse (reverse t))
          (let* ((all-words (hash-table-ref ht s))
                 (sum-occurrances (fold (lambda (x s) (+ s (cdr x))) 0 all-words))
                 (probs (fold
                         (lambda (p ps)
                           (append (make-list (cdr p) (car p))  ps))
                         '()
                         all-words))
                 (word (list-ref probs (random (length probs)))))
            (loop (if (string-suffix? "." word) (sub1 n) n)
                  (string-intersperse (list (second (string-split s)) word))
                  (cons word t)))))))