Welcome to the CHICKEN Scheme pasting service
This way, Mr. Markov... added by c-Keen on Tue Dec 17 16:39:19 2013
(use (srfi 13 69) utf8) (require-library alist-lib) (import (only alist-lib alist-ref/default)) (define (new-hash) (make-hash-table string=? string-hash)) ;(define *punctuation* (string-append ",.;:!?2`´»«'-…" (string #\newline #\space #\tab #\"))) ;; We use space for token separation below (define (insert-ngrams ht s #!key (n 2)) (let ((tokens (string-split s " "))) (unless (< n (length tokens)) (error "String is too short for ngrams with n=" n)) (let loop ((ngram (take tokens n)) (rest (drop tokens n))) (unless (null? rest) (hash-table-update!/default ht (string-intersperse ngram) (lambda (v) (alist-update! (car rest) (add1 (alist-ref/default v (car rest) 0 string-ci=?)) v string-ci=?)) `((,(car rest) . 0))) (loop (append (cdr ngram) (list (car rest))) (cdr rest))))) ht) (define (fill-hash-from-file file #!key (ht (new-hash))) (insert-ngrams ht (string-intersperse (with-input-from-file file read-lines)))) (define (spew ht number-of-sentences) (let* ((keys (filter (lambda (k) (or (string=? (string-titlecase k) k) (string=? (string-upcase k) k))) (hash-table-keys ht))) (numkeys (length keys)) (seed (random numkeys))) (let loop ((n number-of-sentences) (s (list-ref keys seed)) (t (list (list-ref keys seed)))) (if (zero? n) (string-intersperse (reverse t)) (let* ((all-words (hash-table-ref ht s)) (sum-occurrances (fold (lambda (x s) (+ s (cdr x))) 0 all-words)) (probs (fold (lambda (p ps) (append (make-list (cdr p) (car p)) ps)) '() all-words)) (word (list-ref probs (random (length probs))))) (loop (if (string-suffix? "." word) (sub1 n) n) (string-intersperse (list (second (string-split s)) word)) (cons word t)))))))