;;; Nondeterministic behavior in UTF-8 test script; added by phm on Sun May 17 22:46:09 2026

(import (scheme base) (scheme write) (scheme process-context))

;;; Mesotest assumes that when an implementation attempts to decode invalid
;;; UTF-8, it does one of two things:
;;;
;;; a) raises an exception, or
;;; b) returns a string containing nonsense characters.

;;; #t when the script was started with the "--should-raise-exception"
;;; command-line argument, #f otherwise.  Evaluated once, at load time.
(define should-raise-exception?
  (let ((flag "--should-raise-exception"))
    ;; `member` yields a list tail on success; normalise it to a boolean.
    (if (member flag (command-line))
        #t
        #f)))

(define (try-to-decode bv)
  ;; Attempt to decode the bytevector BV with `utf8->string` and report the
  ;; outcome on the current output port.
  ;;
  ;; On success, prints "decode <bv>: <list of code points>" followed by a
  ;; newline.  If any exception is raised while decoding or printing, the
  ;; procedure returns #f; in that case a "decode <bv>error" line is printed
  ;; only when `should-raise-exception?` is false (when it is true, an
  ;; exception is the expected outcome and nothing is reported).
  (define (show-prefix)
    (display "decode ")
    (write bv))
  (guard (condition
          ;; Catch every kind of raised object; the object itself is unused.
          (#t
           (unless should-raise-exception?
             (show-prefix)
             (display "error")
             (newline))
           #f))
    ;; Decode first so that nothing is printed if decoding itself raises.
    (let ((decoded (utf8->string bv)))
      (show-prefix)
      (display ": ")
      (write (map char->integer (string->list decoded)))
      (newline))))

(define (from-to ranges)
  ;; RANGES is a list of two-element lists (LOW HIGH), each an inclusive
  ;; range of byte values.  The Nth range supplies the candidates for the Nth
  ;; byte, and `try-to-decode` is called once for every combination of bytes
  ;; drawn from the ranges.  With an empty RANGES list the empty bytevector is
  ;; decoded exactly once.
  ;;
  ;; A single bytevector is allocated and mutated in place; the same object
  ;; is handed to `try-to-decode` on every call.
  (let ((bv (make-bytevector (length ranges) 0)))
    (let loop ((ranges ranges)
               (i 0))
      (cond
        ((null? ranges) (try-to-decode bv))
        (else
         ;; Use `(cadr (car ...))` rather than `cadar`: (scheme base) only
         ;; exports the two-level accessors, and this file does not import
         ;; (scheme cxr).
         (let ((low (car (car ranges)))
               (high (cadr (car ranges)))
               (rest (cdr ranges)))
           (do ((byte low (+ byte 1)))
               ((> byte high))
             (bytevector-u8-set! bv i byte)
             ;; Fill the remaining positions for this choice of byte.
             (loop rest (+ i 1)))))))))

;;; Test driver.  Each entry is a section heading followed by the range lists
;;; passed to `from-to` for that section.  `for-each` processes the entries
;;; strictly in order, so the output sequence matches the table order.
(for-each
 (lambda (section)
   (display (car section))
   (for-each from-to (cdr section)))
 '(("lone continuation bytes\n"
    ((#x80 #xBF)))
   ("single start byte\n"
    ((#xC0 #xDF))
    ((#xE0 #xEF))
    ((#xF0 #xF4)))
   ("start byte followed by start byte\n"
    ((#xC0 #xDF) (#x20 #x20))
    ((#xE0 #xEF) (#x20 #x20))
    ((#xF0 #xF4) (#x20 #x20)))
   ("truncated 3 byte sequences\n"
    ((#xE0 #xEF) (#x80 #xBF)))
   ("truncated 4 byte sequences\n"
    ((#xF0 #xF4) (#x80 #xBF) (#x80 #xBF)))))