uber-fast utf8-1 lower ASCII validation pasted by iterrogo on Tue Nov 25 22:40:16 2014

(define (valid-utf8? s)
  ;; Return a true value when the string S is valid UTF-8.
  ;;
  ;; Fast path: a string whose bytes are all <= 127 is pure ASCII and
  ;; therefore valid UTF-8, so a plain C byte scan is tried first.  It
  ;; allocates nothing on the Scheme side and yields #t on success.
  ;; Only when a byte above 127 is found does control fall through to
  ;; the general purpose validator, whose result is returned unchanged.
  ;;
  ;; The string and its length are handed to C as ordinary arguments
  ;; instead of through external globals, and the length is a size_t so
  ;; it cannot be truncated to int.
  ;; NOTE(review): assumes string-length yields the byte length of S
  ;; (true for core strings) -- confirm if a character-counting
  ;; string-length is in scope.
  (or ((foreign-lambda* bool ((scheme-pointer buf) (size_t len))
"
    /* Scan through a local typed cursor; the incoming pointer itself
       is left untouched. */
    const unsigned char *p = (const unsigned char *)buf;
    const unsigned char *end = p + len;

    for (; p != end; ++p)
    {
        if (*p > 127)
        {
            C_return(0);
        }
    }

    C_return(1);
")
       s (string-length s))
      (parse utf8-string (->parser-input s))))

updated utf8 validate pasted by iterrogo on Wed Nov 26 00:42:10 2014

(define (valid-utf8? s)
  ;; Return a true value when the string S is valid UTF-8.
  ;;
  ;; Try to validate as an ASCII string first: every byte <= 127 means
  ;; the string is trivially valid UTF-8.  The scan is a tight C loop
  ;; that generates no garbage and yields #t on success.  If any byte
  ;; is above 127 the general purpose validator decides instead, and
  ;; its result is returned as-is.
  ;;
  ;; Both the byte pointer and the length are passed as arguments to
  ;; the foreign lambda.  The length is declared size_t on the Scheme
  ;; side as well as in C, so the two agree and no narrowing to int
  ;; takes place.
  ;; NOTE(review): assumes string-length yields the byte length of S
  ;; -- confirm if a character-counting string-length is in scope.
  (or ((foreign-lambda* bool ((scheme-pointer buf) (size_t len))
"
    /* Walk a local typed cursor rather than incrementing the untyped
       pointer that was passed in. */
    const unsigned char *p = (const unsigned char *)buf;
    size_t remaining;

    for (remaining = len; remaining != 0; --remaining, ++p)
    {
        if (*p > 127)
        {
            C_return(0);
        }
    }

    C_return(1);
")
       s (string-length s))
      (parse utf8-string (->parser-input s))))

up-updated utf8 validate added by iterrogo on Wed Nov 26 00:43:54 2014

(define (valid-utf8? s)
  ;; Return a true value when the string S is valid UTF-8.
  ;;
  ;; An all-ASCII string (no byte above 127) is valid UTF-8 by
  ;; definition, so that case is settled by a C byte scan which yields
  ;; #t without building any Scheme data.  Otherwise the general
  ;; purpose validator runs and its result is returned unchanged.
  ;;
  ;; The scan receives the string bytes and the length as foreign
  ;; lambda arguments, so no C global is written and the pointer is
  ;; only read through a local cursor.
  ;; NOTE(review): assumes string-length yields the byte length of S
  ;; -- confirm if a character-counting string-length is in scope.
  (or ((foreign-lambda* bool ((scheme-pointer buf) (size_t len))
"
    const unsigned char *bytes = (const unsigned char *)buf;
    size_t i;

    for (i = 0; i < len; ++i)
    {
        if (bytes[i] > 127)
        {
            C_return(0);
        }
    }

    C_return(1);
")
       s (string-length s))
      (parse utf8-string (->parser-input s))))