;; 2013-03 chicken 4.8.0.1 ;; experiment with inlining string port read-char to find maximum theoretical ;; speedup if we specialize read-char on input string ports. ;; speedup is about 3x with simple inlining ;; speedup is about 20x if we restructure the code a little so the compiler ;; likes it better (allow it to generate a pure goto loop); this requires ;; us to reach into the read-char implementation. ;; Practically has problems because it's quite a bit of code to inline for ;; a simple read-char, and you won't get the spectacular speedup unless ;; you're in a very tight loop without any stack allocation. (define n 16000000) (define s (make-string n)) (gc #t) ;; csi: 5.094s CPU time, 0.02s GC time (major), 20 mutations, 6/64085 GCs (major/minor) ;; csc -O3: 0.998s CPU time, 4 mutations, 0/10259 GCs (major/minor) ;; csc -O3: 0.925s CPU time, 4 mutations, 0/10259 GCs (major/minor) (using EOF) (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (read-char p)))))) ;; csc -O3: 0.477s CPU time, 4 mutations, 0/2935 GCs (major/minor) ;; csc -O3: 0.389 CPU time, 4 mutations, 0/2935 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (let ([c (if (##sys#slot p 6) (begin (##sys#setislot p 6 #f) #!eof) ((##sys#slot (##sys#slot p 2) 0) p) ) ] ) ; read-char (cond [(eq? c #\newline) (##sys#setislot p 4 (fx+ (##sys#slot p 4) 1)) (##sys#setislot p 5 0) ] [(not (##core#inline "C_eofp" c)) (##sys#setislot p 5 (fx+ (##sys#slot p 5) 1)) ] ) c)))))) ;; csc -O3: 0.351s CPU time, 4 mutations, 0/2203 GCs (major/minor) ;; csc -O3: 0.270s CPU time, 4 mutations, 0/2203 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (let ([c (if (##sys#slot p 6) (begin (##sys#setislot p 6 #f) #!eof) (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c) ) )) ] ) (cond [(eq? c #\newline) (##sys#setislot p 4 (fx+ (##sys#slot p 4) 1)) (##sys#setislot p 5 0) ] [(not (##core#inline "C_eofp" c)) (##sys#setislot p 5 (fx+ (##sys#slot p 5) 1)) ] ) c)))))) ;; csc -O3: 0.270s CPU time, 4 mutations, 0/2203 GCs (major/minor) ;; (raise one EOF object out of the let statement) ;; csc -O3: 0.235s CPU time, 4 mutations, 0/2203 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (if (##sys#slot p 6) (begin (##sys#setislot p 6 #f) #!eof) (let ([c (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c) ) ) ] ) (cond [(eq? c #\newline) (##sys#setislot p 4 (fx+ (##sys#slot p 4) 1)) (##sys#setislot p 5 0) ] [(not (##core#inline "C_eofp" c)) (##sys#setislot p 5 (fx+ (##sys#slot p 5) 1)) ] ) c))))))) ;; csc -O3: 0.287s CPU time, 4 mutations, 0/494 GCs (major/minor) ;; (raise all EOFs out of LET. Can't do this without inlining the ;; port's read-char, as it may return EOF and we need to process ;; the resulting char to update the position.) ;; csc -O3: 0.049s CPU time, 4 mutations, 0/494 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (if (##sys#slot p 6) (begin (##sys#setislot p 6 #f) #!eof) (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) (cond [(eq? c #\newline) (##sys#setislot p 4 (fx+ (##sys#slot p 4) 1)) (##sys#setislot p 5 0) ] [(not (##core#inline "C_eofp" c)) (##sys#setislot p 5 (fx+ (##sys#slot p 5) 1)) ] ) c))))))))) ;; csc -O3: 0.267s CPU time, 4 mutations, 0/494 GCs (major/minor) ;; (remove port position update. no appreciable improvement) ;; csc -O3: 0.043s CPU time, 4 mutations, 0/494 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ( (c #f)) (if (eof-object? c) 'done (loop (if (##sys#slot p 6) (begin (##sys#setislot p 6 #f) #!eof) (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c))))))))) ;; csc -O3: 0.257s CPU time, 4 mutations, 0/494 GCs (major/minor) ;; (remove EOF update. no appreciable improvement) ;; csc -O3: 0.043 CPU time, 4 mutations, 0/494 GCs (major/minor) (time (let ((p (open-input-string s))) (let loop ( (c #f)) (if (eof-object? c) 'done (loop (if (##sys#slot p 6) #!eof (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c))))))))) ;; csc -O3: 0.071s CPU time ;; use fx- instead of EOF (don't use -, it's extremely slow) (time (let ((p (open-input-string s))) (let loop ((n (string-length s)) (c #f)) (if (= 0 n) 'done (loop (fx- n 1) (if (##sys#slot p 6) #!eof (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c))))))))) ;; csc -O3: 0.044s CPU time ;; use eof-object? (time (let ((p (open-input-string s))) (let loop ((c #f)) (if (eof-object? c) 'done (loop (if (##sys#slot p 6) #!eof (let ([position (##sys#slot p 10)] [string (##sys#slot p 12)] [len (##sys#slot p 11)] ) (if (fx>= position len) #!eof (let ((c (##core#inline "C_subchar" string position))) (##sys#setislot p 10 (fx+ position 1)) c)))))))))