cdx-parser added by niadh on Sat Nov 24 13:26:05 2018
(import (chicken io))
(import (chicken keyword))
(import (chicken string))
(import (srfi 13))
(import (srfi 69))

;; cdx-reader: read the CDX index stored at the path FILE.
;;
;; Returns a list of hash-tables, one per data line, in file order.  Each
;; line is split on whitespace and its columns are paired positionally
;; with the keywords listed in `field-keys` below.
;;
;; Lines that are skipped (they contribute nothing to the result):
;;   * lines whose first whitespace-separated token is "CDX" (format header);
;;   * empty or whitespace-only lines -- these used to crash the reader,
;;     because `string-split` yields '() for them and `car` then fails.
;;
;; NOTE(review): the pairing relies on `map` over two lists; lines whose
;; column count differs from the number of keys are not handled specially
;; here -- confirm the expected column count against the files you read.
(define (cdx-reader file)
  ;; Keys assigned to the columns of a data line, in column order.
  (define field-keys
    '(SURT: DATE: URL: MIMETYPE: RESPONSE_CODE: DIGEST: REDIRECT:
      META_TAGS: LENGTH: OFFSET: WARC_FILE: ORIG_LENGTH: ORIG_OFFSET:
      ORIG_WARC_FILE:))

  ;; Turn the list of column strings of one line into a hash-table.
  (define (process-columns columns)
    (alist->hash-table (map cons field-keys columns)))

  (call-with-input-file file
    (lambda (input-port)
      (let loop ((line (read-line input-port))
                 (objs '()))
        (if (eof-object? line)
            ;; Records were consed in reverse; restore file order.
            (reverse objs)
            ;; Split once and reuse the result for both the header test
            ;; and the record construction.
            (let ((columns (string-split line)))
              (loop (read-line input-port)
                    (cond
                      ;; Blank line: no tokens, nothing to record.
                      ((null? columns) objs)
                      ;; Header line describing the CDX field layout.
                      ((equal? (car columns) "CDX") objs)
                      (else (cons (process-columns columns) objs))))))))))