; cdx-parser added by niadh on Sat Nov 24 13:26:05 2018

(import (chicken io))
(import (chicken keyword))
(import (chicken string))
(import (srfi 13))
(import (srfi 69))

(define (cdx-reader file)
  ;; Read the CDX index at path FILE and return a list of records, one per
  ;; data line, in file order. Each record is a hash-table whose keys are
  ;; the keywords in `column-names` and whose values are the corresponding
  ;; whitespace-separated fields of the line, kept as strings.
  ;;
  ;; Lines whose first field is "CDX" (the format header) are skipped, and
  ;; so are empty or whitespace-only lines.

  ;; Field names, assigned to the columns of a line by position.
  (define column-names
    '(SURT: DATE: URL: MIMETYPE: RESPONSE_CODE: DIGEST: REDIRECT: META_TAGS: LENGTH: OFFSET: WARC_FILE: ORIG_LENGTH: ORIG_OFFSET: ORIG_WARC_FILE:))

  (define (process-columns columns)
    ;; Build the hash-table for a single line from its list of fields.
    ;; Names and fields are paired positionally; a line with fewer fields
    ;; than names is expected to simply lack the trailing keys.
    (alist->hash-table
      (map cons column-names columns)))

  (call-with-input-file
    file
    (lambda (input-port)
      ;; Records are consed onto `objs` in reverse and flipped once at EOF.
      (let loop ((line (read-line input-port)) (objs '()))
        (if (eof-object? line)
          (reverse objs)
          ;; Split once and reuse the result for both the header test and
          ;; the record itself.
          (let ((columns (string-split line)))
            (if (or (null? columns)               ; blank line: no fields, nothing to take the car of
                    (equal? (car columns) "CDX")) ; header line
              (loop (read-line input-port) objs)
              (loop (read-line input-port)
                    (cons (process-columns columns) objs)))))))))