email-address added by andyjpb on Wed Apr 9 17:05:07 2014
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; eMail Address Parsers ;;; ;;; email-address provides eMail address handing procedures for reading eMail ;;; addresses in RFC 822 format as well as reading lists of addresses, such as ;;; commonly found in a 'To:' header as specified in RFC 2822. ;;; ;;; The address specification that we conform to when reading and parsing ;;; addresses is taken from RFC 822, Section 6. ;;; http://tools.ietf.org/html/rfc822#section-6 ;;; ;;; The specification for the format of the 'To:' header is taken from ;;; RFC 2822, Section 3.6.3. ;;; http://tools.ietf.org/html/rfc2822#section-3.6.3 ;;; ;;; Some of the examples used for conformance testing have been drawn from ;;; RFC 3696, Section 3. ;;; http://tools.ietf.org/html/rfc3696#section-3 ;;; ;;; The parser design was borrowed from uri-generic. ;;; ;;; ;;; Copyright (C) 2014, Andy Bennett ;;; All rights reserved. ;;; ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions are met: ;;; ;;; Redistributions of source code must retain the above copyright notice, this ;;; list of conditions and the following disclaimer. ;;; Redistributions in binary form must reproduce the above copyright notice, ;;; this list of conditions and the following disclaimer in the documentation ;;; and/or other materials provided with the distribution. ;;; Neither the name of the author nor the names of its contributors may be ;;; used to endorse or promote products derived from this software without ;;; specific prior written permission. ;;; ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ;;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ;;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE ;;; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ;;; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ;;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ;;; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ;;; POSSIBILITY OF SUCH DAMAGE. ;;; ;;; Andy Bennett, 2014/03/26 18:16 ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (module email-address (email-address ;email-address? ;email-address-list ;email-address->string ;local-part ;domain-part) ) (import chicken scheme) (use srfi-1 extras data-structures comparse srfi-14) (require-extension records) ; fIorz's separated-by parser : (define (separated-by sep-parser field-parser) (sequence* ((head field-parser) (tail (zero-or-more (preceded-by sep-parser field-parser)))) (result (cons head tail)))) ; RFC 822, Section 3.3 : http://tools.ietf.org/html/rfc822#section-3.3 ; ; ucs-range->char-set lower and upper bounds define a half-open range ; [LOWER,UPPER). The ranges in RFC822 are inclusive. (define CHAR (in (ucs-range->char-set 0 128))) ; 0 -> 127 ; char-set:ascii? (define ALPHA (in (char-set-union (ucs-range->char-set 65 91) ; 65 -> 90 (ucs-range->char-set 97 123)))) ; 97 -> 122 (define DIGIT (in (ucs-range->char-set 48 58))) ; 48 -> 57 (define CTL (in (char-set-union (ucs-range->char-set 0 32) ; 0 -> 31 (ucs-range->char-set 127 128)))) ; 127 (define CR (in (ucs-range->char-set 13 14))) ; 13 (define LF (in (ucs-range->char-set 10 11))) ; 10 (define SPACE (in (ucs-range->char-set 32 33))) ; 32 (define HTAB (in (ucs-range->char-set 9 10))) ; 9 (define DQUOTE (in (ucs-range->char-set 34 35))) ; 34 (define CRLF (sequence CR LF)) (define LWSP-char (any-of SPACE HTAB)) (define linear-white-space (one-or-more ; 1*([CRLF] LWSP-char) (any-of (sequence CRLF LWSP-char) LWSP-char))) (define specials (in (string->char-set "()<>@,;:\\\".[]"))) ; text ; ; => atoms, specials, comments and quoted-strings are NOT recognized. ; Preserves case (RFC 822, Section 3.4.7). (define text (none-of* CRLF CHAR)) ; atom ; 1* (define atom (as-string (one-or-more (none-of* specials SPACE CTL CHAR)))) (define BSLASH (in (string->char-set "\\"))) ; qtext ; , "\" & CR, and including linear-white-space>. ; Preserves case (RFC 822, Section 3.4.7). (define qtext (any-of linear-white-space (none-of* DQUOTE BSLASH CR CHAR))) ; quoted-pair ; "\" CHAR ; Preserves case (RFC 822, Section 3.4.7). (define quoted-pair (sequence (char-seq "\\") CHAR)) ; quoted-string ; <"> *(qtext/quoted-pair) <"> (define quoted-string (sequence* ((_ DQUOTE) (string (as-string (zero-or-more (any-of qtext quoted-pair)))) (_ DQUOTE)) (result string))) (define BRACKETS (in (string->char-set "[]"))) ; dtext ; Preserves case (define dtext (any-of linear-white-space (none-of* BRACKETS BSLASH CR CHAR))) ; domain-literal ; "[" *(dtext / quoted-pair) "]" (define domain-literal (sequence (char-seq "[") (zero-or-more (any-of dtext quoted-pair)) (char-seq "]"))) ; ;;;;;;;;;;;;;;;;;;;;;;;;;; NOT REQUIRED?? ;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define PARENS (in (string->char-set "()"))) ; ctext ; Preserves case (RFC 822, Section 3.4.7). (define ctext (any-of linear-white-space (none-of* PARENS BSLASH CR CHAR))) ; comment ; From RFC 822, Section 3.4.2 : http://tools.ietf.org/html/rfc822#section-3.4.3 ; When a comment acts as the delimiter between a sequence of two lexical ; symbols, such as two atoms, it is lexically equivalent with a single ; SPACE, for the purposes of regenerating the sequence, such as when ; passing the sequence onto a mail protocol server. (define (comment . args) (apply (sequence (char-seq "(") (zero-or-more (any-of ctext quoted-pair comment)) (char-seq ")")) args)) ; delimiters (define delimiters (any-of specials linear-white-space comment)) ; ;;;;;;;;;;;;;;;;;;;;;;;;;; NOT REQUIRED?? ;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; word (define word (any-of atom quoted-string)) ; phrase ; 1*word ; Sequence of words ; Assume words can be separated by whitespace that will be preserved. ; Use linear-white-space as per RFC 822, Section 3.1.1 (define phrase (as-string (sequence* ((head word) (tail (zero-or-more (sequence linear-white-space word)))) (result (cons head tail))))) (define (assert-phrase x) (assert (string? x))) ; RFC 822, Section 6. : http://tools.ietf.org/html/rfc822#section-6 ; ; local-part ; Preserves case (RFC 822, Section 3.4.7). ; word *("." word) (define local-part (sequence* ((head word) (tail (zero-or-more (preceded-by (char-seq ".") word)))) (result (cons head tail)))) ; domain-ref (define domain-ref atom) ; sub-domain ; domain-ref / domain-literal (define sub-domain (any-of domain-ref domain-literal)) ; domain ; sub-domain *("." sub-domain) (define domain (sequence* ((head sub-domain) (tail (zero-or-more (preceded-by (char-seq ".") sub-domain)))) (result (cons head tail)))) ; addr-spec ; local-part "@" domain ; Result: '(local-part domain-part) (define addr-spec (sequence* ((local-part local-part) (_ (char-seq "@")) (domain-part domain)) (result (list local-part domain-part)))) (define (assert-addr-spec x) (assert (list? x)) (assert (= 2 (length x)))) ; route ; 1#("@" domain) ":" (define route (sequence* ((domains (separated-by (is #\,) (maybe (preceded-by (is #\@) domain)))) (_ (is #\:))) (let ((domains (remove not domains))) (if (null? domains) fail (result domains))))) (define (assert-route x) (assert (list x))) ; route-addr ; "<" [route] addr-spec ">" ; Result: '(route addr-spec) (define route-addr (sequence* ((_ (char-seq "<")) (route (zero-or-more route)) (addr-spec addr-spec) (_ (char-seq ">"))) (result (list route addr-spec)))) (define (assert-route-addr x) (assert (list? x)) (assert (= 2 (length x)))) ; stolen from db-hacks (define (make-record-printer type-name field-names) (lambda (record port) (fprintf port "`(~A)\n" (string-intersperse (map (lambda (field-name) (sprintf "(~A . ~S)" field-name ((record-accessor type-name field-name) record))) field-names) "\n ")))) ; EMAIL-ADDRESS ADTs (define email-address-fields '(name local-part domain-part route)) (define email-address-rtd (make-record-type 'EMAIL-ADDRESS email-address-fields)) (define (new-email-address) (apply (record-constructor email-address-rtd email-address-fields) (make-list (length email-address-fields) #f))) (define-record-printer EMAIL-ADDRESS (make-record-printer email-address-rtd email-address-fields)) (define (set e field value) (printf "Setting ~A to ~A\n" field value) ((record-modifier email-address-rtd field) e value)) (define (set-addr-spec e x) (set e 'local-part (first x)) (set e 'domain-part (second x))) (define (mailbox->email-address name route-addr) (let ((e (new-email-address))) (assert-phrase name) (assert-route-addr route-addr) (set e 'name name) (let ((route (first route-addr)) (addr-spec (second route-addr))) (assert-route route) (assert-addr-spec addr-spec) (set e 'route route) (set-addr-spec e addr-spec)) e)) (define (addr-spec->email-address addr-spec) (assert-addr-spec addr-spec) (let ((e (new-email-address))) (set-addr-spec e addr-spec) e)) (define email-group-fields '(name mailboxes)) (define email-group-rtd (make-record-type 'EMAIL-ADDRESS-GROUP email-group-fields)) (define make-email-group (record-constructor email-group-rtd '(name mailboxes))) (define-record-printer EMAIL-ADDRESS-GROUP (make-record-printer email-address-rtd email-address-fields)) (define (group->email-group name mailboxes) (assert (list? mailboxes)) (assert-phrase name) (make-email-group name mailboxes)) ;;; ; mailbox ; addr-spec / phrase route-addr ; addr-spec ; Result: EMAIL-ADDRESS (define mailbox-1 (sequence* ((addr-spec addr-spec)) (result (addr-spec->email-address addr-spec)))) ; phrase route-addr ; Result: EMAIL-ADDRESS (define mailbox-2 (sequence* ((phrase phrase) (_ (zero-or-more linear-white-space)) (route-addr route-addr)) (result (mailbox->email-address phrase route-addr)))) (define mailbox (any-of mailbox-1 mailbox-2)) ; group ; phrase ":" [#mailbox] ";" (define group (sequence* ((phrase phrase) (_ (char-seq ":")) (mailboxes (separated-by (char-seq ",") (maybe ; support null elements mailbox))) (_ (char-seq ";"))) (result (group->email-group phrase mailboxes)))) ; address ; mailbox / group (define address (any-of mailbox group)) (define one-address (sequence* ((address address) (_ (none-of item))) ; end of data (result address))) ;;; (define (email-address str-port-or-seq) (receive (result rest) (parse one-address str-port-or-seq) result)) ;(define (email-address-list str-port-or-seq) ; (let-values loop (((result rest) (parse address str-port-or-seq))) ;) ;(parse ; route ; ",@bla,,@frig,,,:") )