email-address added by andyjpb on Wed Apr 9 17:05:07 2014

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; eMail Address Parsers
;;;
;;; email-address provides eMail address handing procedures for reading eMail
;;; addresses in RFC 822 format as well as reading lists of addresses, such as
;;; commonly found in a 'To:' header as specified in RFC 2822.
;;;
;;; The address specification that we conform to when reading and parsing
;;; addresses is taken from RFC 822, Section 6.
;;;   http://tools.ietf.org/html/rfc822#section-6
;;;
;;; The specification for the format of the 'To:' header is taken from
;;; RFC 2822, Section 3.6.3.
;;;   http://tools.ietf.org/html/rfc2822#section-3.6.3
;;;
;;; Some of the examples used for conformance testing have been drawn from
;;; RFC 3696, Section 3.
;;;   http://tools.ietf.org/html/rfc3696#section-3
;;;
;;; The parser design was borrowed from uri-generic.
;;;
;;;
;;;  Copyright (C) 2014, Andy Bennett
;;;  All rights reserved.
;;;
;;;  Redistribution and use in source and binary forms, with or without
;;;  modification, are permitted provided that the following conditions are met:
;;;
;;;  Redistributions of source code must retain the above copyright notice, this
;;;  list of conditions and the following disclaimer.
;;;  Redistributions in binary form must reproduce the above copyright notice,
;;;  this list of conditions and the following disclaimer in the documentation
;;;  and/or other materials provided with the distribution.
;;;  Neither the name of the author nor the names of its contributors may be
;;;  used to endorse or promote products derived from this software without
;;;  specific prior written permission.
;;;
;;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;;;  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;;;  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;;  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
;;;  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
;;;  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
;;;  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;;  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
;;;  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
;;;  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;;;  POSSIBILITY OF SUCH DAMAGE.
;;;
;;; Andy Bennett , 2014/03/26 18:16
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(module email-address
	(email-address
	 ;email-address?
	 ;email-address-list
	 ;email-address->string
	 ;local-part
	 ;domain-part)
	)

(import chicken scheme)
(use srfi-1 extras data-structures comparse srfi-14)
(require-extension records)

; fIorz's separated-by parser :
(define (separated-by sep-parser field-parser)
  (sequence* ((head field-parser)
	      (tail (zero-or-more
		      (preceded-by
			sep-parser
			field-parser))))
	     (result (cons head tail))))

; RFC 822, Section 3.3 : http://tools.ietf.org/html/rfc822#section-3.3
;
; ucs-range->char-set lower and upper bounds define a half-open range
; [LOWER,UPPER). The ranges in RFC822 are inclusive.
(define CHAR   (in (ucs-range->char-set  0 128)))     ;   0 -> 127 ; char-set:ascii?
(define ALPHA  (in (char-set-union
		     (ucs-range->char-set 65  91)     ;  65 ->  90
		     (ucs-range->char-set 97 123))))  ;  97 -> 122
(define DIGIT  (in (ucs-range->char-set 48  58)))     ;  48 ->  57
(define CTL    (in (char-set-union
		     (ucs-range->char-set   0  32)    ;   0 ->  31
		     (ucs-range->char-set 127 128)))) ; 127
(define CR     (in (ucs-range->char-set 13 14)))      ;  13
(define LF     (in (ucs-range->char-set 10 11)))      ;  10
(define SPACE  (in (ucs-range->char-set 32 33)))      ;  32
(define HTAB   (in (ucs-range->char-set  9 10)))      ;   9
(define DQUOTE (in (ucs-range->char-set 34 35)))      ;  34
(define CRLF   (sequence CR LF))
(define LWSP-char (any-of SPACE HTAB))

(define linear-white-space (one-or-more          ; 1*([CRLF] LWSP-char)
			     (any-of
			       (sequence CRLF LWSP-char)
			       LWSP-char)))

(define specials (in (string->char-set "()<>@,;:\\\".[]")))

; text
;  ; => atoms, specials, comments and quoted-strings are NOT recognized.
;  Preserves case (RFC 822, Section 3.4.7).
(define text (none-of* CRLF CHAR))

; atom
; 1*
(define atom (as-string (one-or-more (none-of* specials SPACE CTL CHAR))))

(define BSLASH (in (string->char-set "\\")))

; qtext
; , "\" & CR, and including linear-white-space>.
; Preserves case (RFC 822, Section 3.4.7).
(define qtext (any-of
		linear-white-space
		(none-of* DQUOTE BSLASH CR CHAR)))

; quoted-pair
; "\" CHAR
; Preserves case (RFC 822, Section 3.4.7).
(define quoted-pair (sequence
		      (char-seq "\\")
		      CHAR))

; quoted-string
; <"> *(qtext/quoted-pair) <">
(define quoted-string (sequence* ((_ DQUOTE)
				  (string (as-string
			  (zero-or-more
			    (any-of
			      qtext
			      quoted-pair))))
				  (_ DQUOTE))
		       (result string)))

(define BRACKETS (in (string->char-set "[]")))

; dtext
; Preserves case
(define dtext (any-of
		linear-white-space
		(none-of*
		  BRACKETS BSLASH CR CHAR)))

; domain-literal
; "[" *(dtext / quoted-pair) "]"
(define domain-literal (sequence
			 (char-seq "[")
			 (zero-or-more
			   (any-of
			     dtext
			     quoted-pair))
			 (char-seq "]")))

; ;;;;;;;;;;;;;;;;;;;;;;;;;; NOT REQUIRED?? ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define PARENS (in (string->char-set "()")))

; ctext
; Preserves case (RFC 822, Section 3.4.7).
(define ctext (any-of
		linear-white-space
		(none-of*
		  PARENS BSLASH CR CHAR)))

; comment
; From RFC 822, Section 3.4.2 : http://tools.ietf.org/html/rfc822#section-3.4.3
; When a comment acts as the delimiter  between  a sequence of two lexical
; symbols, such as two atoms, it is lexically equivalent with a single
; SPACE,  for  the  purposes  of regenerating  the  sequence, such as when
; passing the sequence onto a mail protocol server.
(define (comment . args)
  (apply
    (sequence
      (char-seq "(")
      (zero-or-more
	(any-of
	  ctext
	  quoted-pair
	  comment))
      (char-seq ")"))
    args))

; delimiters
(define delimiters (any-of specials linear-white-space comment))
; ;;;;;;;;;;;;;;;;;;;;;;;;;; NOT REQUIRED?? ;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; word
(define word (any-of atom quoted-string))

; phrase
; 1*word
; Sequence of words
; Assume words can be separated by whitespace that will be preserved.
; Use linear-white-space as per RFC 822, Section 3.1.1
(define phrase (as-string
		 (sequence* ((head word)
			     (tail (zero-or-more
				     (sequence
				       linear-white-space
				       word))))

			    (result (cons head tail)))))

(define (assert-phrase x)
  (assert (string? x)))



; RFC 822, Section 6. : http://tools.ietf.org/html/rfc822#section-6
;

; local-part
; Preserves case (RFC 822, Section 3.4.7).
; word *("." word)
(define local-part (sequence* ((head word)
			       (tail (zero-or-more
				       (preceded-by
					 (char-seq ".")
					 word))))

			      (result (cons head tail))))

; domain-ref
(define domain-ref atom)

; sub-domain
; domain-ref / domain-literal
(define sub-domain (any-of
		     domain-ref
		     domain-literal))

; domain
; sub-domain *("." sub-domain)
(define domain (sequence* ((head sub-domain)
			   (tail (zero-or-more
				   (preceded-by
				     (char-seq ".")
				     sub-domain))))

			  (result (cons head tail))))

; addr-spec
; local-part "@" domain
; Result: '(local-part domain-part)
(define addr-spec (sequence* ((local-part local-part)
			      (_ (char-seq "@"))
			      (domain-part domain))

			     (result (list local-part domain-part))))

(define (assert-addr-spec x)
 (assert (list? x))
 (assert (= 2 (length x))))

; route
; 1#("@" domain) ":"
(define route (sequence* ((domains (separated-by
				     (is #\,)
				     (maybe
				       (preceded-by
					 (is #\@)
					 domain))))
			  (_ (is #\:)))

	       (let ((domains (remove not domains)))
		(if (null? domains)
		 fail
		 (result domains)))))

(define (assert-route x)
  (assert (list x)))


; route-addr
; "<" [route] addr-spec ">"
; Result: '(route addr-spec)
(define route-addr (sequence* ((_ (char-seq "<"))
			       (route (zero-or-more route))
			       (addr-spec addr-spec)
			       (_ (char-seq ">")))

		    (result (list route addr-spec))))

(define (assert-route-addr x)
 (assert (list? x))
 (assert (= 2 (length x))))


; stolen from db-hacks
(define (make-record-printer type-name field-names)
  (lambda (record port)
    (fprintf port "`(~A)\n" (string-intersperse (map (lambda (field-name)
						       (sprintf "(~A . ~S)" field-name ((record-accessor type-name field-name) record)))
						     field-names) "\n "))))

; EMAIL-ADDRESS ADTs
(define email-address-fields
  '(name
     local-part
     domain-part
     route))

(define email-address-rtd
  (make-record-type 'EMAIL-ADDRESS email-address-fields))

(define (new-email-address)
  (apply
    (record-constructor email-address-rtd email-address-fields)
    (make-list (length email-address-fields) #f)))

(define-record-printer EMAIL-ADDRESS
		       (make-record-printer email-address-rtd email-address-fields))

(define (set e field value)
  (printf "Setting ~A to ~A\n" field value)
  ((record-modifier email-address-rtd field) e value))

(define (set-addr-spec e x)
  (set e 'local-part  (first x))
  (set e 'domain-part (second x)))

(define (mailbox->email-address name route-addr)
  (let ((e (new-email-address)))
    (assert-phrase     name)
    (assert-route-addr route-addr)

    (set e 'name name)

    (let ((route     (first route-addr))
	  (addr-spec (second route-addr)))
      (assert-route     route)
      (assert-addr-spec addr-spec)

      (set e 'route route)
      (set-addr-spec e addr-spec))

    e))

(define (addr-spec->email-address addr-spec)
  (assert-addr-spec addr-spec)

  (let ((e (new-email-address)))

    (set-addr-spec e addr-spec)

    e))


(define email-group-fields
  '(name
     mailboxes))

(define email-group-rtd
  (make-record-type 'EMAIL-ADDRESS-GROUP email-group-fields))

(define make-email-group (record-constructor email-group-rtd '(name mailboxes)))

(define-record-printer EMAIL-ADDRESS-GROUP
		       (make-record-printer email-address-rtd email-address-fields))

(define (group->email-group name mailboxes)
  (assert (list? mailboxes))
  (assert-phrase name)

  (make-email-group name mailboxes))


;;;

; mailbox
; addr-spec / phrase route-addr

; addr-spec
; Result: EMAIL-ADDRESS
(define mailbox-1 (sequence* ((addr-spec addr-spec))
			     (result
			       (addr-spec->email-address addr-spec))))


; phrase route-addr
; Result: EMAIL-ADDRESS
(define mailbox-2 (sequence* ((phrase phrase)
			      (_ (zero-or-more linear-white-space))
			      (route-addr route-addr))

			     (result
			       (mailbox->email-address phrase route-addr))))

(define mailbox (any-of mailbox-1 mailbox-2))



; group
; phrase ":" [#mailbox] ";"
(define group (sequence* ((phrase phrase)
			  (_ (char-seq ":"))
			  (mailboxes (separated-by
				       (char-seq ",")
				       (maybe ; support null elements
					 mailbox)))
			  (_ (char-seq ";")))

			 (result
			   (group->email-group phrase mailboxes))))

; address
; mailbox / group
(define address (any-of mailbox group))

(define one-address (sequence* ((address address)
				(_ (none-of item))) ; end of data

			       (result address)))


;;;

(define (email-address str-port-or-seq)
 (receive (result rest) (parse one-address str-port-or-seq)
  result))

;(define (email-address-list str-port-or-seq)
; (let-values loop (((result rest) (parse address str-port-or-seq)))
;)

;(parse
;  route
;  ",@bla,,@frig,,,:")

)