;;; A scraper for getting all salons from a particular website -- a little bit evil :)
;;; Added by sjamaan on Mon Jan 27 12:36:39 2014

(use srfi-1 extras html-parser http-client posix
     sxpath mysql-client sxml-modifications)

;; Database handle used to query the list of postcodes.  Connects to the
;; local MySQL server as "root" with no password (#f).
(define conn
  (let ((host "localhost")
        (user "root")
        (password #f)
        (database "direct_result_dev"))
    (make-mysql-connection host user password database)))

;; Salon entries collected so far.  Starts out empty; the scrape loop
;; checks new entries against this list to filter out duplicates.
(define all-results (list))

;; Main scrape loop.
;;
;; Selects every distinct 4-character postcode prefix from the `postcode`
;; table and, for each one, submits the salon-locator form for that
;; postcode (country "NL", `mile` field 5).  The returned HTML is parsed
;; to SXML, every <li class="salonInfo"> element is extracted, and the
;; entries not already present in `all-results` are pretty-printed to
;; standard output and then added to `all-results`.
;;
;; Retry policy per postcode (`i` is the attempt counter):
;;   * exception during fetch/parse: logged; retried once, only if i = 0
;;   * no salons, i = 0: sleep 2 seconds, retry
;;   * no salons, i = 1: sleep 5 minutes, retry
;;   * no salons, i >= 2: give up on this postcode
;; Progress and error messages go to the current error port.
((conn "SELECT DISTINCT(SUBSTRING(postcode,1,4)) FROM `postcode` ORDER BY postcode")
 ;; Applied to the query result; `r` is a list whose first element is the
 ;; postcode prefix (presumably invoked once per result row -- confirm
 ;; against the mysql-client documentation).
 (lambda (r)
   (let ((postalcode (car r))
         ;; Document transformer that deletes the <span> under each salon's
         ;; <h2>.  Judging by the name this span holds the distance to the
         ;; searched postcode, which would otherwise make the same salon
         ;; compare unequal across searches -- TODO confirm.
         (remove-distance
          (sxml-modify
           '("//li[@class=\"salonInfo\"]/div/h2/span" delete))))
     (fprintf (current-error-port)
         "Running for postal code ~S\n" postalcode)
     (let lp ((i 0))
       (handle-exceptions exn
           ;; Any failure while fetching or processing the page lands here.
           ;; Only the first attempt is retried, to avoid looping forever.
           (begin (fprintf (current-error-port)
                      "ERROR FOR POSTAL CODE ~S\n" postalcode)
                  (when (= i 0)
                    (fprintf (current-error-port) "Retrying\n")
                    (lp (add1 i))))
         (let* ((document
                 ;; Passing an alist of form fields makes this a form
                 ;; submission; the response body is read with html->sxml.
                 (with-input-from-request
                  "http://www.sebastianprofessional.com/en-EN/salon-locator.aspx"
                  `((ctl00$mainContent$SebastianSalonLocator$btnFind . "SEARCH NOW")
                    (ctl00$mainContent$SebastianSalonLocator$dCountrySelector . "NL")
                    (ctl00$mainContent$SebastianSalonLocator$mile . 5)
                    (ctl00$mainContent$SebastianSalonLocator$tbPostalCode . ,postalcode)
                    ;; Hard-coded ASP.NET view state captured from the form
                    ;; page; sent along with every request.
                    (__VIEWSTATE . "/wEPDwUJNzEwODM3Nzg1D2QWAmYPZBYCAgMPZBYCAgUPZBYCAgEPZBYQZg8QZBAVFwlBdXN0cmFsaWEHQXVzdHJpYQdCZWxnaXVtBkJyYXppbAZDYW5hZGEFQ2hpbmEGQ3lwcnVzBkZyYW5jZQdHZXJtYW55B0lyZWxhbmQFSXRhbHkFSmFwYW4GTWV4aWNvC05ldGhlcmxhbmRzC05ldyBaZWFsYW5kEVJlcHVibGljIG9mIEtvcmVhElJ1c3NpYW4gRmVkZXJhdGlvbgxTb3V0aCBBZnJpY2EFU3BhaW4LU3dpdHplcmxhbmQOVW5pdGVkIEtpbmdkb20NVW5pdGVkIFN0YXRlcwAVFwJBVQJBVAJCRQJCUgJDQQJDTgJDWQJGUgJERQJJRQJJVAJKUAJNWAJOTAJOWgJLUgJSVQJaQQJFUwJDSAJHQgJVUwAUKwMXZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dkZAICDxYEHgV2YWx1ZQUBMR4EbmFtZQUEbWlsZWQCAw8WBB8ABQEyHwEFBG1pbGVkAgQPFgQfAAUBNR8BBQRtaWxlZAIFDxYEHwAFAjEwHwEFBG1pbGVkAgYPFgQfAAUCMjAfAQUEbWlsZWQCCg8WAh4HVmlzaWJsZWhkAgsPFgIfAmdkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYGBTJjdGwwMCRtYWluQ29udGVudCRTZWJhc3RpYW5TYWxvbkxvY2F0b3Ikc2Fsb24xbWlsZQUyY3RsMDAkbWFpbkNvbnRlbnQkU2ViYXN0aWFuU2Fsb25Mb2NhdG9yJHNhbG9uMm1pbGUFMmN0bDAwJG1haW5Db250ZW50JFNlYmFzdGlhblNhbG9uTG9jYXRvciRzYWxvbjVtaWxlBTNjdGwwMCRtYWluQ29udGVudCRTZWJhc3RpYW5TYWxvbkxvY2F0b3Ikc2Fsb24xMG1pbGUFM2N0bDAwJG1haW5Db250ZW50JFNlYmFzdGlhblNhbG9uTG9jYXRvciRzYWxvbjIwbWlsZQUyY3RsMDAkbWFpbkNvbnRlbnQkU2ViYXN0aWFuU2Fsb25Mb2NhdG9yJGNiRmxhZ1NoaXA="))
                  html->sxml))
                ;; All salon list items, after stripping the per-search span.
                (salons ((sxpath "//li[@class=\"salonInfo\"]")
                         (remove-distance document)))
                ;; Only the salons not seen for an earlier postcode.
                (unique-salons (remove (lambda (s)
                                         (member s all-results))
                                       salons)))
           (cond ((and (null? salons) (= i 0))
                  (fprintf (current-error-port)
                      "NO SALONS for postcode ~S. Retrying..."
                      postalcode)
                  (sleep 2)             ; Just in case
                  (lp (add1 i)))
                 ((and (null? salons) (= i 1))
                  (fprintf (current-error-port)
                      "NO SALONS for postcode ~S. Waiting 5 minutes"
                    postalcode)
                  (sleep (* 60 5))
                  (lp (add1 i)))
                 ((null? salons)
                  (fprintf (current-error-port)
                      "STILL NO SALONS for postcode ~S.  Skipping..."
                    postalcode))
                 (else
                  (for-each pp unique-salons)
                  ;; Record the new salons in the accumulator that the
                  ;; `member` check above reads, so later postcodes do not
                  ;; print them again.
                  (set! all-results (append! unique-salons all-results))))))))))