Welcome to the CHICKEN Scheme pasting service
A scraper for getting all salons from a particular website -- a little bit evil :) added by sjamaan on Mon Jan 27 12:36:39 2014
;;; Salon scraper: for every distinct 4-character postcode prefix found in
;;; the `postcode` table, POST a search to the salon-locator page, extract
;;; the <li class="salonInfo"> nodes from the response and pretty-print
;;; each salon that has not been printed before.

(use srfi-1 extras html-parser http-client posix sxpath mysql-client sxml-modifications)

;; Database connection; calling it with an SQL string runs that query.
(define conn (make-mysql-connection "localhost" "root" #f "direct_result_dev"))

;; Every salon node printed so far.  Used to filter out salons that are
;; returned again for a later postcode.
(define all-results '())

;; Address of the locator form that the searches are POSTed to.
(define locator-url
  "http://www.sebastianprofessional.com/en-EN/salon-locator.aspx")

;; Opaque __VIEWSTATE form field sent along with every search.
;; NOTE(review): presumably captured from a real page load of the form and
;; required by the server -- confirm if the site ever changes.
(define locator-viewstate
  "/wEPDwUJNzEwODM3Nzg1D2QWAmYPZBYCAgMPZBYCAgUPZBYCAgEPZBYQZg8QZBAVFwlBdXN0cmFsaWEHQXVzdHJpYQdCZWxnaXVtBkJyYXppbAZDYW5hZGEFQ2hpbmEGQ3lwcnVzBkZyYW5jZQdHZXJtYW55B0lyZWxhbmQFSXRhbHkFSmFwYW4GTWV4aWNvC05ldGhlcmxhbmRzC05ldyBaZWFsYW5kEVJlcHVibGljIG9mIEtvcmVhElJ1c3NpYW4gRmVkZXJhdGlvbgxTb3V0aCBBZnJpY2EFU3BhaW4LU3dpdHplcmxhbmQOVW5pdGVkIEtpbmdkb20NVW5pdGVkIFN0YXRlcwAVFwJBVQJBVAJCRQJCUgJDQQJDTgJDWQJGUgJERQJJRQJJVAJKUAJNWAJOTAJOWgJLUgJSVQJaQQJFUwJDSAJHQgJVUwAUKwMXZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dkZAICDxYEHgV2YWx1ZQUBMR4EbmFtZQUEbWlsZWQCAw8WBB8ABQEyHwEFBG1pbGVkAgQPFgQfAAUBNR8BBQRtaWxlZAIFDxYEHwAFAjEwHwEFBG1pbGVkAgYPFgQfAAUCMjAfAQUEbWlsZWQCCg8WAh4HVmlzaWJsZWhkAgsPFgIfAmdkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYGBTJjdGwwMCRtYWluQ29udGVudCRTZWJhc3RpYW5TYWxvbkxvY2F0b3Ikc2Fsb24xbWlsZQUyY3RsMDAkbWFpbkNvbnRlbnQkU2ViYXN0aWFuU2Fsb25Mb2NhdG9yJHNhbG9uMm1pbGUFMmN0bDAwJG1haW5Db250ZW50JFNlYmFzdGlhblNhbG9uTG9jYXRvciRzYWxvbjVtaWxlBTNjdGwwMCRtYWluQ29udGVudCRTZWJhc3RpYW5TYWxvbkxvY2F0b3Ikc2Fsb24xMG1pbGUFM2N0bDAwJG1haW5Db250ZW50JFNlYmFzdGlhblNhbG9uTG9jYXRvciRzYWxvbjIwbWlsZQUyY3RsMDAkbWFpbkNvbnRlbnQkU2ViYXN0aWFuU2Fsb25Mb2NhdG9yJGNiRmxhZ1NoaXA=")

;; Document transformer that deletes the h2/span element inside every
;; salonInfo item, so the same salon compares equal (via `member`) no
;; matter which postcode search returned it.
;; NOTE(review): the name suggests that span holds the distance to the
;; searched postcode -- confirm against the page markup.
(define remove-distance
  (sxml-modify '("//li[@class=\"salonInfo\"]/div/h2/span" delete)))

;; POST a locator search for POSTALCODE and return the response parsed
;; into SXML.  Any error raised by the request or the parser propagates
;; to the caller.
(define (fetch-locator-document postalcode)
  (with-input-from-request
   locator-url
   `((ctl00$mainContent$SebastianSalonLocator$btnFind . "SEARCH NOW")
     (ctl00$mainContent$SebastianSalonLocator$dCountrySelector . "NL")
     (ctl00$mainContent$SebastianSalonLocator$mile . 5)
     (ctl00$mainContent$SebastianSalonLocator$tbPostalCode . ,postalcode)
     (__VIEWSTATE . ,locator-viewstate))
   html->sxml))

;; Main loop: the row procedure is called once per result row; the first
;; column of the row is the postcode prefix to search for.
((conn "SELECT DISTINCT(SUBSTRING(postcode,1,4)) FROM `postcode` ORDER BY postcode")
 (lambda (r)
   (let ((postalcode (car r)))
     (fprintf (current-error-port) "Running for postal code ~S\n" postalcode)
     ;; `i` counts the attempts already made for this postcode.
     (let lp ((i 0))
       (handle-exceptions exn
         (begin
           (fprintf (current-error-port) "ERROR FOR POSTAL CODE ~S\n" postalcode)
           ;; Only an error on the very first attempt triggers a retry;
           ;; errors on later attempts abandon this postcode.
           (when (= i 0)
             (fprintf (current-error-port) "Retrying\n")
             (lp (add1 i))))
         (let* ((document (fetch-locator-document postalcode))
                (salons ((sxpath "//li[@class=\"salonInfo\"]")
                         (remove-distance document)))
                ;; Salons from this response that were not printed earlier.
                (unique-salons (remove (lambda (s) (member s all-results))
                                       salons)))
           (cond ((and (null? salons) (= i 0))
                  (fprintf (current-error-port) "NO SALONS for postcode ~S. Retrying..." postalcode)
                  (sleep 2)             ; Just in case
                  (lp (add1 i)))
                 ((and (null? salons) (= i 1))
                  (fprintf (current-error-port) "NO SALONS for postcode ~S. Waiting 5 minutes" postalcode)
                  (sleep (* 60 5))
                  (lp (add1 i)))
                 ((null? salons)
                  (fprintf (current-error-port) "STILL NO SALONS for postcode ~S. Skipping..." postalcode))
                 (else
                  (for-each pp unique-salons)
                  ;; Must assign `all-results` (with the hyphen): writing
                  ;; to a misspelled variable would leave the accumulator
                  ;; empty and the duplicate filter above would never
                  ;; remove anything.
                  (set! all-results (append! unique-salons all-results))))))))))