;; State needed to walk the Invitrogen antibody catalog: the category index
;; URLs, the product ids harvested from them, and the cookie string that is
;; handed to GET-URL on every request.
(defclass invitrogen-products ()
  (;; List of category index URLs to scan for product ids.
   (antibody-roots :initarg :antibody-roots
                   :initform nil
                   :accessor antibody-roots)
   ;; List of product id strings.  Starts out NIL; the :around method on the
   ;; ANTIBODY-IDS reader further down fills it in on first use.
   (antibody-ids :initarg :antibody-ids
                 :initform nil
                 :accessor antibody-ids)
   ;; Cookie string passed as :cookiestring to GET-URL.
   (cookie :initarg :cookie
           :initform nil
           :accessor cookie)))

;; Categories pulled manually from
;; http://www.invitrogen.com/content.cfm?pageID=10610 (primary antibodies).
;; robots.txt is cool with us.
(defparameter *invitrogen*
  (make-instance
   'invitrogen-products
   ;; One index URL per hand-picked category code.
   :antibody-roots
   (mapcar (lambda (category-code)
             (format nil "https://catalog.invitrogen.com/index.cfm?fuseaction=viewCatalog.viewCategories&pc=~a&npc=92&nc=2346&" category-code))
           '("2348" "2349" "2350" "2352" "2356" "2517" "2361" "2382" "2383"
             "2384" "2521" "2386" "2387" "2388" "2389" "2392" "2397" "2398"))
   ;; Hard-coded cookie string captured from a browser session.
   :cookie "CFID=19420578; CFTOKEN=447c585f021bdbf2%2DBC1BE748%2D09F2%2D2325%2D18181FFC72FC0AAC; CK_GID=1; CK_ISO_CODE=us; s_cc=true; s_invisit=true; s_nr=1154207519748; s_sq=invitrogenqa%3D%2526pid%253Dantibodies%252520%252526%252520immunodetection%252520%25253A%252520antibodies%252520%252526%252520immunodetection%252520home%2526pidt%253D1%2526oid%253Dhttp%25253A//www.invitrogen.com/content.cfm%25253Fpageid%25253D11356%2526ot%253DA; s_visit=1; CP=null*; CFID=15618101; CFTOKEN=12a2ede41e08b4df-BC1BA287-E146-AF7A-196BE6F92F6F88DF; UNIQUEID=BC1BA313%2DD89B%2D9801%2D73300467622C4736; HBXRETURNVISITOR=1; HBXNEWVISITOR=1; s_vnum=1156799178709%26vn%3D1; s_lastvisit=1154207178720"))

(defmethod cache-index-pages ((i invitrogen-products))
  "Fetch every URL in (antibody-roots I) with GET-URL and return an EQUAL
hash table whose keys are the first element of each ALL-MATCHES result for
the productDescription pattern; every value is T."
  (let ((seen (make-hash-table :test 'equal)))
    ;; DOLIST's result form hands the populated table back to the caller.
    (dolist (root-url (antibody-roots i) seen)
      (let ((page (get-url root-url
                           :persist t
                           :verbose t
                           :cookiestring (cookie i))))
        ;; Presumably each match is a list of captured groups, so CAR is the
        ;; product id -- verify against ALL-MATCHES.  Using the table as a
        ;; set collapses ids that appear on more than one page.
        (loop for match in (all-matches page "&productDescription=([^&]+)&" 1)
              do (setf (gethash (car match) seen) t))))))

(defmethod antibody-ids :around ((a invitrogen-products))
  "Return the stored id list when it is non-NIL.  Otherwise build the list
from the keys of (cache-index-pages A), store it through the ANTIBODY-IDS
writer, and return it.  Signals an error via ASSERT if a key is not a string."
  (let ((stored (call-next-method)))
    (cond
      (stored stored)
      (t
       (let ((collected '()))
         (flet ((note-id (id unused)
                  (declare (ignore unused))
                  (assert (stringp id) () "oops ~a" id)
                  (push id collected)))
           ;; NOTE(review): @ is a free variable that is not declared in this
           ;; part of the file -- presumably a REPL debugging handle on the
           ;; raw table; confirm it is DEFVARed elsewhere.
           (maphash #'note-id (setq @ (cache-index-pages a))))
         ;; Store the result so later reads take the first COND branch.
         (setf (antibody-ids a) collected)
         collected)))))
(defmethod cache-datasheet-pages ((i invitrogen-products))
  "For each id in (antibody-ids I), build the product-details URL and call
GET-URL on it unless PROBE-FILE finds the file named by
URL-CACHED-FILE-NAME.  Sleeps briefly after every id.  Returns NIL."
  (dolist (product-id (antibody-ids i))
    (let ((detail-url (format nil "https://catalog.invitrogen.com/index.cfm?fuseaction=viewCatalog.viewProductDetails&productDescription=~a&" product-id)))
      ;; NOTE(review): the fetch passes :dont-cache t even though the guard
      ;; checks for a cached file -- confirm this is intended against GET-URL.
      (when (null (probe-file (url-cached-file-name detail-url)))
        (get-url detail-url
                 :dont-cache t
                 :persist t
                 :verbose t
                 :cookiestring (cookie i)))
      ;; Short pause between ids; it runs whether or not a fetch happened.
      (sleep .001))))

;; Example manual and search URLs kept for reference:
;https://www.invitrogen.com/content/sfs/manuals/BioSource%2044-609G.pdf
;https://www.invitrogen.com/search/index.cfm?fuseaction=google.search&searchTerm=44%2D609G&num=10&category=Manuals

;; NOTE(review): top-level call evaluated when the file is loaded; it looks
;; like a scratch/debugging form -- confirm it should stay.
(cached-url-safari "https://catalog.invitrogen.com/index.cfm?fuseaction=viewCatalog.viewProductDetails&productDescription=30202&")