;;; Scraper and small in-memory store for phosphosite.org protein and site pages.
;;;
;;; This code relies on helpers that are not defined in this file: GET-URL,
;;; ALL-MATCHES, REGEX-REPLACE-ALL, SITES-FOR-PROTEIN, PARSE-SITE-PAGE-REFERENCES,
;;; RESOLVE-NAME, URL-CACHED-FILE-NAME, RUN-SHELL-COMMAND and the variable
;;; *PHOSPHOSITE-LINKS*.
;;;
;;; As used below, (ALL-MATCHES string regex &rest group-indices) is treated as
;;; returning a list of matches, each match being a list holding the requested
;;; groups in order.

(defclass phosphosite ()
  (;; Hash table (EQUAL test) mapping a key to an entry plist.  DO-PI3K keys
   ;; each entry by both protein name and protein id string; RETRIEVE-ENTRIES
   ;; keys by :phosphosite-id only.
   (entries :initarg :entries
            :initform (make-hash-table :test 'equal)
            :accessor entries)
   ;; Plist of field -> hash table, built lazily by GETBY.
   (indices :initarg :indices
            :initform nil
            :accessor indices)
   ;; List of two-element lists whose second element is a protein id (number)
   ;; or a protein page URL; see EACH-PHOSPHOSITE-PAGE.
   (roots :initarg :roots
          :initform nil
          :accessor roots)))

(defun phosphosite-protein-url (id-or-url)
  "Return the protein home page URL for ID-OR-URL.
A number is treated as a phosphosite protein id and expanded into the full URL;
anything else is returned unchanged."
  (if (numberp id-or-url)
      (format nil "http://www.phosphosite.org/ParentProteinHomePage.jsp?proteinId=~a&organismId=5" id-or-url)
      id-or-url))

(defmethod each-phosphosite-page ((p phosphosite) function)
  "Call FUNCTION once with each distinct site id linked from the pages of (ROOTS P).
Pages are requested with :nofetch t.  Returns NIL."
  (let ((site-ids
          (loop for (nil url) in (roots p)
                for page = (get-url (phosphosite-protein-url url) :nofetch t)
                append (mapcar 'car (all-matches page "PhosphoSiteHomePage.jsp\\?psdbId=(\\d+)" 1)))))
    (dolist (site (remove-duplicates site-ids :test 'equal))
      (funcall function site))))

;;; NOTE(review): the region from here down to the matching "END damaged region"
;;; marker is damaged in this copy of the file and is reproduced token-for-token.
;;;   * The regular-expression literals appear to have had their markup stripped
;;;     (groups such as "(){0,1}" are empty).
;;;   * The string that starts inside EXTRACT-SITE-PAGE-HEADERS runs on through a
;;;     "~a" directive and ends at " header) 2))))".  That shape suggests the end
;;;     of EXTRACT-SITE-PAGE-HEADERS and the head of a second definition were
;;;     lost.  The second definition is presumably PARSE-ENUMERATED-LIST, which
;;;     PARSE-SITE-ANNOTATIONS calls as (parse-enumerated-list p page header),
;;;     with a (let ((header-body (caar (all-matches page (format nil ...
;;;     binding -- TODO confirm against the original source.
;;;   * As a result the region ends with two unmatched close parentheses and the
;;;     file cannot be read until the original text is restored.
(defmethod extract-site-page-headers ((p phosphosite) site)
  (let ((page (page-for-site p site)))
    (remove-if-not (lambda(s) (char= (char s (1- (length s))) #\:))
                   (mapcar 'car (all-matches page "(?i)(?s)(.*?)(){0,1}\\s*([^<]*?)\\s*\\s*([^<]*?)\\s*\\s*([^<]*?)
[^<]*?\\s*~a\\s*(){0,1}
.*?.*?(.*?)" header) 2))))
(assert header-body () "Header body for ~a not found" header)
(loop for (name linkbody) in (all-matches header-body "(?s)(?i)(){0,1}\\s*([^>]*?)(){0,1}\\s*\\(([^)]*)\\),{0,1}" 2 4)
      do (assert linkbody () "Blast - didn't find the links!")
      collect (list name (mapcan 'car (all-matches linkbody "(()|()|())\\s*(\\d+)(){0,1}(,\\s*){0,1}" 5))))))
;;; END damaged region.

(defmethod page-for-site ((p phosphosite) site &key (fetch-ok nil))
  "Return the page for SITE (a psdbId) via GET-URL.
GET-URL is called with :nofetch t unless FETCH-OK is true."
  (get-url (format nil "http://www.phosphosite.org/PhosphoSiteHomePage.jsp?psdbId=~a" site)
           :nofetch (not fetch-ok)
           :verbose t))

(defmethod parse-site-annotations ((p phosphosite) site)
  "Return a plist describing SITE.
The plist holds the SITE-SITE fields, one keyword per recognised page header
(see HEADER-TO-KEYWORD) and a trailing :references entry.  When the page is the
empty string a reminder is printed and NIL is returned."
  (let* ((ignore-these-headers '("View predicted information about this site:"
                                 "Orthologous residues:"
                                 "Blast this site:"))
         (headers (set-difference (extract-site-page-headers p site)
                                  ignore-these-headers
                                  :test 'equal))
         (page (page-for-site p site)))
    (if (equal page "")
        (format t "need to check ~a~%" site)
        (append (site-site p page)
                (loop for header in headers
                      append (list (header-to-keyword p (regex-replace-all "<[^<]*>" header ""))
                                   (parse-enumerated-list p page header)))
                `(:references ,(parse-site-page-references p page))))))

(defmethod pull-protein-site-information ((p phosphosite) url &key fetch-ok)
  "Return a plist (:protein NAME :url URL :sites LIST) for a protein page.
URL is a protein id (number) or a full URL.  FETCH-OK controls only the protein
page request; site pages are read through PARSE-SITE-ANNOTATIONS, which calls
PAGE-FOR-SITE without FETCH-OK."
  (let* ((page (get-url (phosphosite-protein-url url)
                        :nofetch (not fetch-ok)
                        :verbose t))
         ;; NOTE(review): this pattern looks like it lost a leading tag in the
         ;; same way as the damaged region above -- TODO confirm.
         (protein (caar (all-matches page "(?s)(?i)\\s*([^\\r\\n]*)" 1))))
    (list :protein protein
          :url url
          :sites (loop for site in (sites-for-protein p page)
                       collect (parse-site-annotations p site)))))

(defmethod site-site ((p phosphosite) page)
  "Return (:site S :protein P :species SP) parsed from the title spans of PAGE.
Everything from the first run of whitespace followed by an ampersand onward is
dropped from the protein name."
  (destructuring-bind (site protein species)
      (car (all-matches page "<span class=\"titleProtein\">([^<]*?)\\s*</span><span class=\"title\">\\s*-\\s*(.*?)\\((.*?)\\)</span>" 1 2 3))
    (list :site site
          :protein (regex-replace-all "\\s*&.*" protein "")
          :species species)))

(defmethod header-to-keyword ((p phosphosite) header)
  "Map the text of a site-page HEADER to the keyword used in site plists.
Any header containing the effects-of-phosphorylation phrase maps to :effect;
other headers must match the table exactly.  Signals an error for an unknown
header."
  (cond ((search "Effects of Site's Phosphorylation" header)
         :effect)
        ((second (assoc header
                        '(("Methods used to characterize site In Vivo:" :in-vivo)
                          ("Putative upstream kinases:" :upstream-kinases)
                          ("Ligands:" :ligands)
                          ("Kinase, in vitro:" :kinase-in-vitro)
                          ("Disease tissue studied:" :tissue)
                          ("Putative upstream phosphatases:" :upstream-phosphatases)
                          ("Inhibit interaction with:" :inhibits-interaction-with)
                          ("Induce interaction with:" :induces-interaction-with)
                          ("Receptor or Cellular Protein:" :receptor)
                          ("Treatments:" :treatments)
                          ("Phosphatases, in vitro:" :phosphatase-in-vitro))
                        :test 'equal)))
        (t
         (error "unknown header: ~a" header))))

(defmethod do-pi3k ((p phosphosite))
  "Pull every protein in *PHOSPHOSITE-LINKS* plus five hard-coded protein ids.
Each entry is stored in (ENTRIES P) under both its protein name and its protein
id string.  Returns NIL."
  (loop for (nil url) in (append *phosphosite-links*
                                 '((nil 1733) (nil 1696) (nil 3983) (nil 1179) (nil 4333)))
        for entry = (pull-protein-site-information p url)
        for id = (if (numberp url)
                     (princ-to-string url)
                     (caar (all-matches url "proteinId=(\\d+)&" 1)))
        do (setf (gethash (getf entry :protein) (entries p)) entry)
           (setf (gethash id (entries p)) entry))
  ;; The entries changed, so any index cached by GETBY is stale.
  (setf (indices p) nil))

(defmethod report-sites ((p phosphosite))
  "Return a list of (KEY (SITE . KINASES) ...) with one element per table entry.
A site is listed only when it has references, its species is human (compared
with EQUALP) and at least one upstream or in-vitro kinase name contains the
substring Akt.  Sites are de-duplicated by site name."
  (flet ((akt-site (e)
           ;; Return (SITE . KINASES) for a qualifying site plist E, else NIL.
           (let ((kinases (remove-duplicates
                           (append (mapcar 'car (getf e :upstream-kinases))
                                   (mapcar 'car (getf e :kinase-in-vitro)))
                           :test 'equal)))
             (and (getf e :references)
                  (equalp (getf e :species) "human")
                  (member "Akt" kinases :test 'search)
                  (list* (getf e :site) kinases)))))
    (let ((them nil))
      (maphash (lambda (k v)
                 (push (list* k (remove-duplicates
                                 (remove nil (mapcar #'akt-site (getf v :sites)))
                                 :test 'equal :key 'car))
                       them))
               (entries p))
      them)))

(defun browse-phosphopage (path)
  "Ask Safari, through osascript, to open the cached file for PATH.
Every slash in the cached file name is replaced by a colon before it is passed
to the script."
  ;; NOTE(review): the path is spliced into the shell command without quoting;
  ;; a cached file name containing a quote character would break the command.
  (let ((path (regex-replace-all "/" (namestring (url-cached-file-name path)) ":" )))
    (run-shell-command
     (format nil "osascript -e 'tell application \"Safari\"' -e 'open \"~a\"' -e \"end tell\"" path))))

(defmethod get-entry ((p phosphosite) proteinid)
  "Look PROTEINID up in (ENTRIES P); a number is converted to its printed form first."
  (gethash (if (numberp proteinid)
               (princ-to-string proteinid)
               proteinid)
           (entries p)))

(defmethod dump-entries ((p phosphosite) path)
  "Pretty-print every distinct entry of P to the file PATH, superseding it.
Before printing, each entry gains :entrez-id (through RESOLVE-NAME, which must
return exactly one id), a full :url and :phosphosite-id when these are missing.
The completed entry is also stored back into (ENTRIES P).  Returns NIL."
  (let ((written (make-hash-table :test 'equalp))
        (table (entries p)))
    (with-open-file (f path :direction :output :if-does-not-exist :create :if-exists :supersede)
      (maphash
       (lambda (k v)
         (unless (getf v :entrez-id)
           (let ((entrez (resolve-name (getf v :protein))))
             (assert (= (length entrez) 1) ()
                     "Don't know unambiguous entrez gene id for ~a" (getf v :protein))
             (setf (getf v :entrez-id) (car entrez))))
         (when (numberp (getf v :url))
           (setf (getf v :url) (phosphosite-protein-url (getf v :url))))
         (unless (getf v :phosphosite-id)
           (setf (getf v :phosphosite-id)
                 (caar (all-matches (getf v :url) "proteinId=(\\d+)" 1))))
         ;; (SETF GETF) on a missing key conses onto the front of the plist and
         ;; rebinds only the local V, so the table would keep the old list.
         ;; Replacing the value of the entry being visited is permitted during
         ;; MAPHASH.
         (setf (gethash k table) v)
         ;; One entry may sit under several keys; print each one only once.
         (unless (gethash v written)
           (pprint v f)
           (setf (gethash v written) t)))
       table))
    ;; Entry plists were replaced, so indices cached by GETBY are stale.
    (setf (indices p) nil)))

(defmethod retrieve-entries ((p phosphosite) path)
  "Read entry plists from the file PATH into (ENTRIES P), keyed by :phosphosite-id.
Returns NIL."
  (with-open-file (f path :direction :input)
    ;; The file holds plain data only, so do not let the reader evaluate code.
    (let ((*read-eval* nil))
      ;; The stream itself is the end-of-file marker; no datum read can be EQ to it.
      (loop for entry = (read f nil f)
            until (eq entry f)
            do (setf (gethash (getf entry :phosphosite-id) (entries p)) entry))))
  ;; The entries changed, so any index cached by GETBY is stale.
  (setf (indices p) nil))

(defmethod getby ((p phosphosite) field value)
  "Return the entry whose FIELD equals VALUE under EQUALP.
The per-field index is built from (ENTRIES P) on first use and cached in
(INDICES P).  When several entries share a value, the one visited last by
MAPHASH is kept."
  (let ((index (getf (indices p) field)))
    (unless index
      (setf index (make-hash-table :test 'equalp))
      (maphash (lambda (k v)
                 (declare (ignore k))
                 (setf (gethash (getf v field) index) v))
               (entries p))
      (setf (getf (indices p) field) index))
    (gethash value index)))

; (dump-entries *phosphosite* "/Users/ruttenbe/lsw/hcls/biordf/reagents/pi3k-phosphosite2.txt")

(defparameter *phosphosite* (make-instance 'phosphosite))

; (retrieve-entries *phosphosite* "/Users/ruttenbe/lsw/hcls/biordf/reagents/pi3k-phosphosite2.txt")