(defparameter *infobiogen-synonyms* nil
  "Cache for the synonym table.  NIL until `infobiogen-synonyms' has built it;
afterwards an EQUALP hash table whose keys are gene names / aliases and whose
values are lists of the form (NAME . ALIASES).")

;; Earlier experimental version, kept for reference.
;; NOTE(review): the pattern literal below looks truncated (it probably
;; contained HTML tags that were lost) -- confirm against the upstream file.
;;
;; (defun parse-infobiogen-synonyms ()
;;   (let ((page (get-url "http://www.infobiogen.fr/services/chromcancer/Genes/Geneliste.html")))
;;     (let ((bit (subseq page 4000 10000)))
;;       (let ((entries (all-matches bit "(?s)[^<]*[^<]*" 0)))
;;         (length entries)))))

(defun %normalize-infobiogen-name (raw)
  "Return RAW with every whitespace run collapsed to a single space.  When the
collapsed string ends in a closing parenthesis, everything from the first
whitespace character that is followed by an opening parenthesis is removed."
  (let ((name (#"replaceAll" raw "\\s+" " ")))
    ;; Guard against the empty string before looking at the last character.
    (if (and (plusp (length name))
             (char= (char name (1- (length name))) #\)))
        (#"replaceAll" name "\\s\\(.*" "")
        name)))

(defun %record-infobiogen-synonym (key entry)
  "Store ENTRY under KEY in `*infobiogen-synonyms*' unless
`unique-human-gene-id?' accepts KEY.  Returns true when KEY was accepted (and
therefore nothing was stored), NIL when ENTRY was stored."
  (cond ((unique-human-gene-id? key) t)
        (t (setf (gethash key *infobiogen-synonyms*) entry)
           nil)))

(defun parse-infobiogen-synonyms ()
  "Fetch the Infobiogen gene list page and fill `*infobiogen-synonyms*'.

For every entry found on the page the primary name and each alias are
normalised; those not accepted by `unique-human-gene-id?' are stored in the
table with the value (NAME . ALIASES), where NAME is the normalised primary
name and ALIASES are the alias strings as captured from the page.

`*infobiogen-synonyms*' must be bound to a hash table when this is called
\(`infobiogen-synonyms' takes care of that).

Returns two values: the number of names/aliases accepted by
`unique-human-gene-id?' and the number stored in the table.  Signals an error
when the section marker cannot be found in the page.

NOTE(review): several string literals below contain raw line breaks and match
almost nothing useful as written (e.g. the entry pattern can only match
whitespace, yet entries are later searched for \">...<\").  HTML markup seems
to have been lost from these literals; they are reproduced unchanged here and
should be restored from the upstream source."
  (let* ((page (get-url "http://www.infobiogen.fr/services/chromcancer/Genes/Geneliste.html" :persist t))
         ;; SUBSEQ with a NIL start gives an obscure type error; fail loudly
         ;; with a message that says what is actually wrong.
         (start (or (search "Annotated genes


" page)
                    (error "parse-infobiogen-synonyms: section marker not found in gene list page")))
         (bit (subseq page start))
         ;; presumably ALL-MATCHES returns one list of captured groups per
         ;; match -- inferred from the destructuring below; confirm.
         (entries (all-matches bit "(?si)\\s*
" 0))
         (has 0)
         (hasnt 0))
    (flet ((tally (key entry)
             (if (%record-infobiogen-synonym key entry)
                 (incf has)
                 (incf hasnt))))
      (loop for (entry) in entries
            for clean = (#"replaceAll" entry "(?si)( \\s*)|
" " ")
            for ((raw-name)) = (all-matches clean ">(.*)<" 1)
            for aliases = (mapcar 'car (all-matches clean "(?si).*?Alias\\s*(.*?)(\\n|
)" 1))
            ;; An entry without a recognisable name cannot be recorded; skip
            ;; it instead of failing inside replaceAll on NIL.
            when raw-name
              do (let* ((name (%normalize-infobiogen-name raw-name))
                        (value (cons name aliases)))
                   (tally name value)
                   (dolist (alias aliases)
                     (tally (%normalize-infobiogen-name alias) value)))))
    (values has hasnt)))

(defun infobiogen-synonyms ()
  "Return the synonym table, building it on first use.

The table is built in a fresh EQUALP hash table that is only installed in
`*infobiogen-synonyms*' after `parse-infobiogen-synonyms' has returned
normally, so a failed fetch or parse does not leave an empty table cached."
  (or *infobiogen-synonyms*
      (let ((table (make-hash-table :test 'equalp)))
        ;; Dynamically rebind the special variable so the parser fills TABLE
        ;; without touching the global value until we know it succeeded.
        (let ((*infobiogen-synonyms* table))
          (parse-infobiogen-synonyms))
        (setq *infobiogen-synonyms* table))))

;
;; \\s*
;;
;; -- \\S+A --
#|
CTIP2    (Ctip-2) chicken ovalbumin upstream promoter transcription factor (COUP-TF)-interacting protein
   Alias BCL11B    (B-cell lymphoma/leukaemia 11B)
   Alias Rit1    zinc finger protein hRit1 alpha (not to be confused with RIT1 on chr. 1q22)

|# #| (.*?)( \\s*)+(.*)\\n(
( )+Alias (.*?)( \\s*)*(.*)(\\n)*