;http://www.gene.ucl.ac.uk/nomenclature/data/gdlw_columndef.html

(defparameter *families* (make-hash-table :test 'equal)
  "Maps a gene-family-name field to the list of Entrez ids pushed for it.
Reset to an empty table every time this file is loaded; filled as a side
effect of EACH-HUGO-SYNONYMS.")

(defun each-hugo-synonyms (function)
  "Call FUNCTION with two arguments, an Entrez id and a list of names, for
each data row of the tab-separated file named by (config :hugo).

The first line of the file is the header row.  Each header is upcased, has
its parentheses removed and its spaces turned into hyphens, and is interned
as a keyword; columns are then located by those keywords.  The Entrez id is
taken from the ENTREZ-GENE-ID column, falling back to
ENTREZ-GENE-ID-MAPPED-DATA when the former is empty.  Rows without an Entrez
id are skipped.  Rows with a non-empty GENE-FAMILY-NAME also have their
Entrez id pushed onto that family's entry in *FAMILIES*.

Columns that are absent from the header row are ignored.  Nothing is read
when (config :hugo) is NIL.  Always returns NIL."
  (and (config :hugo)
       (with-open-file (f (config :hugo))
         (let* ((headers (mapcar (lambda (h)
                                   (intern (substitute #\- #\space
                                                       (regex-replace-all "[()]" (string-upcase h) ""))
                                           'keyword))
                                 (split-at-char (read-line f) #\tab)))
                ;; (column-index quantity) pairs, in the order the names are
                ;; handed to FUNCTION.  Entries whose column is missing from
                ;; the header (POSITION returned NIL) are dropped so that NTH
                ;; is never called with a NIL index.
                ;; NOTE(review): :previous-symbols is listed twice, so those
                ;; names are reported twice per row.  Kept as-is because
                ;; callers may rely on the current content/order -- confirm.
                (to-reap (remove nil
                                 (list (list (position :previous-symbols headers) :multiple)
                                       (list (position :approved-name headers) :single)
                                       (list (position :approved-symbol headers) :single)
                                       (list (position :previous-symbols headers) :multiple)
                                       (list (position :previous-names headers) :multiple)
                                       (list (position :aliases headers) :multiple))
                                 :key #'first))
                (entrez-pos1 (position :entrez-gene-id headers))
                (entrez-pos2 (position :entrez-gene-id-mapped-data headers))
                (gfn (position :gene-family-name headers)))
           (flet ((split-multiple (s)
                    ;; Break a multi-valued field into its individual values.
                    ;; NOTE(review): the result depends on how SPLIT-AT-CHAR
                    ;; treats a string delimiter, and only the CAR of each
                    ;; inner split is kept -- verify against that helper.
                    (and s
                         (let ((split (remove nil
                                              (mapcar 'car
                                                      (mapcar (lambda (s) (split-at-char s ", "))
                                                              (split-at-char s #\"))))))
                           ;; Drop a leading empty piece.
                           (if (equal (car split) "") (cdr split) split))))
                  (not-empty (s)
                    ;; Normalise NIL and the empty string to NIL.
                    (if (or (null s) (equal s "")) nil s))
                  (field (pos fields)
                    ;; Field at column POS, or NIL when the column is absent.
                    (and pos (nth pos fields))))
             (loop for line = (read-line f nil :eof)
                   until (eq line :eof)
                   for fields = (split-at-char line #\tab)
                   ; for paired = (loop for h in headers for f in fields collect (list h f))
                   for entrez = (or (not-empty (field entrez-pos1 fields))
                                    (not-empty (field entrez-pos2 fields)))
                   for names = (loop for (pos quant) in to-reap
                                     append (if (eq quant :single)
                                                (list (nth pos fields))
                                                (split-multiple (nth pos fields))))
                   for fn = (field gfn fields)
                   when (and entrez names)
                     do (funcall function entrez names)
                   when (and entrez (not-empty fn))
                     do (push entrez (gethash fn *families*))))))))

#|
Sample row as (header-keyword field) pairs; this looks like the output of the
commented-out `paired' clause in the loop above:

((:HGNC-ID "6871")
 (:APPROVED-SYMBOL "MAPK1")
 (:APPROVED-NAME "mitogen-activated protein kinase 1")
 (:STATUS "Approved")
 (:LOCUS-TYPE "undef")
 (:PREVIOUS-SYMBOLS "PRKM2, PRKM1")
 (:PREVIOUS-NAMES "")
 (:ALIASES "ERK, ERK2, p41mapk, p38, p38, MAPK2")
 (:CHROMOSOME "22q11.2")
 (:DATE-APPROVED "05/11/1993")
 (:DATE-MODIFIED "23/09/1999")
 (:DATE-NAME-CHANGED "")
 (:ACCESSION-NUMBERS "M84489")
 (:ENZYME-IDS "")
 (:ENTREZ-GENE-ID "")
 (:MGD-ID "")
 (:MISC-IDS "")
 (:PUBMED-IDS "")
 (:REFSEQ-IDS "")
 (:GENE-FAMILY-NAME "")
 (:GDB-ID-MAPPED-DATA "GDB:135677")
 (:ENTREZ-GENE-ID-MAPPED-DATA "5594")
 (:OMIM-ID-MAPPED-DATA "176948")
 (:REFSEQ-MAPPED-DATA "NM_002745")
 (:UNIPROT-ID-MAPPED-DATA "P28482"))
|#