;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Mon October 24, 2005: INCOMPATIBLE CHANGE. In the versions after binary asn, there is an extra { in the input ;; causing the old reader to try to read the whole 1 gig file into a single list. The workaround is to read-line the ;; first line away. ;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Converting binary to text asn ;; To convert binary asn to text asn. ;; Get asn tools from ncbi. ;; assume it is in $ncbi ;; open $ncbi/make/xCode/NCBI.xcode (so xcode can compile it. Darwinports doesn't do it) ;; select target all command line tools (or something like that). Build. ;; Find asntool. (on my machine I seem to have set the build directory to be ~/safari/build) ;; $ncbi = "~/Desktop/ncbi/"; ;; $in = "~/Desktop/BigStuff/Entrez\ Gene/Homo_sapiens.ags" ;; $out = "~/Desktop/BigStuff/Entrez\ Gene/Homo_sapiens.asn" ;; ./asntool -m $ncbi/access/entrezgene.asn -d $in -t Entrezgene-Set -p $out -M `find $ncbi -name \*\.asn | perl -e '@a=<>;print join(",",map {chop;$_} @a)'` ;; The find finds all the asn specification files that ncbi has nested. ;; Actually, when converting entrez, it logs only the following as necessary. 
;; access/entrez2.asn,access/entrezgene.asn,access/mim.asn,access/taxon3.asn,api/fastadl.asn,asn/access.asn,asn/biblio.asn,asn/featdef.asn,asn/gbseq.asn,asn/general.asn,

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; some documentation about the file format:
;;
;; The schema:
;; http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/objects/entrezgene/entrezgene.asn
;;
;; Makes reference to other scheme, described here:
;; http://sdm.lbl.gov/OPM/DM_TOOLS/OPM/MBD/DIR_LIB/NCBI.html
;; to find NCBI-gene, use "exports gene" inurl:IEB inurl:ToolBox inurl:source inurl:src -inurl:cpp -inurl:lib
;; -inurl:in site:www.ncbi.nlm.nih.gov
;;
;; Shows how to retrieve things if you are used to locuslink:
;; http://www.ncbi.nlm.nih.gov/entrez/query/static/help/genehelp.html#program

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Generic code to read asn. See asn-each for description of parsed record.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Start from the STANDARD readtable (argument NIL) rather than whatever
;; *readtable* happens to be current when this file is loaded, so that the
;; case conversion and macro characters seen by asn-read do not depend on
;; the loading environment.
(defparameter *asn-readtable* (copy-readtable nil)
  "Readtable used by ASN-READ to read text ASN.1 with the Lisp reader.")

(defun braces-reader (stream char)
  "Reader macro function for the open brace: read forms up to the matching
close brace and return them as a list."
  (declare (ignore char))
  (read-delimited-list #\} stream t))

(defun return-comma (stream char)
  "Reader macro function for the comma: return the comma character itself so
that ASN-UNCOMMA can later use it as an entry separator."
  (declare (ignore stream char))
  #\,)

; { starts a list
(set-macro-character #\{ 'braces-reader nil *asn-readtable*)
; } ends a list
(set-macro-character #\} (get-macro-character #\) *asn-readtable*) nil *asn-readtable*)
; returns a #\, separator
(set-macro-character #\, 'return-comma nil *asn-readtable*)
; #\: is not package separator
(set-syntax-from-char #\: #\A *asn-readtable*)
; #\\ is not quote char (it is junk)
(set-syntax-from-char #\\ #\A *asn-readtable*)

(defun asn-read (stream &optional (eof-marker :eof))
  "Read one form from STREAM using *ASN-READTABLE*, interning symbols in the
KEYWORD package, and return it after processing by ASN-UNCOMMA.
At end of file READ yields EOF-MARKER (default :EOF), which is likewise
passed through ASN-UNCOMMA."
  (let ((*readtable* *asn-readtable*)
        (*package* (load-time-value (find-package :keyword)))
        ;; The input is external data, never code: refuse #. evaluation.
        (*read-eval* nil))
    (asn-uncomma (read stream nil eof-marker))))

; each list is of the form (entry[, entry]*), where entry is key [type] value
; This removes the type and the commas, effectively making it a
; plist

(defun asn-uncomma (asn)
  "Strip comma separators and type tags from a form read with the asn
readtable, recursively.

ASN is either an atom, which is returned unchanged (NIL stays NIL), or a
list of entries separated by the character #\\, where each entry is
KEY VALUE or KEY TYPE VALUE.  For a list, returns a fresh list holding
KEY and VALUE of every entry in order, with each collected element itself
processed by ASN-UNCOMMA.  A trailing lone element is collected by itself."
  (flet ((comma-p (x)
           ;; EQL, not EQ: EQ is not guaranteed to be true for two
           ;; occurrences of the same character.
           (eql x #\,)))
    (if (atom asn)
        asn
        (let ((out '())
              (head asn))
          (flet ((emit (x) (push (asn-uncomma x) out)))
            (loop while head
                  do (destructuring-bind (a &optional b c d &rest more) head
                       (declare (ignore more))
                       (cond
                         ;; key value , ...
                         ((comma-p c)
                          (emit a) (emit b)
                          (setq head (cdddr head)))
                         ;; key type value , ...
                         ((comma-p d)
                          (emit a) (emit c)
                          (setq head (cddddr head)))
                         ;; last entry: a single element
                         ;; change - use cdr since can't tell head = (1 nil) from head = (1) before
                         ((null (cdr head))
                          (emit a)
                          (setq head nil))
                         ;; last entry: key value
                         ((null (cddr head))
                          (emit a) (emit b)
                          (setq head nil))
                         ;; last entry: key type value
                         ((null (cdddr head))
                          (emit a) (emit c)
                          (setq head nil))
                         ;; no comma in sight: take a key/value pair and move on
                         (t
                          (emit a) (emit b)
                          (setq head (cddr head)))))))
          (nreverse out)))))

; Iterate over a specified set of fields in a parsed asn record.
;
; At this point the asn record looks like a plist at top level.
; The values of each entry are either a plist, a list of plists, or some value (possibly list valued)
;
; The tags tell you how to get the part you want.
;
; Each tag is either a keyword or a list of key value pairs (possibly nil)
;
; If the tag is a keyword then you retrieve the value of the record specified by the tag
; If the tag is a list of (key value) pairs then you are looking through a list of plists
; and operating on any that have that list of key values in them. As a special case, if the tag
; is nil then you operate on all of them
; This process is repeated until you process all the tags. If you have anything left, then fn is called on the value
; (possibly multiple times)
;
; There are two special values of fn that are special cased.
; If fn is :collect then the values are collected into a list
; If fn is :collect-distinct then the values are collected into a list and duplicates are removed using 'equal

(defun asn-each (asn fn &rest tags)
  "Walk the parsed record ASN along TAGS and call FN on whatever is reached.

A keyword tag selects that key's value (via GETF) from the current plist.
A list tag (KEY VALUE ...) expects the current value to be a list of plists
and descends into each one whose entries are EQUAL to all the given pairs;
a NIL tag descends into every element.  When the tags are used up FN is
called on the current value.

If FN is :COLLECT the visited values are returned as a list (built with
PUSH, so in reverse visiting order); if FN is :COLLECT-DISTINCT the same
list is returned with EQUAL duplicates removed."
  (labels ((matches-p (candidate bindings)
             ;; true when CANDIDATE carries every key/value pair in BINDINGS
             (loop for (key value) on bindings by #'cddr
                   always (equal (getf candidate key) value)))
           (walk (node path)
             (let ((tag (first path))
                   (remaining (rest path)))
               (cond ((null path)
                      (funcall fn node))
                     ((listp tag)
                      ;; NODE is a list of plists; visit the matching ones
                      (dolist (candidate node)
                        (when (matches-p candidate tag)
                          (walk candidate remaining))))
                     (t
                      (walk (getf node tag) remaining))))))
    (case fn
      ((:collect :collect-distinct)
       (let ((found '()))
         (apply #'asn-each asn (lambda (item) (push item found)) tags)
         (if (eq fn :collect-distinct)
             (remove-duplicates found :test #'equal)
             found)))
      (otherwise
       (walk asn tags)))))

; When you know there is only one value then asn-get returns it. Same method as asn-each.

(defun asn-get (asn &rest tags)
  "Return the first value ASN-EACH would visit for TAGS, leaving the
traversal as soon as it is found.  If nothing is visited, the value of the
ASN-EACH call is returned."
  (block found
    (apply #'asn-each asn
           (lambda (item) (return-from found item))
           tags)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Code to deal with entrez gene asn file specifically
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun get-gene-dbrefs (e)
  "Turn E, a list of four-element entries whose second element is a database
name and whose fourth is an id, into a flat list alternating database and
id.  The names locusid, mim and geneid (compared case-insensitively) become
:LOCUSLINK, :OMIM and :GENE; any other name is kept as it is."
  (let ((known '(("locusid" . :locuslink)
                 ("mim" . :omim)
                 ("geneid" . :gene))))
    (loop for entry in e
          for db = (second entry)
          for id = (fourth entry)
          nconc (list (or (cdr (assoc db known :test #'string-equal)) db)
                      id))))

;; writes a tab delimited file with the bits of gene that we want. Fields with multiple values have the values separated by
;; "|". A header line names the fields.
;; The fields are:
;; :id - The entrez gene id
;; :name - The name of the protein. Sometimes long, sometimes short.
;; :type - One of: :snrna :snorna :rrna :trna :miscrna :other :unknown :pseudo :protein-coding
;; :refseq-mrna - refseq identifiers (list) (possibly nil)
;; :refseq-protein - refseq identifiers (list) (possibly nil)
;; :synonyms - list of synonyms (includes name as first element)
;; :go - list of go categories (numerical id as string) (possibly nil)
;; :unigene - unigene ids (list) (possibly nil)
;; :summary - the longer description of the protein
;; :current-id, :current-locuslink - if this record is superseded, the id of the current record. (possibly nil)
;; :omim - omim id (possibly nil)
;; :locuslink - locuslink id (possibly nil)
;; :species - :human, :mouse, or :rat
;;            (get-gene-info itself stores the :source :org :taxname value of the record here)

(defun get-gene-info (e)
  "Extract a summary plist from E, one parsed Entrez Gene record as produced
by ASN-READ.

The result has the keys :id :name :type :species :status :refseq-mrna
:refseq-protein :synonyms :go :unigene :summary, then :chromosome :strand
:start :end when a genomic locus accession was found, followed by the
database references of :track-info :current-id (with :locuslink and :gene
renamed to :current-locuslink and :current-id) and of :gene :db.
An assertion fails when the record has no :source :org :taxname."
  (flet ((clean-whitespace (s)
           ;; collapse each run of whitespace into a single space
           (if (and s (scan "\\s+|\\s*\\n\\s*" s))
               (regex-replace-all "\\s+|\\s*\\n\\s*" s " ")
               s)))
    (let* ((locus (asn-get e :gene :locus))
           (name (clean-whitespace (asn-get e :gene :desc)))
           (synonyms `(,@(and locus (list locus)) ; may or may not be one
                       ,@(and name (list name)) ; should always be one (NOT!, see e.g. id 44)
                       ,@(mapcar #'clean-whitespace (asn-get e :gene :syn))
                       ,@(mapcar #'clean-whitespace (asn-get e :prot :name))))
           (type (asn-get e :type))
           (dbs (get-gene-dbrefs (asn-get e :gene :db)))
           (summary (clean-whitespace (asn-get e :summary)))
           (id (asn-get e :track-info :geneid))
           (current-ids (get-gene-dbrefs (asn-get e :track-info :current-id)))
           (status (asn-get e :track-info :status))
           (refseq-mrna
             (union (asn-each e :collect-distinct
                              :locus '(:type :genomic) :products '(:type :mrna) :accession)
                    (asn-each e :collect
                              :comments '(:heading "NCBI Reference Sequences (RefSeq)")
                              :products '(:type :mrna) :accession)
                    :test 'equal))
           (refseq-protein
             (union (asn-each e :collect-distinct
                              :locus '(:type :genomic) :products '(:type :mrna)
                              :products '(:type :peptide) :accession)
                    (asn-each e :collect
                              :comments '(:heading "NCBI Reference Sequences (RefSeq)")
                              :products '(:type :peptide) :accession)
                    :test 'equal))
           (go-categories
             (asn-each e :collect-distinct
                       :properties '(:type :comment :heading "GeneOntology")
                       :comment nil :comment '(:type :comment) :source nil :src :tag))
           (unigene
             (asn-each e :collect-distinct
                       ':comments '(:heading "Additional Links")
                       :comment '(:text "UniGene") :source nil :src :tag))
           (chromosome (asn-get e :locus '(:type :genomic) :accession))
           (range (let ((interval (asn-get e :locus '(:type :genomic) :seqs :int)))
                    (list (case (getf interval :strand)
                            (:minus :-)
                            (:plus :+)
                            (otherwise nil))
                          (getf interval :from)
                          (getf interval :to))))
           (species (asn-get e :source :org :taxname)))
      (assert species)
      ;; A record may have no genomic locus at all; only strip the prefix
      ;; when there is an accession string to work on.
      (when chromosome
        (setq chromosome (regex-replace "^NC_0*" chromosome "")))
      (setq synonyms (remove-duplicates synonyms :test 'equal))
      ;; NSUBSTITUTE is only guaranteed to deliver its result as the return
      ;; value, so keep it.  CURRENT-IDS is freshly consed by get-gene-dbrefs.
      (setq current-ids (nsubstitute :current-locuslink :locuslink current-ids))
      (setq current-ids (nsubstitute :current-id :gene current-ids))
      `(:id ,id
        :name ,(or locus name)
        :type ,type
        :species ,species
        :status ,status
        :refseq-mrna ,refseq-mrna
        :refseq-protein ,refseq-protein
        :synonyms ,synonyms
        :go ,go-categories
        :unigene ,unigene
        ,@(if chromosome
              (list :chromosome chromosome
                    :strand (first range)
                    :start (second range)
                    :end (third range)))
        :summary ,summary
        ,@current-ids
        ,@dbs))))

(defclass entrez-gene-summary ()
  ((table :initarg :table :initform nil :accessor table-slot)
   (equivalent-ids :initarg :equivalent-ids :initform nil :accessor equivalent-ids-slot))
  (:documentation
   "Lazily loaded view of the tab delimited gene summary file.  TABLE caches
the hash table built by READ-SUMMARY; EQUIVALENT-IDS caches the table built
on first use by the EQUIVALENT-IDS method."))

(defvar *entrez-gene* (make-instance 'entrez-gene-summary))

(defmethod fields ((s entrez-gene-summary))
  "The columns of the summary file, in file order."
  '(:id :status :name :type :species :locuslink :current-id :current-locuslink
    :refseq-mrna :refseq-protein :omim :unigene :go :chromosome :strand :start :end
    :synonyms :summary))

(defmethod keyword-fields ((s entrez-gene-summary))
  "Columns whose values READ-SUMMARY interns as keywords."
  '(:status :type :species :strand))

(defmethod list-fields ((s entrez-gene-summary))
  "Columns whose values READ-SUMMARY splits on the | character."
  '(:go :synonyms :refseq-mrna :refseq-protein :unigene))

(defmethod table ((s entrez-gene-summary))
  "Return the id -> plist hash table, reading the summary file on first use."
  (or (table-slot s)
      (progn (read-summary s)
             (table-slot s))))

(defmethod read-summary ((s entrez-gene-summary))
  "Read the file named by (config :entrez-gene-summary) into a fresh EQUAL
hash table mapping each row's :id field to a plist of the row, store the
table in S and return it.  The first line supplies the column names.
Keyword fields are interned, list fields are split on | (an empty field
gives NIL), all other fields stay as the strings read from the file."
  (let ((table (make-hash-table :test 'equal))
        (keywords (keyword-fields s))
        (lists (list-fields s)))
    (with-open-file (f (config :entrez-gene-summary))
      ;; Consume the header line explicitly before iterating over the data
      ;; lines instead of relying on the initialization order of LOOP clauses.
      (let ((headers (mapcar (lambda (h) (intern (string-upcase h) 'keyword))
                             (split-at-char (read-line f) #\tab))))
        (loop for line = (read-line f nil :eof)
              until (eq line :eof)
              do (let* ((fields (split-at-char line #\tab))
                        (plist (loop for header in headers
                                     for field in fields
                                     for field-parsed
                                       = (cond ((member header keywords :test 'eq)
                                                (intern (string-upcase field) 'keyword))
                                               ((member header lists :test 'eq)
                                                (if (equal field "")
                                                    nil
                                                    (split-at-char field #\|)))
                                               (t field))
                                     collect header
                                     collect field-parsed)))
                   (setf (gethash (getf plist :id) table) plist))))
      (setf (table-slot s) table))))

(defmethod equivalent-ids ((s entrez-gene-summary) ll)
  "Return the ids recorded as equivalent to LL.  The relation is built once
from the rows whose :status is :secondary, linking each such row's id with
its :current-id and :current-locuslink in both directions."
  (unless (equivalent-ids-slot s)
    (let ((table (make-hash-table :test 'equal)))
      (flet ((link (id other)
               ;; READ-SUMMARY leaves plain fields as strings, so an absent
               ;; value shows up as "" as well as NIL; neither is an id.
               (when (and other (not (equal other "")))
                 (pushnew other (gethash id table) :test 'equal)
                 (pushnew id (gethash other table) :test 'equal))))
        (maphash (lambda (id info)
                   (when (eq (getf info :status) :secondary)
                     (link id (getf info :current-id))
                     (link id (getf info :current-locuslink))))
                 (table s)))
      (setf (equivalent-ids-slot s) table)))
  (gethash ll (equivalent-ids-slot s)))

(defmethod info ((s entrez-gene-summary) locuslink)
  "Return the plist stored for the id LOCUSLINK."
  ;; TABLE loads the summary on demand, no separate check is needed.
  (gethash locuslink (table s)))

(defmethod gene-field ((s entrez-gene-summary) locuslink field)
  "Return the value of FIELD for the id LOCUSLINK, or NIL when it is missing
or the empty string."
  (let ((res (getf (gethash locuslink (table s)) field)))
    (and (not (equal res "")) res)))

;; Read asn, write out the tab delimited file

(defun create-entrez-gene-summary (&key (source (merge-pathnames "homo_sapiens" (config :entrez-gene-asn)))
                                        (dest (config :entrez-gene-summary)))
  "Read the text asn file SOURCE record by record and write one tab
delimited line per record to DEST (superseding an existing file), preceded
by a header line naming the fields.  List values are joined with |, symbols
are written in lower case, missing values as the empty string."
  (with-open-file (in source)
    (read-line in) ; skip first line (see comment regarding INCOMPATIBLE CHANGE at top)
    (with-open-file (out dest :if-does-not-exist :create :direction :output :if-exists :supersede)
      ;; Same column list that READ-SUMMARY's callers see through FIELDS.
      (let ((fields (fields *entrez-gene*)))
        (apply 'print-tabbed out (mapcar (lambda (f) (string-upcase (symbol-name f))) fields))
        (loop for form = (let ((next (peek-char t in nil nil)))
                           ;; Stop at the extra trailing brace (see comment
                           ;; regarding INCOMPATIBLE CHANGE at top), and also
                           ;; when the input simply ends without one.
                           (if (or (null next) (char= next #\}))
                               :eof
                               (asn-read in)))
              ;; non-list forms (e.g. the separator between records) are skipped
              when (listp form)
                do (let ((info (get-gene-info form)))
                     (apply 'print-tabbed out
                            (mapcar (lambda (f)
                                      (let ((it (or (getf info f) "")))
                                        (cond ((listp it) (format nil "~{~a~^|~}" it))
                                              ((stringp it) it)
                                              ((symbolp it) (string-downcase (string it)))
                                              (t (princ-to-string it)))))
                                    fields)))
              until (eq form :eof))))))

;; What jar uses in locuslink digest
;; locusid
;; product (description field in paris) (look and see what this is)
;; organism (human mouse rat) but jar only uses human now. (like "Homo Sapiens")
;; searchs for a name official symbol then preferred symbol then alias symbol.
;; current locusid

;; iterate over gene summary entries
;; returns value as either plist of specific list of fields.
;; If you want a specific list of fields passed as argument to function then put the list of them in boa
;; Turns empty fields into nil.
;; Keywordifies species and type and status fields
;; Splits apart list valued fields

(defun each-entrez-gene-summary (function &key (path (config :entrez-gene-summary)) boa)
  "Call FUNCTION once for every data line of the tab delimited summary file
PATH, whose first line names the columns.

Without BOA, FUNCTION is applied to a plist of column keyword and parsed
value.  With BOA, a list of column keywords, FUNCTION is applied to just
the parsed values of those columns, in that order; a column that is not in
the header line contributes NIL.  Parsing turns empty fields into NIL,
splits the list valued columns on |, and interns the :species, :type and
:status values as keywords."
  (flet ((parse-field (header value)
           (cond ((= (length value) 0)
                  nil)
                 ((member header '(:synonyms :refseq-mrna :refseq-protein :go :unigene) :test 'eq)
                  (split-at-char value #\|))
                 ((member header '(:species :type :status) :test 'eq)
                  (intern (string-upcase value) 'keyword))
                 (t value))))
    (with-open-file (f path)
      (let* ((headers (mapcar (lambda (s) (intern (string-upcase s) 'keyword))
                              (split-at-char (read-line f) #\tab)))
             ;; column index of every requested field, NIL when absent
             (boapos (mapcar (lambda (key) (position key headers)) boa)))
        (loop for line = (read-line f nil :eof)
              until (eq line :eof)
              do (let ((fields (split-at-char line #\tab)))
                   (apply function
                          (if boa
                              (loop for pos in boapos
                                    for key in boa
                                    ;; guard against (nth nil ...) for an
                                    ;; unknown column; a NIL value parses to NIL
                                    collect (parse-field key (and pos (nth pos fields))))
                              (loop for header in headers
                                    for field in fields
                                    collect header
                                    collect (parse-field header field))))))))))

(defvar *locuslink-to-info* nil
  "Cache for LOCUSLINK-TO-INFO; NIL until the table has been built.")

(defun locuslink-to-info ()
  "Return an EQUAL hash table from :id to the summary plist of every entry
whose :species is :HUMAN or :|HOMO SAPIENS|.  Built from the summary file on
the first call and cached in *LOCUSLINK-TO-INFO*."
  (or *locuslink-to-info*
      (setq *locuslink-to-info*
            (let ((table (make-hash-table :test 'equal :size 150000)))
              (each-entrez-gene-summary
               (lambda (&rest info)
                 (when (member (getf info :species) '(:human :|HOMO SAPIENS|) :test 'eq)
                   (setf (gethash (getf info :id) table) info))))
              table))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; tests
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Quoted so that it is not run at load time; evaluate the inner form by hand.
'(with-input-from-string (s *asn-test*)
  (read-line s)
;  (asn-read s)
;  (asn-read s)
  (asn-each (asn-read s) :collect :locus '(:type :genomic) :products '(:type :mrna) :products '(:type :peptide) :accession))

;(defparameter *asn-test* "Entrezgene ::= {
;; The string value of *asn-test* follows on the next line of the file.
(defparameter *asn-test*
"Entrezgene-Set ::= { { track-info { geneid 1, status live, create-date std { year 2003, month 8, day 28, hour 20, minute 30, second 0 }, current-id { { db \"LocusID\", tag id 193217 }, { db \"GeneID\", tag id 193217 } }, update-date std { year 2004, month 10, day 13, hour 12, minute 5, second 0 } }, type protein-coding, source { genome genomic, origin natural, org { taxname \"Homo sapiens\", common \"human\", db { { db \"taxon\", tag id 9606 } }, syn { \"man\" }, orgname { name binomial { genus \"Homo\", species \"sapiens\" }, lineage \"Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo\", gcode 1, mgcode 2, div \"PRI\" } }, subtype { { subtype chromosome, name \"19\" } } }, gene { locus \"A1BG\", desc \"alpha-1-B glycoprotein\", maploc \"19q13.4\", db { { db \"LocusID\", tag id 1 }, { db \"MIM\", tag id 138670 } }, syn { \"A1B\", \"ABG\", \"GAB\", \"HYST2477\" }, locus-tag \"HGNC:5\" }, prot { name { \"alpha 1B-glycoprotein\", \"alpha 2B-glycoprotein\" } }, summary \"The protein encoded by this gene is a plasma glycoprotein of unknown function. 
The protein shows sequence similarity to the variable regions of some immunoglobulin supergene family member proteins.\", location { { display-str \"19q13.4\", method map-type cyto } }, gene-source { src \"LocusLink\", src-int 1, src-str2 \"1\" }, locus { { type genomic, heading \"Reference\", accession \"NC_000019\", v version 8, seqs { int { from 63548355, to 63556668, strand minus, id gi 42406306 } }, products { { type mRNA, heading \"Reference\", accession \"NM_130786\", version 2, genomic-coords { mix { int { from 63548355, to 63550206, strand minus, id gi 42406306 }, int { from 63550530, to 63550817, strand minus, id gi 42406306 }, int { from 63553547, to 63553828, strand minus, id gi 42406306 }, int { from 63554568, to 63554864, strand minus, id gi 42406306 }, int { from 63555460, to 63555732, strand minus, id gi 42406306 }, int { from 63556105, to 63556374, strand minus, id gi 42406306 }, int { from 63556469, to 63556504, strand minus, id gi 42406306 }, int { from 63556581, to 63556668, strand minus, id gi 42406306 } } }, seqs { whole gi 21071029 }, products { { type peptide, heading \"Reference\", accession \"NP_570602\", version 2, genomic-coords { packed-int { { from 63550199, to 63550206, strand minus, id gi 42406306 }, { from 63550530, to 63550817, strand minus, id gi 42406306 }, { from 63553547, to 63553828, strand minus, id gi 42406306 }, { from 63554568, to 63554864, strand minus, id gi 42406306 }, { from 63555460, to 63555732, strand minus, id gi 42406306 }, { from 63556105, to 63556374, strand minus, id gi 42406306 }, { from 63556469, to 63556504, strand minus, id gi 42406306 }, { from 63556581, to 63556614, strand minus, id gi 42406306 } } }, seqs { whole gi 21071030 } } } } } }, { type genomic, heading \"Reference\", accession \"NT_011109\", version 15, seqs { int { from 31124733, to 31133046, strand minus, id gi 29800594 } }, products { { type mRNA, heading \"Reference\", accession \"NM_130786\", version 2, genomic-coords { mix { int { from 
31124733, to 31126584, strand minus, id gi 29800594 }, int { from 31126908, to 31127195, strand minus, id gi 29800594 }, int { from 31129925, to 31130206, strand minus, id gi 29800594 }, int { from 31130946, to 31131242, strand minus, id gi 29800594 }, int { from 31131838, to 31132110, strand minus, id gi 29800594 }, int { from 31132483, to 31132752, strand minus, id gi 29800594 }, int { from 31132847, to 31132882, strand minus, id gi 29800594 }, int { from 31132959, to 31133046, strand minus, id gi 29800594 } } }, seqs { whole gi 21071029 }, products { { type peptide, heading \"Reference\", accession \"NP_570602\", version 2, genomic-coords { packed-int { { from 31126577, to 31126584, strand minus, id gi 29800594 }, { from 31126908, to 31127195, strand minus, id gi 29800594 }, { from 31129925, to 31130206, strand minus, id gi 29800594 }, { from 31130946, to 31131242, strand minus, id gi 29800594 }, { from 31131838, to 31132110, strand minus, id gi 29800594 }, { from 31132483, to 31132752, strand minus, id gi 29800594 }, { from 31132847, to 31132882, strand minus, id gi 29800594 }, { from 31132959, to 31132992, strand minus, id gi 29800594 } } }, seqs { whole gi 21071030 } } } } } }, { type genomic, heading \"Reference\", accession \"NT_086907\", version 1, seqs { int { from 8163589, to 8172398, strand minus, id gi 51475048 } }, products { { type mRNA, heading \"Reference\", accession \"NM_130786\", version 2, genomic-coords { mix { int { from 8163589, to 8165440, strand minus, id gi 51475048 }, int { from 8165763, to 8166050, strand minus, id gi 51475048 }, int { from 8169274, to 8169555, strand minus, id gi 51475048 }, int { from 8170297, to 8170593, strand minus, id gi 51475048 }, int { from 8171190, to 8171462, strand minus, id gi 51475048 }, int { from 8171835, to 8172104, strand minus, id gi 51475048 }, int { from 8172199, to 8172234, strand minus, id gi 51475048 }, int { from 8172311, to 8172398, strand minus, id gi 51475048 } } }, seqs { whole gi 21071029 
}, products { { type peptide, heading \"Reference\", accession \"NP_570602\", version 2, genomic-coords { packed-int { { from 8165433, to 8165440, strand minus, id gi 51475048 }, { from 8165763, to 8166050, strand minus, id gi 51475048 }, { from 8169274, to 8169555, strand minus, id gi 51475048 }, { from 8170297, to 8170593, strand minus, id gi 51475048 }, { from 8171190, to 8171462, strand minus, id gi 51475048 }, { from 8171835, to 8172104, strand minus, id gi 51475048 }, { from 8172199, to 8172234, strand minus, id gi 51475048 }, { from 8172311, to 8172344, strand minus, id gi 51475048 } } }, seqs { whole gi 21071030 } } } } } } }, properties { { type comment, heading \"GeneOntology\", source { { pre-text \"Provided by\", anchor \"GOA\", url \"http://www.ebi.ac.uk/GOA/\" } }, comment { { type comment, label \"Function\", comment { { type comment, refs { pmid 3458201 }, source { { src { db \"GO\", tag id 5554 }, anchor \"molecular_function unknown\", post-text \"evidence: ND\" } } } } }, { type comment, label \"Process\", comment { { type comment, source { { src { db \"GO\", tag id 4 }, anchor \"biological_process unknown\", post-text \"evidence: ND\" } } } } }, { type comment, label \"Component\", comment { { type comment, refs { pmid 3458201 }, source { { src { db \"GO\", tag id 5576 }, anchor \"extracellular\", post-text \"evidence: IDA\" } } } } } } } }, comments { { type comment, heading \"LocusTagLink\", source { { src { db \"HGNC\", tag id 5 } } } }, { type comment, heading \"RefSeq Status\", label \"REVIEWED\" }, { type comment, refs { pmid 15221005, pmid 14702039, pmid 12477932, pmid 8889549, pmid 3458201, pmid 2591067 } }, { type comment, heading \"Markers (Sequence Tagged Sites/STS)\", comment { { type comment, source { { src { db \"UniSTS\", tag id 10673 }, anchor \"RH65092\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"WIAF-3414-STS\" }, { type other, label \"Alternate name\", text \"stSG15578\" } } }, { type 
comment, source { { src { db \"UniSTS\", tag id 52209 }, anchor \"WI-16009\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"EST265925\" }, { type other, label \"Alternate name\", text \"RH55926\" } } }, { type comment, source { { src { db \"UniSTS\", tag id 89991 }, anchor \"SHGC-67307\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"RH80032\" }, { type other, label \"Alternate name\", text \"RH86145\" } } }, { type comment, source { { src { db \"UniSTS\", tag id 136670 }, anchor \"G59506\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"SHGC-130059\" } } }, { type comment, source { { src { db \"UniSTS\", tag id 147089 }, anchor \"D16S325\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"GDB:185460\" } } }, { type comment, source { { src { db \"UniSTS\", tag id 152074 }, anchor \"D11S2921\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"GDB:461809\" } } }, { type comment, source { { src { db \"UniSTS\", tag id 155756 }, anchor \"D10S16\", post-text \"(e-PCR)\" } }, comment { { type other, label \"Alternate name\", text \"D10S23\" }, { type other, label \"Alternate name\", text \"GDB:193809\" } } } } }, { type comment, heading \"NCBI Reference Sequences (RefSeq)\", products { { type mRNA, heading \"mRNA Sequence\", accession \"NM_130786\", version 2, source { { src { db \"Nucleotide\", tag id 21071029 }, anchor \"NM_130786\" } }, seqs { whole gi 21071029 }, products { { type peptide, heading \"Product\", accession \"NP_570602\", version 2, source { { src { db \"Protein\", tag id 21071030 }, anchor \"NP_570602\", post-text \"alpha 1B-glycoprotein\" } }, seqs { whole gi 21071030 }, comment { { type other, heading \"Conserved Domains\", source { { src { db \"PROT_CDD\", tag id 21071030 }, pre-text \"(1)\", anchor \"summary\" } }, comment { { type other, source { { src { db \"CDD\", 
tag id 365 }, anchor \"smart00408: IGc2; Immunoglobulin C-2 Type\" } }, comment { { type other, text \"Location: 223 - 282 Blast Score: 103\" } } } } } } } }, comment { { type other, heading \"Source Sequence\", source { { src { db \"Nucleotide\", tag str \"AF414429,AK055885,AK056201\" }, anchor \"AF414429,AK055885,AK056201\" } }, comment { { type other } } } } } } }, { type comment, heading \"Related Sequences\", products { { type genomic, heading \"Genomic\", accession \"AC010642\", version 5, source { { src { db \"Nucleotide\", tag id 9929687 }, anchor \"AC010642\" } }, seqs { int { from 41119, to 43581, strand plus, id gi 9929687 } }, products { { type peptide, text \"None\" } } }, { type mRNA, heading \"mRNA\", accession \"AB073611\", version 1, source { { src { db \"Nucleotide\", tag id 51555784 }, anchor \"AB073611\" } }, seqs { whole gi 51555784 }, products { { type peptide, accession \"BAD38648\", version 1, source { { src { db \"Protein\", tag id 51555785 }, anchor \"BAD38648\" } }, seqs { whole gi 51555785 } } } }, { type mRNA, heading \"mRNA\", accession \"AF414429\", version 1, source { { src { db \"Nucleotide\", tag id 15778555 }, anchor \"AF414429\" } }, seqs { whole gi 15778555 }, products { { type peptide, accession \"AAL07469\", version 1, source { { src { db \"Protein\", tag id 15778556 }, anchor \"AAL07469\" } }, seqs { whole gi 15778556 } } } }, { type mRNA, heading \"mRNA\", accession \"AK055885\", version 1, source { { src { db \"Nucleotide\", tag id 16550723 }, anchor \"AK055885\" } }, seqs { whole gi 16550723 }, products { { type peptide, text \"None\" } } }, { type mRNA, heading \"mRNA\", accession \"AK056201\", version 1, source { { src { db \"Nucleotide\", tag id 16551539 }, anchor \"AK056201\" } }, seqs { whole gi 16551539 }, products { { type peptide, text \"None\" } } }, { type mRNA, heading \"mRNA\", accession \"BC035719\", version 1, source { { src { db \"Nucleotide\", tag id 23273475 }, anchor \"BC035719\" } }, seqs { whole gi 
23273475 }, products { { type peptide, accession \"AAH35719\", version 1, source { { src { db \"Protein\", tag id 23273476 }, anchor \"AAH35719\" } }, seqs { whole gi 23273476 } } } }, { type other, text \"None\", products { { type peptide, accession \"P04217\", source { { src { db \"Protein\", tag id 46577680 }, anchor \"P04217\" } }, seqs { whole gi 46577680 } } } } } }, { type comment, heading \"Additional Links\", comment { { type comment, source { { src { db \"Evidence Viewer\", tag str \"1\" }, anchor \"Evidence Viewer\", url \"http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti g=NT_011109.15&gene=A1BG&lid=1&from=31124734&to=31133047\" } } }, { type comment, source { { src { db \"ModelMaker\", tag str \"1\" }, anchor \"ModelMaker\", url \"http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96 06&contig=NT_011109.15&gene=A1BG&lid=1\" } } }, { type comment, source { { src { db \"Evidence Viewer\", tag str \"1\" }, anchor \"Evidence Viewer\", url \"http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti g=NT_086907.1&gene=A1BG&lid=1&from=8163590&to=8172399\" } } }, { type comment, source { { src { db \"ModelMaker\", tag str \"1\" }, anchor \"ModelMaker\", url \"http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96 06&contig=NT_086907.1&gene=A1BG&lid=1\" } } }, { type comment, text \"UniGene\", xtra-properties { { tag \"UNIGENE\", value \"Hs.390608\" } }, source { { src { db \"UniGene\", tag str \"Hs.390608\" }, anchor \"Hs.390608\", url \"http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=39 0608\" } } }, { type comment, text \"MIM\", source { { src { db \"MIM\", tag str \"138670\" }, anchor \"138670\" } } }, { type comment, source { { src { db \"HomoloGene\", tag str \"1\" }, anchor \"HomoloGene\", url \"http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT= 1[loc]&TAXID=9606\" } } }, { type comment, source { { src { db \"AceView\", tag id 1 }, anchor \"AceView\", url 
\"http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l ocusid&org=9606&l=1\" } } }, { type comment, source { { src { db \"GDB\", tag str \"GDB:119638\" } } } }, { type comment, source { { src { db \"Ensembl\", tag str \"\" }, url \"http://www.ensembl.org/Homo_sapiens/contigview?geneid=AK055 885\" } } }, { type comment, source { { src { db \"UCSC\", tag str \"\" }, url \"http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position= AK055885\" } } }, { type comment, source { { src { db \"MGC\", tag str \"BC035719\" }, anchor \"MGC\", url \"http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC035719\" } } } } } }, unique-keys { { db \"LocusID\", tag id 1 }, { db \"MIM\", tag id 138670 } }, xtra-index-terms { \"LOC1\" } }")