(in-package :ecocyc)
;;;kr:Aug-4-2006 Put this in its own file.
;;;
;;; Also, expanded the information in each assignment to include analysis symbols.
;;; Each mapping is a list, where the first element is a string, which is the abbreviation
;;; of the palsson-cpd that is being mapped.
;;; The second element is a frame ID, coming from either EcoCyc or MetaCyc.
;;;
;;; The remaining optional elements in a list are symbols that will be placed in the
;;; analysis slot of the palsson-cpd structure.
;;; --empty-- = If there is no additional analysis symbol, it means just a direct match with EcoCyc.
;;; :metacyc = (not documented here; presumably) the frame ID comes from MetaCyc rather than EcoCyc
;;; :i-o-c = instance of a class (the AF cpd is an instance of a biocyc class)
;;; :dispute = some unclarified disagreement between AF and biocyc
;;; :protein-instance = links to an ecocyc protein, which ought to be generalized to a class instead
;;; :polymer-section = a hypothetical, hopefully representative segment out of a much larger polymer
;;; Basically all mappings in this file should also carry this symbol:
;;; :manual = the match was made by human inspection, not by automated software, which for the most part
;;; is in (map-palsson-cpd-to-ecocyc ...)
;;;kr:Jan-6-2006 For wiring up some mappings by hand, from the abbreviation to frame ID :
;;;
;;; *one-time-cpd-assignments* : a quoted literal list of hand-wired compound mappings.
;;; Each entry has the form (ABBREVIATION FRAME-ID [ANALYSIS-SYMBOL ...]) :
;;;   ABBREVIATION    = a string, the palsson-cpd abbreviation being mapped
;;;   FRAME-ID        = a symbol naming the target frame (|...| is used where case must be preserved)
;;;   ANALYSIS-SYMBOL = zero or more keywords; the ones used in this table are
;;;                     :i-o-c , :metacyc , :dispute , :protein-instance , :polymer-section
;;; Entries that are commented out with ;; are not part of the list value.
;;; NOTE(review): no entry in this table carries :manual explicitly -- presumably the code that
;;; reads this variable adds it; confirm against the consumer.
(defparameter *one-time-cpd-assignments*
'(
;;kr:Aug-22-2006 probably, the following 3 have been replaced in the meantime by expanded out instances...
("cpe_EC" L-1-PHOSPHATIDYL-ETHANOLAMINE)
("pe_EC" L-1-PHOSPHATIDYL-ETHANOLAMINE) ;; C00350 why are there 2 of these ???
("ps_EC" L-1-PHOSPHATIDYL-SERINE)
("arab-L" ARABINOSE)
("glu-D" D-GLT)
("scsertrna" |Charged-SEC-tRNAs|)
;;kr:Jan-14-2006 After elements were excluded from the search indices:
("ni2" NI+2)
("k" K+)
("cd2" CD+2)
("ag" AG+) ;;kr:Mar-1-2006 Used to be CPD-1485 , but must have gotten renamed.
("na1" NA+)
("zn2" ZN+2)
("mg2" MG+2)
;;kr:Mar-1-2006 These new additions became necessary, as some frames must have been deleted or changed...
("sucgsa" CPD-822) ;; used to be N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE
("fldox" |Oxidized-flavodoxins|) ;; used to be OX-FLAVODOXIN
("fldrd" |Reduced-flavodoxins|) ;; used to be RED-FLAVODOXIN
;;kr:Jul-27-2006 finally fixed this long-standing ambiguity:
("sectrna" |Charged-SEC-tRNAs|) ;; the class for "L-selenocysteinyl-tRNAsec" , used in 2.9.1.1-RXN
("sertrna(sec)" |L-seryl-SEC-tRNAs|) ;; for "L-seryl-tRNAsec" , used in 2.9.1.1-RXN
;;kr:Aug-4-2006 Adam Feist hand-mapped almost 300 cpds from the iAF1237 spreadsheet.
;; The following are taken from /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/all-unmapped-cpds-060721.xls
("12dgr120" DIACYLGLYCEROL :i-o-c)
("12dgr140" DIACYLGLYCEROL :i-o-c)
("12dgr141" DIACYLGLYCEROL :i-o-c)
("12dgr160" DIACYLGLYCEROL :i-o-c)
("12dgr161" DIACYLGLYCEROL :i-o-c)
("12dgr180" DIACYLGLYCEROL :i-o-c)
("12dgr181" DIACYLGLYCEROL :i-o-c)
("12ppd-S" |PROPANE-1,2-DIOL|) ;; synonym with (S) is missing. also, the (R) enantiomer is completely absent.
;; lots of lyso confusion.
;;kr:Aug-22-2006 i think biocyc does not even have the correct classes to map the AF compounds...
("1ddecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1hdec9eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1hdecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1odec11eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1odecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1tdec7eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("1tdecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
("23ccmp" CPD-3713 :metacyc)
("23cump" CPD-3725 :metacyc)
("2dmmql8" DEMETHYLMENAQUINONE :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
("2ombzl" OCTAPRENYL-METHOXY-BENZOQUINONE :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
("2omhmbl" OCTAPRENYL-METHYL-OH-METHOXY-BENZQ :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
("2ommbl" OCTAPRENYL-METHYL-METHOXY-BENZQ :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
("3c4mop" CPD-7100)
("3haACP" OH-ACYL-ACP :i-o-c)
("3hcddec5eACP" BETA-HYDROXY-CIS-DELTA5-DODECENOYL-ACP)
("3hcmrs7eACP" OH-ACYL-ACP :i-o-c)
("3hcpalm9eACP" OH-ACYL-ACP :i-o-c)
("3hcvac11eACP" OH-ACYL-ACP :i-o-c)
("3hdcoa" L-3-HYDROXYACYL-COA :i-o-c)
("3hddcoa" L-3-HYDROXYACYL-COA :i-o-c)
("3hddecACP" OH-ACYL-ACP :i-o-c)
("3hdecACP" BETA-HYDROXYDECANOYL-ACP)
("3hhdcoa" L-3-HYDROXYACYL-COA :i-o-c)
("3hhexACP" OH-ACYL-ACP :i-o-c)
("3hmrsACP" 3-OHMYRISTOYL-ACP) ;; AF just had a mapping to class OH-ACYL-ACP . also, see the open questions at the end of this file.
("3hocoa" L-3-HYDROXYACYL-COA :i-o-c)
("3hoctaACP" OH-ACYL-ACP :i-o-c)
("3hoctACP" OH-ACYL-ACP :i-o-c)
("3hodcoa" L-3-HYDROXYACYL-COA :i-o-c)
("3hpalmACP" OH-ACYL-ACP :i-o-c)
("3htdcoa" L-3-HYDROXYACYL-COA :i-o-c)
("3ocddec5eACP" BETA-KETO-CIS-DELTA5-DODECENOYL-ACP)
("3ocmrs7eACP" B-KETOACYL-ACP :i-o-c)
("3ocpalm9eACP" B-KETOACYL-ACP :i-o-c)
("3ocvac11eACP" B-KETOACYL-ACP :i-o-c)
("3odcoa" 3-KETOACYL-COA :i-o-c)
("3oddcoa" 3-KETOACYL-COA :i-o-c)
("3oddecACP" B-KETOACYL-ACP :i-o-c)
("3odecACP" B-KETOACYL-ACP :i-o-c)
("3ohexACP" B-KETOACYL-ACP :i-o-c)
("3omrsACP" B-KETOACYL-ACP :i-o-c)
("3oocoa" 3-KETOACYL-COA :i-o-c)
("3ooctACP" B-KETOACYL-ACP :i-o-c)
("3ooctdACP" B-KETOACYL-ACP :i-o-c)
("3oodcoa" 3-KETOACYL-COA :i-o-c)
("3opalmACP" B-KETOACYL-ACP :i-o-c)
("3otdcoa" 3-KETOACYL-COA :i-o-c)
("acgal1p" CPD-7246 :metacyc)
("acolipa" L-ARA4N-MODIFIED-KDO2-LIPID-A)
("alpp" EG10544-MONOMER :protein-instance) ;; murein lipoprotein should rather be a class... but see below, what is "alpp" ???
("apg120" |Phospholipids| :i-o-c) ;; see the open question about "apg120" to "apg181" at the end of this file
("apg140" |Phospholipids| :i-o-c)
("apg141" |Phospholipids| :i-o-c)
("apg160" |Phospholipids| :i-o-c)
("apg161" |Phospholipids| :i-o-c)
("apg180" |Phospholipids| :i-o-c)
("apg181" |Phospholipids| :i-o-c)
("bglycogen" |Glycogens| :polymer-section) ;; in ecocyc, there is both |Glycogens| and |6-alpha-D--1-4-alpha-D-Glucano--Glucan| ...
("but2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("butACP" BUTYRYL-ACP) ;; AF just had a mapping to class ACYL-ACP . also, see the open questions at the end of this file.
("ca2" CA+2)
("cddec5eACP" CIS-DELTA5-DODECENOYL-ACP)
("cdec3eACP" CIS-DELTA3-DECENOYL-ACP)
("cdpdddecg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdhdec9eg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdhdecg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdodec11eg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdodecg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdtdec7eg" CDPDIACYLGLYCEROL :i-o-c)
("cdpdtdecg" CDPDIACYLGLYCEROL :i-o-c)
("cenchddd" CARBOXYETHYL-3-5-CYCLOHEXADIENE-1-2-DIOL)
("clpn120" CARDIOLIPIN :i-o-c)
("clpn140" CARDIOLIPIN :i-o-c)
("clpn141" CARDIOLIPIN :i-o-c)
("clpn160" CARDIOLIPIN :i-o-c)
("clpn161" CARDIOLIPIN :i-o-c)
("clpn180" CARDIOLIPIN :i-o-c)
("clpn181" CARDIOLIPIN :i-o-c)
("cpe160" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
("cpe180" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
("cpg160" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
("cpg180" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
("cpgn" CPD0-621)
("dcaACP" ACYL-ACP :i-o-c)
("dcacoa" |All-Coas| :i-o-c)
("dd2coa" CPD-7222 :metacyc)
("ddcaACP" LAUROYL-ACP)
("dhptd" DIHYDROXYPENTANEDIONE :metacyc)
("didp" |All-Nucleosides| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
;;kr:Aug-21-2006 i ignored all the dsbABCG proteins, involved in disulfide exchanges.
;; doesn't seem like very clear rxn eqns will come out of that...
("ethso3" SULFONATES :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("fe3hox" |Ferric-Hydroxamate-Complexes| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
;; several siderophore related things missing in biocyc
("g3pg" GLYCEROPHOSPHOGLYCEROL :metacyc)
("gmhep7p" D-ALPHABETA-D-HEPTOSE-7-PHOSPHATE) ;; AF said: could be: E. coli K-12 Compound: D-alpha,beta-D-heptose-7-phosphate, but structure is not given in EcoCyc and names are not identical
("grxrd" GLUTAREDOXIN-1-REDUCED :i-o-c) ;;kr:Aug-21-2006 the reduced glutaredoxin classes are a mess...
("hdcoa" |All-Coas| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("hdd2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("hdeACP" PALMITOLEOYL-ACP)
("hexACP" ACYL-ACP :i-o-c)
("hx2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("LalaDglu" L-ALA-GAMMA-D-GLU) ;; AF said not in biocyc. but it is gamma-D-glu, right ? yes.
("LalaDgluMdap" L-ALA-GAMMA-D-GLU-DAP) ;; AF said not in biocyc. but it is gamma-D-glu, right ? yes.
;;("LalaDgluMdapDala" ) ;;kr:Aug-21-2006 this one we are missing...
;;("LalaLglu" ) ;;kr:Aug-21-2006 this one we are missing...
("lald-D" CPD-358 :metacyc)
;;kr:Aug-23-2006 According to AF's email reply, "lpp" is "alpp" with a palmitate attached (by cutE). EcoCyc should have that as a mod.protein, but does not.
;;("lpp" EG10544-MONOMER :protein-instance) ;; murein lipoprotein should rather be a class...
("malthp" |Oligosaccharides| :i-o-c) ;; AF assigned this class, but it probably is too general. maybe it should be |16-alpha-D-Mannosyloligosaccharides| ?
("myrsACP" MYRISTOYL-ACP) ;; AF just had a mapping to class ACYL-ACP . also, see the open questions at the end of this file.
("oc2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("ocACP" OCTANOYL-ACP)
("ocdcaACP" ACYL-ACP :i-o-c)
("octeACP" ACYL-ACP :i-o-c)
("od2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("odecoa" STEAROYL-COA :metacyc)
("pa120" L-PHOSPHATIDATE :i-o-c)
("pa140" L-PHOSPHATIDATE :i-o-c)
("pa141" L-PHOSPHATIDATE :i-o-c)
("pa160" L-PHOSPHATIDATE :i-o-c)
("pa161" L-PHOSPHATIDATE :i-o-c)
("pa180" L-PHOSPHATIDATE :i-o-c)
("pa181" L-PHOSPHATIDATE :i-o-c)
("palmACP" ACYL-ACP :i-o-c)
("pe120" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c) ;; AF has comment, need to look at naming, L- and 1- , but should be ok...
("pe140" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pe141" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pe160" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pe161" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pe180" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pe181" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
("pg120" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c) ;; "" (ditto: same naming comment as for "pe120")
("pg140" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pg141" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pg160" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pg161" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pg180" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pg181" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
("pgp120" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c) ;; "" (ditto: same naming comment as for "pe120")
("pgp140" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("pgp141" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("pgp160" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("pgp161" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("pgp180" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("pgp181" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
("ps120" L-1-PHOSPHATIDYL-SERINE :i-o-c) ;; "" (ditto: same naming comment as for "pe120")
("ps140" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("ps141" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("ps160" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("ps161" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("ps180" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("ps181" L-1-PHOSPHATIDYL-SERINE :i-o-c)
("s" CPD-249)
("sulfac" |Alkanesulfonates| :i-o-c)
("t3c11vaceACP" TRANS-D2-ENOYL-ACP :i-o-c)
("t3c5ddeceACP" TRANS-DELTA3-CIS-DELTA5-DODECENOYL-ACP)
("t3c7mrseACP" TRANS-D2-ENOYL-ACP :i-o-c)
("t3c9palmeACP" TRANS-D2-ENOYL-ACP :i-o-c)
("td2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("tddec2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("tdeACP" ACYL-ACP :i-o-c)
("tdec2eACP" TRANS-D2-DECENOYL-ACP)
("tdecoa" ACYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
("thex2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("tmrs2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("toct2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("toctd2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("tpalm2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
("uagmda" C5)
;;kr:Aug-29-2006 For iAF1243 , added this one, after email exchange with AF:
("dtdp4addg" TDP-D-FUCOSAMINE)
) )
#|| ;;;kr:Aug-21-2006 questions to ask AF
- "3hmrsACP" should map to 3-OHMYRISTOYL-ACP , but formula is very different: C25H47N2O9PRS (???) vs. C14H27O2ACP
- "apg120" to "apg181" : AF comments: "phospholipid with 3 acyl chains" why 3 acyl chains ? the R hanging off the phosphate is not really a fatty acyl
- "butACP" should map to BUTYRYL-ACP , but formula is very different: C15H27N2O8PRS (???) vs. C4H7OSACP
- what is the difference between "lpp" and "alpp" ? are they the same ecocyc protein EG10544-MONOMER ? if not, what is "alpp" ??
- "myrsACP" should map to MYRISTOYL-ACP , but formula is very surprising: C25H47N2O8PRS (???) vs. no structure, but something like C14
||#