(in-package :ecocyc)

;;;kr:Aug-4-2006 Put this in its own file.
;;;
;;; Also, expanded the information in each assignment to include analysis symbols.
;;; Each mapping is a list, where the first element is a string, which is the abbreviation
;;; of the palsson-cpd that is being mapped.
;;; The second element is a frame ID, coming from either EcoCyc or MetaCyc.
;;;
;;; The remaining optional elements in a list are symbols that will be placed in the
;;; analysis slot of the palsson-cpd structure.
;;;    --empty--         = If there is no additional analysis symbol, it means just a direct match with EcoCyc.
;;;    :metacyc          = (no description was given; presumably the frame ID comes from MetaCyc -- TODO confirm)
;;;    :i-o-c            = instance of a class  (the AF cpd is an instance of a biocyc class)
;;;    :dispute          = some unclarified disagreement between AF and biocyc
;;;    :protein-instance = links to an ecocyc protein, which ought to be generalized to a class instead
;;;    :polymer-section  = a hypothetical, hopefully representative segment out of a much larger polymer
;;; Basically all mappings in this file should also carry this symbol:
;;;    :manual           = The match was made by human inspection, not by automated software, which for
;;;                        the most part is in (map-palsson-cpd-to-ecocyc ...)
;;; NOTE(review): none of the entries below actually lists :manual. Whether the consumer of this
;;; table adds it implicitly cannot be seen from this file -- confirm before relying on it.
;;;
;;; Formatting note: keep every entry (and every comment) on its own physical line.
;;; A semicolon comment extends to the end of the line, so joining lines silently
;;; comments out every entry that follows a comment on the same line.

;;;kr:Jan-6-2006 For wiring up some mappings by hand, from the abbreviation to frame ID :
;;;
(defparameter *one-time-cpd-assignments*
  '(
    ;;kr:Aug-22-2006 probably, the following 3 have been replaced in the meantime by expanded out instances...
    ("cpe_EC" L-1-PHOSPHATIDYL-ETHANOLAMINE)
    ("pe_EC" L-1-PHOSPHATIDYL-ETHANOLAMINE) ;; C00350   why are there 2 of these ???
    ("ps_EC" L-1-PHOSPHATIDYL-SERINE)
    ("arab-L" ARABINOSE)
    ("glu-D" D-GLT)
    ("scsertrna" |Charged-SEC-tRNAs|)

    ;;kr:Jan-14-2006 After elements were excluded from the search indices:
    ("ni2" NI+2)
    ("k" K+)
    ("cd2" CD+2)
    ("ag" AG+) ;;kr:Mar-1-2006 Used to be CPD-1485 , but must have gotten renamed.
    ("na1" NA+)
    ("zn2" ZN+2)
    ("mg2" MG+2)

    ;;kr:Mar-1-2006 These new additions became necessary, as some frames must have been deleted or changed...
    ("sucgsa" CPD-822) ;; used to be N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE
    ("fldox" |Oxidized-flavodoxins|) ;; used to be OX-FLAVODOXIN
    ("fldrd" |Reduced-flavodoxins|) ;; used to be RED-FLAVODOXIN

    ;;kr:Jul-27-2006 finally fixed this long-standing ambiguity:
    ("sectrna" |Charged-SEC-tRNAs|) ;; the class for "L-selenocysteinyl-tRNAsec" , used in 2.9.1.1-RXN
    ("sertrna(sec)" |L-seryl-SEC-tRNAs|) ;; for "L-seryl-tRNAsec" , used in 2.9.1.1-RXN

    ;;kr:Aug-4-2006 Adam Feist hand-mapped almost 300 cpds from the iAF1237 spreadsheet.
    ;; The following are taken from /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/all-unmapped-cpds-060721.xls
    ("12dgr120" DIACYLGLYCEROL :i-o-c)
    ("12dgr140" DIACYLGLYCEROL :i-o-c)
    ("12dgr141" DIACYLGLYCEROL :i-o-c)
    ("12dgr160" DIACYLGLYCEROL :i-o-c)
    ("12dgr161" DIACYLGLYCEROL :i-o-c)
    ("12dgr180" DIACYLGLYCEROL :i-o-c)
    ("12dgr181" DIACYLGLYCEROL :i-o-c)
    ("12ppd-S" |PROPANE-1,2-DIOL|) ;; synonym with (S) is missing.  also, the (R) enantiomer is completely absent.

    ;; lots of lyso confusion.
    ;;kr:Aug-22-2006 i think biocyc does not even have the correct classes to map the AF compounds...
    ("1ddecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1hdec9eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1hdecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1odec11eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1odecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1tdec7eg3p" ACYL-SN-GLYCEROL-3P :i-o-c)
    ("1tdecg3p" ACYL-SN-GLYCEROL-3P :i-o-c)

    ("23ccmp" CPD-3713 :metacyc)
    ("23cump" CPD-3725 :metacyc)
    ("2dmmql8" DEMETHYLMENAQUINONE :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
    ("2ombzl" OCTAPRENYL-METHOXY-BENZOQUINONE :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
    ("2omhmbl" OCTAPRENYL-METHYL-OH-METHOXY-BENZQ :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
    ("2ommbl" OCTAPRENYL-METHYL-METHOXY-BENZQ :dispute) ;; AF thinks it should be a quinol, but Alex Shearer thinks it is a quinone. (?)
    ("3c4mop" CPD-7100)
    ("3haACP" OH-ACYL-ACP :i-o-c)
    ("3hcddec5eACP" BETA-HYDROXY-CIS-DELTA5-DODECENOYL-ACP)
    ("3hcmrs7eACP" OH-ACYL-ACP :i-o-c)
    ("3hcpalm9eACP" OH-ACYL-ACP :i-o-c)
    ("3hcvac11eACP" OH-ACYL-ACP :i-o-c)
    ("3hdcoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3hddcoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3hddecACP" OH-ACYL-ACP :i-o-c)
    ("3hdecACP" BETA-HYDROXYDECANOYL-ACP)
    ("3hhdcoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3hhexACP" OH-ACYL-ACP :i-o-c)
    ("3hmrsACP" 3-OHMYRISTOYL-ACP) ;; AF just had a mapping to class OH-ACYL-ACP . also, see below for ???
    ("3hocoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3hoctaACP" OH-ACYL-ACP :i-o-c)
    ("3hoctACP" OH-ACYL-ACP :i-o-c)
    ("3hodcoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3hpalmACP" OH-ACYL-ACP :i-o-c)
    ("3htdcoa" L-3-HYDROXYACYL-COA :i-o-c)
    ("3ocddec5eACP" BETA-KETO-CIS-DELTA5-DODECENOYL-ACP)
    ("3ocmrs7eACP" B-KETOACYL-ACP :i-o-c)
    ("3ocpalm9eACP" B-KETOACYL-ACP :i-o-c)
    ("3ocvac11eACP" B-KETOACYL-ACP :i-o-c)
    ("3odcoa" 3-KETOACYL-COA :i-o-c)
    ("3oddcoa" 3-KETOACYL-COA :i-o-c)
    ("3oddecACP" B-KETOACYL-ACP :i-o-c)
    ("3odecACP" B-KETOACYL-ACP :i-o-c)
    ("3ohexACP" B-KETOACYL-ACP :i-o-c)
    ("3omrsACP" B-KETOACYL-ACP :i-o-c)
    ("3oocoa" 3-KETOACYL-COA :i-o-c)
    ("3ooctACP" B-KETOACYL-ACP :i-o-c)
    ("3ooctdACP" B-KETOACYL-ACP :i-o-c)
    ("3oodcoa" 3-KETOACYL-COA :i-o-c)
    ("3opalmACP" B-KETOACYL-ACP :i-o-c)
    ("3otdcoa" 3-KETOACYL-COA :i-o-c)
    ("acgal1p" CPD-7246 :metacyc)
    ("acolipa" L-ARA4N-MODIFIED-KDO2-LIPID-A)
    ("alpp" EG10544-MONOMER :protein-instance) ;; murein lipoprotein should rather be a class...  but see below, what is "alpp" ???
    ("apg120" |Phospholipids| :i-o-c) ;; see q. below
    ("apg140" |Phospholipids| :i-o-c)
    ("apg141" |Phospholipids| :i-o-c)
    ("apg160" |Phospholipids| :i-o-c)
    ("apg161" |Phospholipids| :i-o-c)
    ("apg180" |Phospholipids| :i-o-c)
    ("apg181" |Phospholipids| :i-o-c)
    ("bglycogen" |Glycogens| :polymer-section) ;; in ecocyc, there is both |Glycogens| and |6-alpha-D--1-4-alpha-D-Glucano--Glucan| ...
    ("but2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("butACP" BUTYRYL-ACP) ;; AF just had a mapping to class ACYL-ACP . also, see below for ???
    ("ca2" CA+2)
    ("cddec5eACP" CIS-DELTA5-DODECENOYL-ACP)
    ("cdec3eACP" CIS-DELTA3-DECENOYL-ACP)
    ("cdpdddecg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdhdec9eg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdhdecg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdodec11eg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdodecg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdtdec7eg" CDPDIACYLGLYCEROL :i-o-c)
    ("cdpdtdecg" CDPDIACYLGLYCEROL :i-o-c)
    ("cenchddd" CARBOXYETHYL-3-5-CYCLOHEXADIENE-1-2-DIOL)
    ("clpn120" CARDIOLIPIN :i-o-c)
    ("clpn140" CARDIOLIPIN :i-o-c)
    ("clpn141" CARDIOLIPIN :i-o-c)
    ("clpn160" CARDIOLIPIN :i-o-c)
    ("clpn161" CARDIOLIPIN :i-o-c)
    ("clpn180" CARDIOLIPIN :i-o-c)
    ("clpn181" CARDIOLIPIN :i-o-c)
    ("cpe160" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
    ("cpe180" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
    ("cpg160" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
    ("cpg180" |Phospholipid-Cyclopropane-Fatty-Acids| :i-o-c)
    ("cpgn" CPD0-621)
    ("dcaACP" ACYL-ACP :i-o-c)
    ("dcacoa" |All-Coas| :i-o-c)
    ("dd2coa" CPD-7222 :metacyc)
    ("ddcaACP" LAUROYL-ACP)
    ("dhptd" DIHYDROXYPENTANEDIONE :metacyc)
    ("didp" |All-Nucleosides| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too

    ;;kr:Aug-21-2006 i ignored all the dsbABCG proteins, involved in disulfide exchanges.
    ;; doesn't seem like very clear rxn eqns will come out of that...
    ("ethso3" SULFONATES :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("fe3hox" |Ferric-Hydroxamate-Complexes| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ;; several siderophore related things missing in biocyc
    ("g3pg" GLYCEROPHOSPHOGLYCEROL :metacyc)
    ("gmhep7p" D-ALPHABETA-D-HEPTOSE-7-PHOSPHATE) ;; AF said: could be: E. coli K-12 Compound: D-alpha,beta-D-heptose-7-phosphate, but structure is not given in EcoCyc and names are not identical
    ("grxrd" GLUTAREDOXIN-1-REDUCED :i-o-c) ;;kr:Aug-21-2006 the reduced glutaredoxin classes are a mess...
    ("hdcoa" |All-Coas| :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("hdd2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("hdeACP" PALMITOLEOYL-ACP)
    ("hexACP" ACYL-ACP :i-o-c)
    ("hx2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("LalaDglu" L-ALA-GAMMA-D-GLU) ;; AF said not in biocyc.  but it is gamma-D-glu, right ?  yes.
    ("LalaDgluMdap" L-ALA-GAMMA-D-GLU-DAP) ;; AF said not in biocyc.  but it is gamma-D-glu, right ?  yes.
    ;;("LalaDgluMdapDala" ) ;;kr:Aug-21-2006 this one we are missing...
    ;;("LalaLglu" ) ;;kr:Aug-21-2006 this one we are missing...
    ("lald-D" CPD-358 :metacyc)
    ;;kr:Aug-23-2006 According to AF's email reply, "lpp" is "alpp" with a palmitate attached (by cutE).  EcoCyc should have that as a mod.protein, but does not.
    ;;("lpp" EG10544-MONOMER :protein-instance) ;; murein lipoprotein should rather be a class...
    ("malthp" |Oligosaccharides| :i-o-c) ;; AF assigned this class, but it probably is too general.  maybe it should be |16-alpha-D-Mannosyloligosaccharides| ?
    ("myrsACP" MYRISTOYL-ACP) ;; AF just had a mapping to class ACYL-ACP . also, see below for ???
    ("oc2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("ocACP" OCTANOYL-ACP)
    ("ocdcaACP" ACYL-ACP :i-o-c)
    ("octeACP" ACYL-ACP :i-o-c)
    ("od2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("odecoa" STEAROYL-COA :metacyc)
    ("pa120" L-PHOSPHATIDATE :i-o-c)
    ("pa140" L-PHOSPHATIDATE :i-o-c)
    ("pa141" L-PHOSPHATIDATE :i-o-c)
    ("pa160" L-PHOSPHATIDATE :i-o-c)
    ("pa161" L-PHOSPHATIDATE :i-o-c)
    ("pa180" L-PHOSPHATIDATE :i-o-c)
    ("pa181" L-PHOSPHATIDATE :i-o-c)
    ("palmACP" ACYL-ACP :i-o-c)
    ("pe120" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c) ;; AF has comment, need to look at naming, L- and 1- , but should be ok...
    ("pe140" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pe141" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pe160" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pe161" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pe180" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pe181" L-1-PHOSPHATIDYL-ETHANOLAMINE :i-o-c)
    ("pg120" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c) ;; ""  (ditto mark; presumably repeats the remark on "pe120")
    ("pg140" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pg141" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pg160" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pg161" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pg180" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pg181" L-1-PHOSPHATIDYL-GLYCEROL :i-o-c)
    ("pgp120" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c) ;; ""  (ditto mark; presumably repeats the remark on "pe120")
    ("pgp140" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("pgp141" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("pgp160" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("pgp161" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("pgp180" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("pgp181" L-1-PHOSPHATIDYL-GLYCEROL-P :i-o-c)
    ("ps120" L-1-PHOSPHATIDYL-SERINE :i-o-c) ;; ""  (ditto mark; presumably repeats the remark on "pe120")
    ("ps140" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("ps141" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("ps160" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("ps161" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("ps180" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("ps181" L-1-PHOSPHATIDYL-SERINE :i-o-c)
    ("s" CPD-249)
    ("sulfac" |Alkanesulfonates| :i-o-c)
    ("t3c11vaceACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("t3c5ddeceACP" TRANS-DELTA3-CIS-DELTA5-DODECENOYL-ACP)
    ("t3c7mrseACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("t3c9palmeACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("td2coa" TRANS-D2-ENOYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("tddec2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("tdeACP" ACYL-ACP :i-o-c)
    ("tdec2eACP" TRANS-D2-DECENOYL-ACP)
    ("tdecoa" ACYL-COA :i-o-c) ;; AF said it is in metacyc, but the class is in ecocyc too
    ("thex2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("tmrs2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("toct2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("toctd2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("tpalm2eACP" TRANS-D2-ENOYL-ACP :i-o-c)
    ("uagmda" C5)

    ;;kr:Aug-29-2006 For iAF1243 , added this one, after email exchange with AF:
    ("dtdp4addg" TDP-D-FUCOSAMINE)
    )
  "Hand-made compound mappings. Each entry is a list (ABBREVIATION FRAME-ID . ANALYSIS-SYMBOLS):
a palsson-cpd abbreviation string, a frame ID symbol, and zero or more analysis keywords
such as :metacyc, :i-o-c, :dispute, :protein-instance or :polymer-section.")

#||
;;;kr:Aug-21-2006  questions to ask AF

- "3hmrsACP" should map to 3-OHMYRISTOYL-ACP , but formula is very different:
  C25H47N2O9PRS (???)  vs.  C14H27O2ACP

- "apg120" to "apg181" : AF comments: "phospholipid with 3 acyl chains"
  why 3 acyl chains ?  the R hanging off the phosphate is not really a fatty acyl

- "butACP" should map to BUTYRYL-ACP , but formula is very different:
  C15H27N2O8PRS (???)  vs.  C4H7OSACP

- what is the difference between "lpp" and "alpp" ?
  are they the same ecocyc protein EG10544-MONOMER ?
  if not, what is "alpp" ??

- "myrsACP" should map to MYRISTOYL-ACP , but formula is very surprising:
  C25H47N2O8PRS (???)  vs.  no structure, but something like C14
||#