(in-package :ecocyc) #||;;;kr:Nov-12-2005 the actual invocation: (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: KEGG "C00559" pulled out DEOXYADENOSINE , whereas CAS "958-09-8" pulled out DAMP Warning: CAS ID "358-71-4" pulled out more than one cpd, namely: (CPD-4211 |delta(3)-isopentenyl-pp|) Warning: CAS ID "146-14-5" pulled out more than one cpd, namely: (FAD FARNESYL-PP) Warning: CAS ID "358-71-4" pulled out more than one cpd, namely: (CPD-4211 |delta(3)-isopentenyl-pp|) Warning: LIGAND ID "C00092" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) Warning: CAS ID "56-73-5" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) Warning: KEGG "C00092" pulled out ALPHA-GLC-6-P , CAS "56-73-5" pulled out ALPHA-GLC-6-P , whereas name "D-Glucose 6-phosphate" pulled out GLC-6-P Warning: KEGG "C00064" pulled out GLN , whereas CAS "56-85-9" pulled out GLUTAMIDE Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" pulled out GLT METHYLENE-THF : 5,10-methylene-THF 5,10-methenyl-thf : 5,10-methenyl-THF OCTAPRENYL-DIPHOSPHATE : octaprenyl diphosphate FARNESYLFARNESYLGERANYL-PP : farnesylfarnesylgeranyl-PP Warning: CAS ID "50-70-4" pulled out more than one cpd, namely: (SORBITOL CPD-378) Warning: KEGG "C00794" pulled out CPD-378 , whereas CAS "50-70-4" pulled out SORBITOL modified-charged-selC-tRNA : L-selenocysteinyl-tRNAsec Charged-SEC-tRNAs : L-selenocysteinyl-tRNAsec NIL EC(57): (defparameter *unmapped-palsson-cpds* (loop for p-cpd being each hash-key in *palsson-cpds-ht* using (hash-value pals-struct) unless (or (palsson-cpd-ecocyc-id pals-struct) (palsson-cpd-metacyc-id pals-struct) ) collect pals-struct) ) EC(66): (length *UNMAPPED-PALSSON-CPDS*) 296 281 ;;kr:Nov-17-2005 after ingrid's first pass of resolving the KEGG and CAS issues. 
277 ;;kr:Nov-23-2005 after searching for LIGAND-CPD instead of LIGAND ;;;kr:Dec-16-2005 [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: KEGG "C00259" pulled out ARABINOSE , CAS "147-81-9" pulled out ARABINOSE , whereas name "L-Arabinose" pulled out L-ARABINOSE Warning: LIGAND ID "C00092" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) Warning: CAS ID "56-73-5" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) Warning: KEGG "C00092" pulled out ALPHA-GLC-6-P , CAS "56-73-5" pulled out ALPHA-GLC-6-P , whereas name "D-Glucose 6-phosphate" pulled out GLC-6-P Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" pulled out GLT METHYLENE-THF : 5,10-methylene-THF 5,10-methenyl-thf : 5,10-methenyl-THF OCTAPRENYL-DIPHOSPHATE : octaprenyl diphosphate FARNESYLFARNESYLGERANYL-PP : farnesylfarnesylgeranyl-PP modified-charged-selC-tRNA : L-selenocysteinyl-tRNAsec Charged-SEC-tRNAs : L-selenocysteinyl-tRNAsec ;;;kr:Dec-16-2005 EC(6): (length *UNMAPPED-PALSSON-CPDS*) 276 ;;;kr:Nov-17-2005 dump out as a spreadsheet: (dump-palsson-cpds-to-tab-delimited-file (subseq *UNMAPPED-PALSSON-CPDS* 0 140) "/homedir/kr/ecocyc/metabology/palsson/unmapped-cpds-051117-1.csv") (dump-palsson-cpds-to-tab-delimited-file (subseq *UNMAPPED-PALSSON-CPDS* 140) "/homedir/kr/ecocyc/metabology/palsson/unmapped-cpds-051117-2.csv") ;;;kr:Nov-21-2005 try reading back in: (defparameter *intermediate-palsson-cpds* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/unmapped-cpds-051117-1-before-meta.csv" :only-read t) ) (defparameter *metacyc-palsson-cpds* (loop for p-cpd in *intermediate-palsson-cpds* when (and (palsson-cpd-analysis p-cpd) (>= (length (palsson-cpd-analysis p-cpd)) 7) (equalp "metacyc" (subseq (palsson-cpd-analysis p-cpd) 0 7)) ) collect p-cpd) ) (length *METACYC-PALSSON-CPDS*) ==> 7 ;;kr:Nov-21-2005 before metacyc mapping ;;;never mind. too small a gain... 
(loop for p-cpd in *intermediate-palsson-cpds* unless (palsson-cpd-analysis p-cpd) do ;;(setf (palsson-cpd-ecocyc-id pals-struct) (map-palsson-cpd-to-ecocyc pals-struct)) ) ;;;kr:Dec-16-2005 read back in results of manual analysis: (defparameter *analyzed-palsson-cpds-kr* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/unmapped-cpds-051117-1-051216.csv" :only-read t) ) (defparameter *analyzed-palsson-cpds-keseler* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/cpds_Markus1.csv" :only-read t) ) ;;;kr:Dec-28-2005 try reading back in: (defparameter *analyzed-unmapped-palsson-cpds-kr* (populate-palsson-cpds-ht "/home/kr/metabology/palsson-051206/unmapped-cpds-051117-1-051216.csv" :only-read t) ) (defparameter *analyzed-unmapped-palsson-cpds-keseler* (populate-palsson-cpds-ht "/home/kr/metabology/palsson-051206/cpds_Markus1.csv" :only-read t) ) (defparameter *analyzed-unmapped-palsson-cpds-total* (append *analyzed-unmapped-palsson-cpds-kr* *analyzed-unmapped-palsson-cpds-keseler*) ) ;;;kr:Dec-30-2005 (map nil #'interpret-palsson-cpd-analysis *analyzed-unmapped-palsson-cpds-kr*) (map nil #'interpret-palsson-cpd-analysis *analyzed-unmapped-palsson-cpds-keseler*) ;;;kr:Dec-31-2005 against dist 9.5 : Error: Frame ID CPD0-888 for "uLa4fn" is not coercible in EcoCyc. Error: Frame ID D-BETA-D-HEPTOSE-17-DIPHOSPHATE for "gmhep17bp" is not coercible in EcoCyc. Error: Frame ID CPD0-882 for "anhm" is not coercible in EcoCyc. Error: Frame ID HEPTA-ACYLATED-LIPID-A for "halipa" is not coercible in EcoCyc. Error: Frame ID |2,3-DIKETO-L-GULONATE| for "23doguln" is not coercible in EcoCyc. Error: Frame ID PHOSPHATIDYLETHANOLAMINE-KDO2 for "enlipa" is not coercible in EcoCyc. Error: Frame ID L-ARABINOSE for "arab-L" is not coercible in EcoCyc. 
;;; results in: EC(70): (length *UNMAPPED-PALSSON-CPDS*) 145 EC(71): EC(76): (length *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL*) 281 EC(77): (count "synonym" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 56 EC(78): (count "hack" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 27 EC(79): (count "class" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 9 EC(80): (count "protein" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 1 EC(81): (count "metacyc" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 45 EC(82): (count "synmeta" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 19 EC(87): (+ 56 27 9 1 45 19) 157 EC(83): (count "unknown" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 58 EC(84): (count "fatty" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 32 EC(85): (count "other" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 38 EC(86): (+ 58 32 38) 128 EC(88): (- 276 145) 131 ;;;kr:Jan-6-2006 EC(57): (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: KEGG "C00259" pulled out ARABINOSE , CAS "147-81-9" pulled out ARABINOSE , whereas name "L-Arabinose" pulled out L-ARABINOSE ;;; ARABINOSE is an instance of L-ARABINOSE ;;; ==> map by hand: ("arab-L" ARABINOSE) Warning: LIGAND ID "C00092" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) ;;; kr changed LIGAND "C00092" to LIGAND-CPD "C00668" for ALPHA-GLC-6-P Warning: CAS ID "56-73-5" pulled out more than one cpd, namely: (ALPHA-GLC-6-P GLC-6-P) ;;; kr removed CAS "56-73-5" from ALPHA-GLC-6-P Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" 
pulled out GLT ;;;** error: CAS "56-86-0" was added to both glu-D and glu-L , whereas in ecocyc, D-GLT has CAS "6893-26-1" ;;; ==> map by hand: ("glu-D" D-GLT) 5,10-methenyl-thf : 5,10-methenyl-THF v;;kr:Jan-9-2006 ==> removed erroneous synonym "5,10-methylenetetrahydrofolate" METHYLENE-THF : 5,10-methylene-THF FARNESYLFARNESYLGERANYL-PP : farnesylfarnesylgeranyl-PP OCTAPRENYL-DIPHOSPHATE : octaprenyl diphosphate ;;; kr merged (FARNESYLFARNESYLGERANYL-PP . OCTAPRENYL-DIPHOSPHATE) modified-charged-selC-tRNA : L-selenocysteinyl-tRNAsec ;; an instance Charged-SEC-tRNAs : L-selenocysteinyl-tRNAsec ;; the class. kr:Jan-9-2006 let's map to the class. ;;; ==> map by hand: ("scsertrna" |Charged-SEC-tRNAs|) ;;;kr:Jan-9-2006 EC(29): (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: KEGG "C00259" pulled out ARABINOSE , CAS "147-81-9" pulled out ARABINOSE , whereas name "L-Arabinose" pulled out L-ARABINOSE Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" pulled out GLT |modified-charged-selC-tRNA| : L-selenocysteinyl-tRNAsec |Charged-SEC-tRNAs| : L-selenocysteinyl-tRNAsec ;;; after commenting out the old synonym mapping additions... EC(31): (length *UNMAPPED-PALSSON-CPDS*) 368 EC(32): (- 368 281) 87 ;;; why are there now 87 more unmapped ones than earlier ??? 
(defparameter *analyzed-unmapped-palsson-cpds-kr* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/unmapped-cpds-051117-1-051216.csv" :only-read t) ) (defparameter *analyzed-unmapped-palsson-cpds-keseler* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/cpds_Markus1.csv" :only-read t) ) (defparameter *analyzed-unmapped-palsson-cpds-total* (append *analyzed-unmapped-palsson-cpds-kr* *analyzed-unmapped-palsson-cpds-keseler*) ) EC(41): (length *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL*) 281 (defparameter *puzzling-new-unmappeds* (set-difference *UNMAPPED-PALSSON-CPDS* *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-abbreviation :test #'equal) ) EC(45): (length *PUZZLING-NEW-UNMAPPEDS*) 91 ;;;kr:Jan-13-2006 after pkarp fixed a cpd lookup bug (involving html markup), which makes things work with (index-ekb) ;;; but still not yet on mysql, and after elements were excluded from regular cpd lookup: [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: KEGG "C00259" pulled out ARABINOSE , CAS "147-81-9" pulled out ARABINOSE , whereas name "L-Arabinose" pulled out L-ARABINOSE Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" pulled out GLT |modified-charged-selC-tRNA| : L-selenocysteinyl-tRNAsec |Charged-SEC-tRNAs| : L-selenocysteinyl-tRNAsec EC(23): (length *UNMAPPED-PALSSON-CPDS*) 293 ;;; ... same as above... EC(22): (length *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL*) 281 EC(23): (map nil #'interpret-palsson-cpd-analysis *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL*) Warning: "u3hga" already had an ecocyc-id of "UDP-OHMYR-GLUCOSAMINE" assigned ! Warning: "hkndd" already had an ecocyc-id of "CPD-157" assigned ! Warning: "6hmhpt" already had an ecocyc-id of "AMINO-OH-HYDROXYMETHYL-DIHYDROPTERIDINE" assigned ! Warning: "dtdp4aaddg" already had an ecocyc-id of "TDP-FUC4NAC" assigned ! 
Warning: "acglc-D" already had an metacyc-id of "unknown we have maltose as substrate for the reaction" assigned ! Warning: "N1aspmd" already had an metacyc-id of "coli has generic N-ACETYLSPERMIDINE using the N1 structure" assigned ! Warning: "pgp_EC" already had an metacyc-id of "ecocyc has it as a class" assigned ! Warning: "g3pi" already had an metacyc-id of "EcoCyc has the general substrate for the reaction" assigned ! Warning: "n8aspmd" already had an metacyc-id of "coli has generic N-ACETYLSPERMIDINE using the N1 structure" assigned ! Warning: "3oocoa" already had an metacyc-id of "pubchem 440608" assigned ! Warning: "glu-D" already had an ecocyc-id of "D-GLT" assigned ! Warning: "sbt-D" already had an ecocyc-id of "SORBITOL" assigned ! Warning: "fldox" already had an metacyc-id of "also protein OX-FLAVODOXIN1, OX-FLAVODOXIN2" assigned ! Warning: "sucgsa" already had an ecocyc-id of "N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE" assigned ! NIL EC(24): ;;;kr:Jan-14-2006 EC(35): (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-cpds.csv] Warning: abbreviation "ag" was assigned by hand-encoded mapping. Warning: KEGG "C00259" pulled out ARABINOSE , CAS "147-81-9" pulled out ARABINOSE , whereas name "L-Arabinose" pulled out L-ARABINOSE Warning: abbreviation "cd2" was assigned by hand-encoded mapping. Warning: abbreviation "cpe_EC" was assigned by hand-encoded mapping. Warning: KEGG "C00217" pulled out D-GLT , whereas CAS "56-86-0" pulled out GLT Warning: abbreviation "k" was assigned by hand-encoded mapping. Warning: abbreviation "mg2" was assigned by hand-encoded mapping. Warning: abbreviation "na1" was assigned by hand-encoded mapping. Warning: abbreviation "ni2" was assigned by hand-encoded mapping. Warning: abbreviation "pe_EC" was assigned by hand-encoded mapping. Warning: abbreviation "ps_EC" was assigned by hand-encoded mapping. 
|modified-charged-selC-tRNA| : L-selenocysteinyl-tRNAsec |Charged-SEC-tRNAs| : L-selenocysteinyl-tRNAsec Warning: abbreviation "zn2" was assigned by hand-encoded mapping. EC(13): (length *UNMAPPED-PALSSON-CPDS*) 274 EC(23): *PUZZLING-NEW-UNMAPPEDS* (#S(PALSSON-CPD :ABBREVIATION "palmACP" :OFFICIALNAME "Palmitoyl-ACP (n-C16:0ACP)" :FORMULA "C27H51N2O8PRS" :CHARGE -1 :CASNUMBER NIL :KEGG-CMPD-ID NIL :NOTES "tv JLR changed to fix ACP(used to be just R, now C11H20N2O7PRS)" :ECOCYC-ID NIL :ANALYSIS NIL :METACYC-ID NIL)) EC(29): (setf (PALSSON-CPD-ANALYSIS (gethash "palmACP" *palsson-cpds-ht*)) "fatty") EC(32): (map nil #'interpret-palsson-cpd-analysis *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL*) Warning: "u3hga" already had an ecocyc-id of "UDP-OHMYR-GLUCOSAMINE" assigned ! Warning: "hkndd" already had an ecocyc-id of "CPD-157" assigned ! Warning: "6hmhpt" already had an ecocyc-id of "AMINO-OH-HYDROXYMETHYL-DIHYDROPTERIDINE" assigned ! Warning: "dtdp4aaddg" already had an ecocyc-id of "TDP-FUC4NAC" assigned ! Warning: "acglc-D" already had an metacyc-id of "unknown we have maltose as substrate for the reaction" assigned ! Warning: "N1aspmd" already had an metacyc-id of "coli has generic N-ACETYLSPERMIDINE using the N1 structure…" assigned ! Warning: "pgp_EC" already had an metacyc-id of "ecocyc has it as a class" assigned ! Warning: "g3pi" already had an metacyc-id of "EcoCyc has the general substrate for the reaction" assigned ! Warning: "n8aspmd" already had an metacyc-id of "coli has generic N-ACETYLSPERMIDINE using the N1 structure…" assigned ! Warning: "3oocoa" already had an metacyc-id of "pubchem 440608" assigned ! Warning: "glu-D" already had an ecocyc-id of "D-GLT" assigned ! Warning: "sbt-D" already had an ecocyc-id of "SORBITOL" assigned ! Warning: "fldox" already had an metacyc-id of "also protein OX-FLAVODOXIN1, OX-FLAVODOXIN2" assigned ! Warning: "sucgsa" already had an ecocyc-id of "N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE" assigned ! 
NIL EC(33): EC(33): (count "synonym" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 56 EC(34): (count "hack" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 27 EC(35): (count "class" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 9 EC(36): (count "protein" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 1 EC(37): (count "metacyc" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 45 EC(38): (count "synmeta" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 19 EC(39): (+ 56 27 9 1 45 19) 157 EC(40): (count "unknown" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 58 EC(41): (count "fatty" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 32 EC(42): (count "other" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'search) 38 EC(43): (+ 58 32 38) 128 EC(44): EC(44): (- 274 128) 146 EC(45): EC(45): (defparameter *still-unmapped-palsson-cpds* (loop for p-cpd being each hash-key in *palsson-cpds-ht* using (hash-value pals-struct) unless (or (palsson-cpd-ecocyc-id pals-struct) (palsson-cpd-metacyc-id pals-struct) ) collect pals-struct) ) *STILL-UNMAPPED-PALSSON-CPDS* EC(46): (length *STILL-UNMAPPED-PALSSON-CPDS*) 123 EC(63): (+ 157 128) 285 ;;; which is more than the 281 we started with, so some got double-counted... ;;;kr:Jan-15-2006 new counting effort: (defun tagword-match (tag string) (when string ;;kr:Jan-15-2006 the :end2 forces the match to be at the beginning. (search (subseq tag 0 4) string :end2 4) ) ) EC(67): (tagword-match "meta" "metacyc") 0 EC(68): (tagword-match "meta" "whatever metacyc") NIL ;;;kr:Aug-23-2006 ==> see (full-tagword-match ...) instead !! 
(loop for p in *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* unless (find-if #'(lambda (tag) (full-tagword-match tag (palsson-cpd-analysis p)) ) #("synonym" "hack" "class" "protein" "metacyc" "synmeta" "unknown" "fatty" "other") ) collect p) ;;==> only found 3 with analysis NIL EC(102): (count "synonym" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 56 EC(103): (count "hack" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 27 EC(104): (count "class" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 7 EC(105): (count "protein" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 1 EC(106): (count "metacyc" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 42 EC(107): (count "synmeta" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 19 EC(108): (+ 56 27 7 1 42 19) 152 EC(109): (count "unknown" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 57 EC(110): (count "fatty" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 30 EC(111): (count "other" *ANALYZED-UNMAPPED-PALSSON-CPDS-TOTAL* :key #'palsson-cpd-analysis :test #'full-tagword-match) 38 EC(112): (+ 57 30 38) 125 EC(113): (+ 152 125) 277 ;;; + 3 with "NIL" : "dtdp4aaddg" "sbt-D" "sucgsa" + 1 with NIL : "glu-D" = 281 ;;; these 4 were all mapped to ecocyc. EC(120): (length *one-time-cpd-assignments*) 13 ;;; + "palmACP" see above ;;; summary: 885 cpds in AMF spreadsheet ;;; 281 could not be automatically matched to ecocyc ==> hand-analyzed by ingrid and kr ;;; - 95 associated with ecocyc ID ;;; - 61 associated with metacyc ID ;;; - 125 could not be found. of which 30 are fatty acid related. 
;;; another 14 could not be automatically matched to ecocyc and 13 were hand-mapped. 1 became fatty ;;;kr:Jan-15-2006 Final spreadsheet that maps the cpds: (dump-palsson-cpds-to-tab-delimited-file (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv" ) ;;;kr:Jan-18-2006 fix db of LIGAND to LIGAND-CPD in some cpds: (loop for c in (get-class-all-sub-frames '|Compounds|) for old-kegg-links = (get-links c :db 'LIGAND) do (loop for ol in old-kegg-links do (remove-links c 'LIGAND :oid (link-oid ol)) (setf (link-db ol) 'LIGAND-CPD) (add-link c ol) (format t "~S changed to ~S~%" (get-frame-name c) ol) ) ) ;;;kr:Jan-18-2006 ecocyc: EC(9): (save-kb) [****************Saving updates to KB ECOBASE... Saved Log file #p"/home/pfeiffer1/kr/ECOBASE/logfile0620" ** No Conflicting Updates were found ** Saving Operation-Log of length 170 to database server Transaction id 16208 generated Operation Log saved to database server [Modifications to KB ECOBASE: Modified frames: RIBOFLAVIN NIACINE NIACINAMIDE BIOTIN GALACTITOL L-LYXOSE L-FUCOSE 2-DEOXYRIBOSE MANNOSE RIBOSE MALTOTRIOSE CPD-1141 SUCROSE GALACTOSE D-ARABINOSE XYLOSE ARABINOSE GLC-6-P N-ACETYL-D-MANNOSAMINE N-ACETYL-D-GLUCOSAMINE ... 0 created, 85 modified, 0 indexed, 0 deleted] ;;;kr:Jan-18-2006 metacyc: EC(13): (save-kb) [****************Saving updates to KB METABASE... Saved Log file #p"/home/pfeiffer1/kr/METABASE/logfile0783" ** No Conflicting Updates were found ** Saving Operation-Log of length 212 to database server Transaction id 25405 generated Operation Log saved to database server [Modifications to KB METABASE: Modified frames: MET LEU VAL TYR TRP THR SER PRO PHE L-ALPHA-ALANINE ILE GLY GLN ASN PHOSPHOTHREONINE B-ALANINE L-METHIONINE_SULFOXIDE HOMO-SER ACETYL-GLU 3-SULFINOALANINE ... 
0 created, 106 modified, 0 indexed, 0 deleted] ;;;kr:Jan-18-2006 (defparameter *ecocyc-060114-cpd-mappings* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv" :only-read t) ) (map nil #'update-ecocyc-from-palsson-cpd *ecocyc-060114-cpd-mappings*) ;;; hand-edits to fix screw-ups: (the weird one) EC(54): (add-link 'CPD-3604 (make-link :db 'CAS :oid "14215-68-0")) EC(55): (add-link 'CPD-3604 (make-link :db 'CAS :oid "1811-31-0")) ;;; plus a ton of CAS IDs called "None"... ;;;resulting saves: see: /homedir/kr/ecocyc/metabology/palsson/updating-dblinks-060118.txt ||# #||;;;kr:Nov-13-2005 the actual invocation: (map-palsson-gpr-file "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-rxn-gpr.csv") ;;;kr:Nov-21-2005 ran again: [Processed 1964 data rows from file /homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-rxn-gpr.csv] ... p-rxns-with-unmapped-cpds : 901 p-rnxs-otherwise-unmapped : 486 ;;;kr:Feb-27-2006 (so t) (so 'meta) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv] Warning: For "sucgsa", the ecocyc-id N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE was not coercible to a frame. Warning: For "fldox", the ecocyc-id OX-FLAVODOXIN was not coercible to a frame. Warning: For "ag", the ecocyc-id CPD-1485 was not coercible to a frame. Warning: For "fldrd", the ecocyc-id RED-FLAVODOXIN was not coercible to a frame. ;;;kr:Mar-6-2006 (map-palsson-gpr-file "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-rxn-gpr.csv") ... 
*p-rxns-mapped-to-ecocyc* : 543 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 766 EC(23): (+ 543 654 766) 1963 EC(48): (remove-duplicates *p-rxns-mapped-to-ecocyc* :test #'equalp :key #'PALSSON-RXN-ECOCYC-RXN-IDS) EC(47): (length *) 526 some lists needed: one set for ecocyc and metacyc (and there should be flags stating how the rxn-directionality compares) - rxns that map flawlessly - rxns that map when 1 proton is added otherwise-unmapped: (the cpds matched, but the rxns didn't) - list of "trivial" transport-rxns ;;;kr:Mar-14-2006 filter out the simple exchange rxns: *p-rxns-mapped-to-ecocyc* : 544 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 369 *p-rnxs-unmapped-exchange* : 396 ;;;kr:Mar-20-2006 taking compartments into acct. in some detail: (puzzling)->unintended side-effect *p-rxns-mapped-to-ecocyc* : 377 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 630 *p-rnxs-unmapped-exchange* : 302 ;;;kr:Mar-20-2006 After fixing the problem that temporarily interfered with fuzzy rule processing... *p-rxns-mapped-to-ecocyc* : 658 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 350 *p-rnxs-unmapped-exchange* : 301 ;;;kr:Mar-21-2006 After applying fuzzy strategies to both rxn sides simultaneously. ;;; The rxn mapping now runs noticeably slower, but it found 30 more tricky cases. *p-rxns-mapped-to-ecocyc* : 688 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 320 *p-rnxs-unmapped-exchange* : 301 ;;;kr:Mar-21-2006 Taking into acct. finding rxns in metacyc (but not yet additional cpds) *p-rxns-mapped-to-ecocyc* : 688 *p-rxns-mapped-to-metacyc* : 48 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 273 *p-rnxs-unmapped-exchange* : 300 ;;;kr:Mar-22-2006 Taking into acct. finding cpds and rxns in metacyc *p-rxns-mapped-to-ecocyc* : 688 *p-rxns-mapped-to-metacyc* : 61 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 337 ;; seems like a slight routing error... 
*p-rnxs-unmapped-exchange* : 334 ;; seems like a slight routing error... ;;;kr:Mar-24-2006 It's now back to fast mapping. :-) ;;;kr:Mar-24-2006 After clearing my dcache that contained some hiccups... *p-rxns-mapped-to-ecocyc* : 689 *p-rxns-mapped-to-metacyc* : 61 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 336 *p-rnxs-unmapped-exchange* : 334 ;;;kr:Apr-6-2006 *p-rxns-mapped-to-ecocyc* : 691 *p-rxns-mapped-to-metacyc* : 59 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 336 *p-rnxs-unmapped-exchange* : 334 (dump-gene-rxn-report *p-rnxs-otherwise-unmapped* "~kr/unmapped-rxns-1.csv") ||# #|| ;;;kr:May-11-2006 (so 'ecoli "10.1") (index-ekb) (so 'meta "10.1") (index-ekb) (so 'ecoli) (sri:fload "/homedir/kr/ecocyc/metabology/palsson/model-comparison-palsson-ecocyc") EC(8): (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv] Warning: For "sucgsa", the ecocyc-id N2-SUCCINYLGLUTAMIC-SEMIALDEHYDE was not coercible to a frame. Warning: For "rhcys", the ecocyc-id CPD-561 was not coercible to a frame. Warning: For "fldox", the ecocyc-id OX-FLAVODOXIN was not coercible to a frame. Warning: For "ag", the ecocyc-id CPD-1485 was not coercible to a frame. Warning: For "fldrd", the ecocyc-id RED-FLAVODOXIN was not coercible to a frame. Warning: For "acg5sa", the ecocyc-id N-ACETYLGLUTAMATE_SEMIALDEHYDE was not coercible to a frame. (map-palsson-gpr-file "/homedir/kr/ecocyc/metabology/palsson/FBA_model-11-9-05-rxn-gpr.csv") *p-rxns-mapped-to-ecocyc* : 688 *p-rxns-mapped-to-metacyc* : 60 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 338 *p-rnxs-unmapped-exchange* : 334 ;;; the above 10.1 benchmark run was before any code changes... 
;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;kr:May-12-2006 After fuzzy strategies were applicable to cpd-specs too: *p-rxns-mapped-to-ecocyc* : 710 *p-rxns-mapped-to-metacyc* : 54 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 329 *p-rnxs-unmapped-exchange* : 327 ;;;kr:May-22-2006 After fixing 2 bugs in (find-rxns-by-substrates ...) regarding class matches: *p-rxns-mapped-to-ecocyc* : 750 *p-rxns-mapped-to-metacyc* : 56 *p-rxns-with-unmapped-cpds* : 543 *p-rnxs-otherwise-unmapped* : 291 ;; improvement so far: (- 338 291) ==> 47 *p-rnxs-unmapped-exchange* : 323 ||# #|| ;;;kr:June-16-2006 After workshop with Jeremy, etc. (so 'ecoli "10.1") (index-ekb) (so 'meta "10.1") (index-ekb) (so 'ecoli) (sri:fload "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/ptools/model-comparison-palsson-ecocyc") (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv") [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv] (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt") *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 738 *p-rxns-mapped-to-metacyc* : 51 *p-rxns-with-unmapped-cpds* : 930 *p-rnxs-otherwise-unmapped* : 292 *p-rnxs-unmapped-exchange* : 333 (defparameter *p-rxns-transport* (loop for p-r in *p-rxns-all* when (search "transport" (palsson-rxn-officialName p-r) :test #'char-equal) collect p-r) ) EC(17): (length *P-RXNS-TRANSPORT*) 616 EC(18): (dump-gene-rxn-report *P-RXNS-TRANSPORT* "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/transport-rxn-clusters-2006-06-18-iAF1237.txt") ||# #|| ;;;kr:Jul-20-2006 Now, there have been many schema/code changes, so I'll try using the most current versions; (so 'ecoli) (index-ekb) (so 'meta) (index-ekb) (so 'ecoli) (sri:fload "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/ptools/model-comparison-palsson-ecocyc") (defparameter *palsson-cpd-list* (populate-palsson-cpds-ht 
"/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/cpds-6-16-06-iAF1237.csv") ) 1000 [Processed 1037 data rows from file /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/cpds-6-16-06-iAF1237.csv] Warning: CAS ID "147-81-9" pulled out more than one cpd, namely: (L-ARABINOSE ARABINOSE) Warning: KEGG NIL pulled out NIL , whereas CAS "147-81-9" pulled out MULTIPLE-HITS Warning: CAS ID "65207-12-7" pulled out more than one cpd, namely: (DIHYDROSIROHYDROCHLORIN SIROHYDROCHLORIN) Warning: KEGG NIL pulled out NIL , whereas CAS "65207-12-7" pulled out MULTIPLE-HITS Warning: CAS ID "20298-95-7" pulled out more than one cpd, namely: (CPD-618 CPD-534) Warning: KEGG NIL pulled out NIL , whereas CAS "20298-95-7" pulled out MULTIPLE-HITS Warning: CAS ID "53-84-9" pulled out more than one cpd, namely: (NAD NADH) Warning: KEGG NIL pulled out NIL , whereas CAS "53-84-9" pulled out MULTIPLE-HITS Warning: CAS ID "1339-63-5" pulled out more than one cpd, namely: (UBIQUINONE-8 UBIQUINONE-10) Warning: KEGG NIL pulled out NIL , whereas CAS "1339-63-5" pulled out MULTIPLE-HITS |modified-charged-selC-tRNA| : L-selenocysteinyl-tRNAsec |Charged-SEC-tRNAs| : L-selenocysteinyl-tRNAsec Warning: Xt: Name: nil Class: XmRowColumn XtGrabPointer failed. |charged-selC-tRNA| : L-seryl-tRNAsec |L-seryl-SEC-tRNAs| : L-seryl-tRNAsec Warning: Xt: Name: nil Class: XmRowColumn XtGrabPointer failed. ;;; see above... EC(13): (length *UNMAPPED-PALSSON-CPDS*) 360 (defparameter *060114-palsson-cpd-hash-table* (make-hash-table :test #'equalp)) (defparameter *060114-palsson-cpd-list* (populate-palsson-cpds-ht "/homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv" :ht *060114-palsson-cpd-hash-table*) ) [Processed 886 data rows from file /homedir/kr/ecocyc/metabology/palsson/ecocyc-060114-cpds-FBA-11-9-05.csv] ;;; which cpds in *UNMAPPED-PALSSON-CPDS* did have a mapping in *060114-palsson-cpd-list* ?? 
(defparameter *palsson-cpds-mapped-from-060114* (loop for pc in *UNMAPPED-PALSSON-CPDS* for abbrev = (palsson-cpd-abbreviation pc) for old-pc = (gethash abbrev *060114-palsson-cpd-hash-table*) when (and old-pc (or (palsson-cpd-ecocyc-id old-pc) (palsson-cpd-metacyc-id old-pc) ) (progn ;; if we get down here, we found something and should (destructively) transfer it: (when (palsson-cpd-ecocyc-id old-pc) (setf (palsson-cpd-ecocyc-id pc) (palsson-cpd-ecocyc-id old-pc)) ) (when (palsson-cpd-metacyc-id old-pc) (setf (palsson-cpd-metacyc-id pc) (palsson-cpd-metacyc-id old-pc)) ) ;; make sure the and won't fail down here: t) ) collect pc) ) EC(31): (length *PALSSON-CPDS-MAPPED-FROM-060114*) 75 ;;; see above... EC(37): (length *STILL-UNMAPPED-PALSSON-CPDS*) 285 (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt") ... *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 765 *p-rxns-mapped-to-metacyc* : 48 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 548 *p-rnxs-unmapped-exchange* : 329 ;;; prioritize unmapped cpds according to how often they are used: (defparameter *unmapd-stats-ht* (make-hash-table :test #'equalp)) (loop for pr in *p-rxns-with-unmapped-cpds* for left-cpds = (palsson-rxn-left-cpds pr) for right-cpds = (palsson-rxn-right-cpds pr) do (dolist (abb left-cpds) (when (stringp abb) (add-to-hash-slot pr abb *unmapd-stats-ht*) ) ) (dolist (abb right-cpds) (when (stringp abb) (add-to-hash-slot pr abb *unmapd-stats-ht*) ) ) ) ;;; from /homedir/kr/ecocyc/utils.lisp (hash-table-multiple-value-stats *unmapd-stats-ht* :sort-fn #'> :sort-which-p :val) EC(59): (length *) 526 (??)is it overcounting because some stoichs are stored in the strings?? 
(dotimes (i 100) (format t "~S~%" (elt ** i)) ) ==>saved in: /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/most-used-unmapped-cpds-060721.txt (dump-palsson-cpds-to-tab-delimited-file (remove nil (map 'list #'(lambda (abb-pair) (let* ((str (first abb-pair)) (pos (position #\Space str)) ) (when pos (setf str (subseq str (1+ pos))) ) (gethash str *palsson-cpds-ht*) ) ) (subseq (hash-table-multiple-value-stats *unmapd-stats-ht* :sort-fn #'> :sort-which-p :val) 0 100) ) ) "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/most-used-unmapped-cpds-060721.csv" ) ;;;kr:Jul-27-2006 saved in: /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/most-used-unmapped-cpds-remainder-060727.txt (map nil #'(lambda (x) (format t "~S~%" x)) (subseq (hash-table-multiple-value-stats *unmapd-stats-ht* :sort-fn #'> :sort-which-p :val) 100) ) ||# #||;;;kr:Aug-4-2006 kr:Aug-16-2006 kr:Aug-23-2006 (so 'ecoli "10.5") (index-ekb) (so 'meta "10.5") (index-ekb) (so 'ecoli) (sri:fload "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/ptools/hand-cpd-assignments") (sri:fload "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/ptools/model-comparison-palsson-ecocyc") ;;; This reads in and tries to map the cpds from AF's spreadsheet: (defparameter *palsson-cpd-list* (let* ((*ignore-interactive-duplicate-cpd-resolution* t)) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/cpds-6-16-06-iAF1237.csv") ) ) EC(12): (length *PALSSON-CPD-LIST*) 1036 EC(13): (defparameter *unmapped-palsson-cpds* (loop for p-cpd being each hash-key in *palsson-cpds-ht* using (hash-value pals-struct) unless (or (palsson-cpd-ecocyc-id pals-struct) (palsson-cpd-metacyc-id pals-struct) ) collect pals-struct) ) *UNMAPPED-PALSSON-CPDS* EC(14): (length *UNMAPPED-PALSSON-CPDS*) 191 EC(15): (defparameter *060114-palsson-cpd-hash-table* (make-hash-table :test #'equalp)) (defparameter *060114-palsson-cpd-list* (populate-palsson-cpds-ht 
"/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/ecocyc-060114-cpds-FBA-11-9-05.csv" :ht *060114-palsson-cpd-hash-table*) ) ;;; Which cpds in *UNMAPPED-PALSSON-CPDS* did have a mapping in *060114-palsson-cpd-list* ?? ;;; Transfer old mappings that were located. ;;; (defparameter *palsson-cpds-mapped-from-060114* (loop for pc in *UNMAPPED-PALSSON-CPDS* for abbrev = (palsson-cpd-abbreviation pc) for old-pc = (gethash abbrev *060114-palsson-cpd-hash-table*) when (and old-pc (or (palsson-cpd-ecocyc-id old-pc) (palsson-cpd-metacyc-id old-pc) ) (progn ;; if we get down here, we found something and should (destructively) transfer it: (when (palsson-cpd-ecocyc-id old-pc) (setf (palsson-cpd-ecocyc-id pc) (palsson-cpd-ecocyc-id old-pc)) ) (when (palsson-cpd-metacyc-id old-pc) (setf (palsson-cpd-metacyc-id pc) (palsson-cpd-metacyc-id old-pc)) ) ;;kr:Aug-23-2006 We need to rescue (and convert) the old analysis tags too: (setf (palsson-cpd-analysis pc) (map-old-style-cpd-analysis-to-new (palsson-cpd-analysis old-pc))) ;; make sure the (and ...) won't fail down here: t) ) collect pc) ) ;;; How many old mappings were located ? 
EC(25): (length *PALSSON-CPDS-MAPPED-FROM-060114*) 75 EC(26): (defparameter *still-unmapped-palsson-cpds* (loop for p-cpd being each hash-key in *palsson-cpds-ht* using (hash-value pals-struct) unless (or (palsson-cpd-ecocyc-id pals-struct) (palsson-cpd-metacyc-id pals-struct) ) collect pals-struct) ) *STILL-UNMAPPED-PALSSON-CPDS* EC(27): (length *STILL-UNMAPPED-PALSSON-CPDS*) 116 EC(28): ;;;kr:Aug-23-2006 Final spreadsheet that maps the cpds: (dump-palsson-cpds-to-tab-delimited-file (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1237-ecocyc-cpd-mappings-060823.csv" ) ;;;kr:Aug-23-2006 simple stats: EC(48): (count :manual (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 245 EC(49): (count :metacyc (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 60 EC(50): (count :i-o-c (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 133 EC(51): (count :dispute (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 4 EC(52): (count :protein-instance (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 1 EC(53): (count :polymer-section (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 1 EC(54): ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;kr:Aug-24-2006 rxn mapping ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; no code changes: ;;; Reload the cpd mappings that were stored: (defparameter *palsson-cpd-list* (let* ((*ignore-interactive-duplicate-cpd-resolution* t)) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1237-ecocyc-cpd-mappings-060823.csv") ) ) ;;; previously: (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt") [Processed 2345 data rows from file 
/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt] ... *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 765 *p-rxns-mapped-to-metacyc* : 48 *p-rxns-with-unmapped-cpds* : 654 *p-rnxs-otherwise-unmapped* : 548 *p-rnxs-unmapped-exchange* : 329 ;;;kr:Aug-24-2006 now: [Processed 2345 data rows from file /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt] ... *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 891 *p-rxns-mapped-to-metacyc* : 58 *p-rxns-with-unmapped-cpds* : 313 ;; this got noticeably reduced ! *p-rnxs-otherwise-unmapped* : 735 *p-rnxs-unmapped-exchange* : 347 NIL EC(33): ;;;kr:Aug-24-2006 Tightened the definition of simple exchange rxns. ;;; Find the wrongly classified ones with the new definition: (loop for pc in *p-rnxs-unmapped-exchange* unless (palsson-rxn-is-exchange-p pc) collect pc) (length *) ==> 64 transporters !! ;;;kr:Aug-25-2006 with some code changes: ;;; Now, class-instance relations are taken into account, by adding the :expanded-rxn-match tag. 
;;; *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 1012 *p-rxns-mapped-to-metacyc* : 64 *p-rxns-with-unmapped-cpds* : 313 *p-rnxs-otherwise-unmapped* : 691 *p-rnxs-unmapped-exchange* : 264 ; cpu time (non-gc) 4,568,940 msec (01:16:08.940) user, 1,750 msec system ; cpu time (gc) 7,600 msec user, 10 msec system ; cpu time (total) 4,576,540 msec (01:16:16.540) user, 1,760 msec system ; real time 4,579,157 msec (01:16:19.157) ; space allocation: ; 1,883,729,846 cons cells, 1,553,620,360 other bytes, 260,716 static bytes NIL EC(83): ;;;kr:Aug-27-2006 with class-instance relations, after some speed-up, and after ;;; hand-fixing 2 cpd mappings: (time (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-6-16-06-iAF1237.txt")) *p-rxns-all* : 2344 *p-rxns-mapped-to-ecocyc* : 1015 *p-rxns-mapped-to-metacyc* : 64 *p-rxns-with-unmapped-cpds* : 313 *p-rnxs-otherwise-unmapped* : 688 *p-rnxs-unmapped-exchange* : 264 ; cpu time (non-gc) 978,740 msec (00:16:18.740) user, 140 msec system ; cpu time (gc) 7,370 msec user, 40 msec system ; cpu time (total) 986,110 msec (00:16:26.110) user, 180 msec system ; real time 986,435 msec (00:16:26.435) ; space allocation: ; 316,758,811 cons cells, 399,320,088 other bytes, 0 static bytes NIL EC(12): ;;;kr:Aug-27-2006 Final spreadsheet that maps the rxns: (dump-palsson-rxns-to-tab-delimited-file *p-rxns-all* "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1237-ecocyc-rxn-mappings-060827.csv" ) ||# #|| ;;;kr:Aug-28-2006 some rxn stats EC(14): (count :metacyc *p-rxns-all* :key #'palsson-rxn-analysis :test #'find) 64 ;;;kr:Aug-28-2006 Can we get a "spectral analysis" of all the differing tags used ? 
: (let* ((ht (make-hash-table :test #'equal))) (loop for pc in *p-rxns-all* for analysis = (palsson-rxn-analysis pc) when analysis do (dolist (an-entry analysis) (add-to-hash-slot (palsson-rxn-abbreviation pc) an-entry ht) ) ) (dump-hash-table ht) ) EC(16): (map 'list #'(lambda (sublist) (list (first sublist) (length (second sublist)))) *) ((:METACYC 64) ((LEFT REMOVE PROTON) 164) ((LEFT SUBSTITUTE AMMONIA AMMONIUM) 5) (:EXPANDED-RXN-MATCH 132) ((RIGHT SUBSTITUTE AMMONIA AMMONIUM) 33) ((RIGHT REMOVE PROTON) 298) (:EXCHANGE 264) (:UNMAPPED-CPDS 313)) EC(17): ;;;kr:Aug-28-2006 Let's collect all the rxn ids from EcoCyc that were mapped to, so we can see which rxns ;;; are left in EcoCyc that were not utilized. ;;; However, this is probably a large number and not very useful, given so many unmapped rxns in general.... ;;; So: not done... (loop for pc in *p-rnxs-otherwise-unmapped* unless (palsson-rxn-has-non-default-compartments-p pc) collect pc) ==> 271 which all have the default-compartment ||# #||;;;kr:Aug-29-2006 transitioning over to the "final" model iAF1243 ;;; Reload the previous cpd mappings that were stored: (defparameter *iAF1237-palsson-cpd-hash-table* (make-hash-table :test #'equalp)) (defparameter *iAF1237-palsson-cpd-list* (let* ((*ignore-interactive-duplicate-cpd-resolution* t)) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1237-ecocyc-cpd-mappings-060823.csv" :ht *iAF1237-palsson-cpd-hash-table*) ) ) ;;;kr:Aug-29-2006 Load the brand new cpd sheet: ;;; This is lacking columns KEGG-cmpd-ID and notes (defparameter *palsson-cpd-list* (let* ((*ignore-interactive-duplicate-cpd-resolution* t)) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/cpds-iAF1243.csv") ) ) EC(25): (length *PALSSON-CPD-LIST*) 1038 EC(26): (length *iAF1237-palsson-cpd-list*) 1036 ;;;kr:Aug-29-2006 We should compare, and rescue over any information that we would not otherwise get: (dolist (pc *palsson-cpd-list*) (let* 
((abbreviation (palsson-cpd-abbreviation pc)) (prev-pc (gethash abbreviation *iAF1237-palsson-cpd-hash-table*)) ) (if prev-pc (progn (unless (equalp (palsson-cpd-officialName pc) (palsson-cpd-officialName prev-pc)) (warn "~S differs in its officialName . iAF1243: ~S iAF1237: ~S" abbreviation (palsson-cpd-officialName pc) (palsson-cpd-officialName prev-pc)) ) (when (palsson-cpd-KEGG-cmpd-ID prev-pc) ;; silently transfer (setf (palsson-cpd-KEGG-cmpd-ID pc) (palsson-cpd-KEGG-cmpd-ID prev-pc)) ) (when (palsson-cpd-notes prev-pc) ;; silently transfer (setf (palsson-cpd-notes pc) (palsson-cpd-notes prev-pc)) ) ;; ecocyc-id (if (palsson-cpd-ecocyc-id pc) (when (palsson-cpd-ecocyc-id prev-pc) (unless (eql (palsson-cpd-ecocyc-id pc) (palsson-cpd-ecocyc-id prev-pc)) (warn "~S differs in its ecocyc-id . iAF1243: ~S iAF1237: ~S" abbreviation (palsson-cpd-ecocyc-id pc) (palsson-cpd-ecocyc-id prev-pc)) ) ) (when (palsson-cpd-ecocyc-id prev-pc) ;; transfer (setf (palsson-cpd-ecocyc-id pc) (palsson-cpd-ecocyc-id prev-pc)) (warn "~S had ecocyc-id ~S transferred from iAF1237" abbreviation (palsson-cpd-ecocyc-id prev-pc)) ) ) ;; metacyc-id (if (palsson-cpd-metacyc-id pc) (when (palsson-cpd-metacyc-id prev-pc) (unless (eql (palsson-cpd-metacyc-id pc) (palsson-cpd-metacyc-id prev-pc)) (warn "~S differs in its metacyc-id . iAF1243: ~S iAF1237: ~S" abbreviation (palsson-cpd-metacyc-id pc) (palsson-cpd-metacyc-id prev-pc)) ) ) (when (palsson-cpd-metacyc-id prev-pc) ;; transfer (setf (palsson-cpd-metacyc-id pc) (palsson-cpd-metacyc-id prev-pc)) (warn "~S had metacyc-id ~S transferred from iAF1237" abbreviation (palsson-cpd-metacyc-id prev-pc)) ) ) ;; analysis (if (palsson-cpd-analysis pc) (when (palsson-cpd-analysis prev-pc) (unless (equal (palsson-cpd-analysis pc) (palsson-cpd-analysis prev-pc)) (warn "~S differs in its analysis . 
iAF1243: ~S iAF1237: ~S" abbreviation (palsson-cpd-analysis pc) (palsson-cpd-analysis prev-pc)) ) ) (when (palsson-cpd-analysis prev-pc) ;; transfer (setf (palsson-cpd-analysis pc) (palsson-cpd-analysis prev-pc)) (warn "~S had analysis ~S transferred from iAF1237" abbreviation (palsson-cpd-analysis prev-pc)) ) ) ) (warn "~S did not pull up a cpd from iAF1237" abbreviation) ) ) ) ;;;oops, hand-fixed: Warning: "dtdp4addg" had metacyc-id CPD-472 transferred from iAF1237 EC(18): (gethash "dtdp4addg" *palsson-cpds-ht*) #S(PALSSON-CPD :ABBREVIATION "dtdp4addg" :OFFICIALNAME "dTDP-4-amino-4,6-dideoxy-D-galactose" :FORMULA "C16H26N3O14P2" :CHARGE -1 :CASNUMBER NIL :KEGG-CMPD-ID NIL :NOTES NIL :ECOCYC-ID TDP-D-FUCOSAMINE :ANALYSIS (:MANUAL) :METACYC-ID CPD-472) T EC(19): (setf (PALSSON-CPD-METACYC-ID (gethash "dtdp4addg" *palsson-cpds-ht*)) nil) NIL ;;;kr:Aug-29-2006 simple stats: EC(22): (count :manual (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 245 EC(23): (count :metacyc (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) :key #'palsson-cpd-analysis :test #'find) 58 ;;;kr:Aug-29-2006 Final spreadsheet that maps the cpds: (dump-palsson-cpds-to-tab-delimited-file (map 'list #'second (dump-hash-table *palsson-cpds-ht*)) "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1243-ecocyc-cpd-mappings.csv" ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;kr:Aug-29-2006 rxn mapping ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Reload the cpd mappings that were stored: (defparameter *palsson-cpd-list* (let* ((*ignore-interactive-duplicate-cpd-resolution* t)) (populate-palsson-cpds-ht "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1243-ecocyc-cpd-mappings.csv") ) ) ;;; without code mod. 
to take into account diffusion vs exchange: (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-iAF1243.csv") [Processed 2367 data rows from file /homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-iAF1243.csv] ... *p-rxns-all* : 2366 *p-rxns-mapped-to-ecocyc* : 1018 *p-rxns-mapped-to-metacyc* : 64 *p-rxns-with-unmapped-cpds* : 317 *p-rnxs-otherwise-unmapped* : 698 *p-rnxs-unmapped-exchange* : 269 NIL EC(33): ;;;kr:Aug-29-2006 After code changes to distinguish between :diffusion and :exchange , ;;; according to the suggestion and correction by Adam Feist. (time (map-palsson-gpr-file "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/GPR-iAF1243.csv")) ... *p-rxns-all* : 2366 *p-rxns-mapped-to-ecocyc* : 987 *p-rxns-mapped-to-metacyc* : 64 *p-rxns-with-unmapped-cpds* : 294 *p-rnxs-otherwise-unmapped* : 452 *p-rnxs-unmapped-exchange* : 300 *p-rnxs-unmapped-diffusion* : 269 ; cpu time (non-gc) 985,310 msec (00:16:25.310) user, 400 msec system ; cpu time (gc) 2,850 msec user, 0 msec system ; cpu time (total) 988,160 msec (00:16:28.160) user, 400 msec system ; real time 988,923 msec (00:16:28.923) ; space allocation: ; 307,766,289 cons cells, 399,349,696 other bytes, 0 static bytes NIL EC(37): ;;;kr:Aug-29-2006 Final spreadsheet that maps the rxns: (dump-palsson-rxns-to-tab-delimited-file *p-rxns-all* "/homedir/kr/ecocyc/jeremy-svn/lsw/trunk/bug/iAF1237/iAF1243-ecocyc-rxn-mappings.csv" ) ;;;kr:Aug-28-2006 Can we get a "spectral analysis" of all the differing tags used ? 
: (let* ((ht (make-hash-table :test #'equal))) (loop for pc in *p-rxns-all* for analysis = (palsson-rxn-analysis pc) when analysis do (dolist (an-entry analysis) (add-to-hash-slot (palsson-rxn-abbreviation pc) an-entry ht) ) ) (dump-hash-table ht) ) EC(40): (map 'list #'(lambda (sublist) (list (first sublist) (length (second sublist)))) *) ((:METACYC 64) ((LEFT REMOVE PROTON) 164) ((LEFT SUBSTITUTE AMMONIA AMMONIUM) 5) (:EXPANDED-RXN-MATCH 113) ((RIGHT SUBSTITUTE AMMONIA AMMONIUM) 33) (:DIFFUSION 269) ((RIGHT REMOVE PROTON) 299) (:EXCHANGE 300) (:UNMAPPED-CPDS 294)) EC(41): ||# #||;;;kr:Aug-24-2006 reaction mapping questions. in GPR-6-16-06-iAF1237.txt ;;; how can diffusion be irreversible ?? - 14GLUCANtexi 1,4-alpha-D-glucan transport via diffusion (extracellular to periplasm) irreversible 14glucan[e] --> 14glucan[p] - (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "h[p] + pyr[p] <==> h[c] + pyr[c]" "PYRt2rpp")) ;;;kr:Aug-27-2006 AF does not associate any gene with this, so where is the evidence this even exists ? - (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : atp + gtp + h2o + so4 --> aps + gdp + pi + ppi" "SADT2")) ;;;kr:Aug-27-2006 in ecocyc, the rxn has no gtp hydrolysis, but a comment says that gtp hydrolysis ;;; seems to increase activity. - (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : (2) o2 + q8h2 --> (2) h + (2) o2s + q8" "QMO2")) (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : mql8 + (2) o2 --> (2) h + mqn8 + (2) o2s" "QMO3")) ;;;kr:Aug-27-2006 in ecocyc, ygiN is only linked to the rxn with menadiol... - (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "(2) h[p] + succ[p] --> (2) h[c] + succ[c]" "SUCCt2_2pp")) ;;;kr:Aug-27-2006 in ecocyc, dctA is described as a Na+ symporter. so where does the evidence of ;;; the 2 H+ come from ?? - (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : accoa + dtdp4addg --> coa + dtdp4aaddg + h" "TDPADGAT")) ;;;kr:Aug-27-2006 there seems to be a discrepancy between dtdp4addg and dtdp4aaddg . 
;;; the former refers to glucose (which is why it got mapped to CPD-472 , which for the rxn ;;; should probably have been TDP-D-FUCOSAMINE instead), whereas the latter refers to galactose. ;;; one of them can not be right. - mismatch: (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : (2) h2o + o2 + urate --> alltn + co2 + h2o2" "URIC")) ;;; probably should map to RXN-7610 1.7.3.3 , but left side is lacking 2 H2O , ;;; and BALANCE-STATE: :UNBALANCED-UNKNOWN !!! ;;==> corresponds to several rxns... ;;; The real issue seems to be that the AF rxn is a summary of about 3 separate rxns, ;;; namely 1.7.3.3 to 5-HYDROXYISOURATE , ;;; and then 3.5.2.17 to CPD-5821 "5-hydroxy-2-oxo-4-ureido-2,5-dihydro-1H imidazole-5-carboxylate", ;;; which then spontaneously decarboxylates. ||# #|| ;;;kr:Aug-27-2006 sources of mismatches: (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : nad + pphn --> 34hpp + co2 + nadh" "PPND")) ;;; 34hpp matched to 3-4-HYDROXYPHENYLPYRUVATE , which appears to be a duplicate of ;;; P-HYDROXY-PHENYLPYRUVATE , which is what is used in PREPHENATEDEHYDROG-RXN ;;; remedy: merge 3-4-HYDROXYPHENYLPYRUVATE into P-HYDROXY-PHENYLPYRUVATE , while ;;; keeping the structure of 3-4-HYDROXYPHENYLPYRUVATE ;;;kr:Aug-27-2006 performed this merger in metacyc 10.6 . ;;; also edited the manual assignment (removed :metacyc) in iAF1237-ecocyc-cpd-mappings-060823.csv ;;;kr:Aug-27-2006 didn't match because of old, wrong hand assignment of trnasecys to |Charged-SEC-tRNAs| (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : atp + ser-L + trnasecys --> amp + ppi + sertrna(sec)" "SERTRS2")) ;;; edited the manual assignment (to |SEC-tRNAs|) in iAF1237-ecocyc-cpd-mappings-060823.csv ;;;kr:Aug-27-2006 AF labels this as periplasmic. in ecocyc, the exact rxn is happening in cytoplasm. (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[p] : h2o + pser-L --> pi + ser-L" "PSP_Lpp")) ;;; this appears to be a special case of generic rxn 3.1.3.2 aphA ... 
;;; same issue with: (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[p] : h2o + thrp --> pi + thr-L" "PTHRpp")) ;;; same issue with: (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[p] : h2o + r5p --> pi + rib-D" "R5PPpp")) ;;;kr:Aug-27-2006 AF calls this anaerobic and includes fum . ecocyc comment says it is "coupled" to fum. ;;; hemG 1.3.3.4 . in ecocyc, there is probably only the aerobic rxn. (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : (3) fum + pppg9 --> ppp9 + (3) succ" "PPPGO3")) ;;;kr:Aug-27-2006 in ecocyc, hcaT is putative, so no rxn eqn. (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "h[p] + pppn[p] <==> h[c] + pppn[c]" "PPPNt2rpp")) ;;;kr:Aug-27-2006 in ecocyc, ian did not include this antiporter eqn. for potE : (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "orn[c] + ptrc[p] <==> orn[p] + ptrc[c]" "PTRCORNt7pp")) ;;;kr:Aug-27-2006 in ecocyc, iubmb, and kegg, there is a water on the right, ;;; instead of the proton, which might in fact be wrong... (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : selnp + sertrna(sec) --> h + pi + sectrna" "SELCYSS")) ||# #|| ;;;kr:Aug-24-2006 EC(97): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[p] : 23camp + h2o --> 3amp + h" "23PDE7pp")) NIL EC(93): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[p] : 23cump + h2o --> 3ump + h" "23PDE2pp")) NIL ;;; These rxns should be made to match CYCPHOSDIESTER-RXN , because ;;; |Cyclic-2-3-Ribonucleoside-Monophosphates| has 23camp and 23cump as instances, and ;;; |3-Prime-Ribonucleoside-Monophosphates| has 3amp and 3ump as instances (in MetaCyc). ;;; (but also, a proton is missing on the right side of CYCPHOSDIESTER-RXN , and the compartments are different...) 
(FUZZY-FIND-RXN-BY-SUBSTRATES '((:class |Cyclic-2-3-Ribonucleoside-Monophosphates| CCO-PERI-BAC) (:INSTANCE WATER CCO-PERI-BAC)) '((:class |3-Prime-Ribonucleoside-Monophosphates| CCO-PERI-BAC) (:INSTANCE PROTON CCO-PERI-BAC))) ==>nil EC(22): (FIND-RXN-BY-SUBSTRATES '(|Cyclic-2-3-Ribonucleoside-Monophosphates| WATER) '(|3-Prime-Ribonucleoside-Monophosphates|)) 0[2]: (FIND-RXN-BY-SUBSTRATES (|Cyclic-2-3-Ribonucleoside-Monophosphates| WATER) (|3-Prime-Ribonucleoside-Monophosphates|)) 0[2]: returned NIL NIL EC(23): EC(26): (FIND-RXN-BY-SUBSTRATES '((:CLASS |Cyclic-2-3-Ribonucleoside-Monophosphates| CCO-PERI-BAC) (:INSTANCE WATER CCO-PERI-BAC)) '((:CLASS |3-Prime-Ribonucleoside-Monophosphates| CCO-PERI-BAC))) 0[2]: (FIND-RXN-BY-SUBSTRATES ((:CLASS |Cyclic-2-3-Ribonucleoside-Monophosphates| CCO-PERI-BAC) (:INSTANCE WATER CCO-PERI-BAC)) ((:CLASS |3-Prime-Ribonucleoside-Monophosphates| CCO-PERI-BAC))) 0[2]: returned NIL NIL EC(27): EC(24): (FIND-RXN-BY-SUBSTRATES '((:CLASS |Cyclic-2-3-Ribonucleoside-Monophosphates| CCO-CYTOSOL) WATER) '((:CLASS |3-Prime-Ribonucleoside-Monophosphates| CCO-CYTOSOL))) 0[2]: (FIND-RXN-BY-SUBSTRATES ((:CLASS |Cyclic-2-3-Ribonucleoside-Monophosphates| CCO-CYTOSOL) WATER) ((:CLASS |3-Prime-Ribonucleoside-Monophosphates| CCO-CYTOSOL))) 0[2]: returned (CYCPHOSDIESTER-RXN) (CYCPHOSDIESTER-RXN) EC(25): EC(29): (FIND-RXN-BY-SUBSTRATES '((:INSTANCE CPD-3725 CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL)) '((:INSTANCE CPD-3724 CCO-CYTOSOL)) ) 0[2]: (FIND-RXN-BY-SUBSTRATES ((:INSTANCE CPD-3725 CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL)) ((:INSTANCE CPD-3724 CCO-CYTOSOL))) 1[2]: (ASSEMBLE-CPD-BASED-RXN-LIST (CPD-3725 WATER CPD-3724)) 1[2]: returned NIL 0[2]: returned NIL NIL EC(30): EC(30): (FIND-RXN-BY-SUBSTRATES '((:INSTANCE CPD-3725 CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL)) '((:INSTANCE CPD-3724 CCO-CYTOSOL)) :rxn-list '(CYCPHOSDIESTER-RXN)) 0[2]: (FIND-RXN-BY-SUBSTRATES ((:INSTANCE CPD-3725 CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL)) ((:INSTANCE 
CPD-3724 CCO-CYTOSOL)) :RXN-LIST (CYCPHOSDIESTER-RXN)) 0[2]: returned (CYCPHOSDIESTER-RXN) (CYCPHOSDIESTER-RXN) EC(31): (FIND-RXN-BY-SUBSTRATES '((:INSTANCE CPD-3725 CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL)) '((:INSTANCE CPD-3724 CCO-CYTOSOL)) :rxn-list (get-class-all-instances '|Reactions|)) (#) ;;; seems like an erroneous eqn: (parse-palsson-rxn-eqn "[c] : atp + xylu-L --> adp + h + xu5p-D") (parse-palsson-rxn-eqn "[c] : atp + xylu-D --> adp + h + xu5p-D" "XYLK") (parse-palsson-rxn-eqn "h[p] + xylu-L[p] --> h[c] + xylu-L[c]" "XYLUt2pp") ;;;kr:Aug-24-2006 EC(79): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "h[p] + xylu-L[p] --> h[c] + xylu-L[c]" "XYLUt2pp")) NIL (find-rxn-by-substrates (palsson-rxn-left-cpds palsson-rxn) (palsson-rxn-right-cpds palsson-rxn) ) (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "ala-L[p] + atp[c] + h2o[c] --> adp[c] + ala-L[c] + h[c] + pi[c]" "ALAabcpp")) ;; ==> i think we don't have the one with ala ! ;;;kr:Mar-23-2006 why did this not match ? ;;;kr:Mar-24-2006 presumably because we to add a proton, but compartments also need to be specified ;;; for transport-rxns... #S(PALSSON-RXN :LEFT-CPDS (ATP WATER TAURINE) :RIGHT-CPDS (ADP PROTON |Pi| TAURINE) :LEFT-STOICHS (1 1 1) :RIGHT-STOICHS (1 1 1 1) :LEFT-COMPARTMENTS (CCO-CYTOSOL CCO-CYTOSOL CCO-PERI-BAC) :RIGHT-COMPARTMENTS (CCO-CYTOSOL CCO-CYTOSOL CCO-CYTOSOL CCO-CYTOSOL) :IRREV-P T :ABBREVIATION "TAURabcpp" :ECOCYC-RXN-IDS NIL :ANALYSIS NIL :GENEASSOCIATION (B0365 AND B0366 AND B0367)) --- Instance ABC-64-RXN --- Types: OCELOT-GFP::FRAMES :CREATION-DATE: 3192201146 :CREATOR: |jchen| ENZYMATIC-REACTION: ABC-64-ENZRXN LEFT: TAURINE ---COMPARTMENT: CCO-PERI-BAC ATP WATER OFFICIAL-EC?: NIL RIGHT: TAURINE, ADP, |Pi| EC(136): (fuzzy-find-rxn-by-substrates '(ATP WATER TAURINE) '(ADP PROTON |Pi| TAURINE)) NIL ||# #|| ;;;kr:May-15-2006 cpd-class vs. instance mappings... 
example clusters 12 13 14 16 ushA: [p] : h2o + udpg --> g1p + (2) h + ump a UDP-sugar + H_2_O = UMP + a sugar-1-phosphate UDPSUGARHYDRO-RXN ==> the problem is that there are no cpd instances under a sugar-1-phosphate, and so: EC(29): (nonspecific-forms-of-rxn 'UDPSUGARHYDRO-RXN) NIL EC(30): (specific-forms-of-rxn 'UDPSUGARHYDRO-RXN) NIL EC(31): ;;;kr:May-15-2006 we do not seem to have that rxn, not even in metacyc ! (?) (parse-palsson-rxn-eqn "[c] : akg + tyr-L <==> 34hpp + glu-L" "TYRTA") ||# #|| From: zucker@research.dfci.harvard.edu To: Cc: , , , Subject: Re: FBA model dataset 11-3-05 Date: Fri, 4 Nov 2005 10:06:56 -0500 (EST) Hey Adam, One more question. In the conference call on Wednesday, Jennifer mentioned that some reactions such as abbreviation: ALDD19x officialName: "aldehyde dehydrogenase (phenylacetaldehyde, NAD)") equation: [c] : h2o + nad + pacald --> (2) h + nadh + pac subSystem: Alternate Carbon Metabolism proteinClass: 1.2.1.39 geneAssociation: b1385 equivalentTo: Ecocyc frame-id: PHENDEHYD-RXN and PHENDEHYD-ENZRXN were considered irreversible for physiological reasons even if they were reversible from a thermodynamic perspective. (In EcoCyc the PHENDEHYD-ENZRXN claims this is reversible). In BioPAX, we distinguish between thermodynamic irreversibility and physiological irreversibility. Is there a way to flag those reactions, or should I just assume that any reaction-direction discrepency between EcoCyc and iJR904 implies a physiological irreversibility? Sincerely, Jeremy > Hi Adam, > > This is fantastic! Just a few questions: When there is no > geneAssociation associated with the reaction, should we take this to > mean that the reaction is spontaneous? Also, were you able to find > kegg compound ids, or were the CAS numbers the only Xref you could > extract? > > I'm working on the BioPAX conversion right now. Thanks so much. 
> > Jeremy > >> All, >> Here is the current model content in a workbook, there >> is a worksheet containing notes on the content in the >> worksheet. There are unique identifiers for each gene >> and compound and GPR associations in terms of the b#s. >> >> All of the reaction/gene notes have to be queried from >> our database from Genomatica (makers of the SimPhney >> reconstruction software) and I should be able to get >> them in the upcoming days. >> >> Let me know if you have any questions or if there is >> anything else that would be helpful to provide. >> >> Adam ||# #|| From: Adam Feist To: zucker@research.dfci.harvard.edu, afeist@ucsd.edu Cc: kr@AI.SRI.COM, keseler@AI.SRI.COM, zucker@research.dfci.harvard.edu, jlreed@ucsd.edu Subject: FBA model dataset 11-4-05 Date: Fri, 4 Nov 2005 11:21:32 -0800 (PST) --0-212013700-1131132092=:27421 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: 8bit Content-Id: Content-Disposition: inline All, Look below for answers to Jeremy's questions and later for the discussion on reaction reversibility. > Just a few questions: When > there is no > geneAssociation associated with the reaction, > should we take this to > mean that the reaction is spontaneous? In some cases, there are spontaneous reactions. I have included an updated worksheet with the included spontaneous reactions that are NOT transport reactions. Not all transport reactions have gene associations: All exchange reactions 'EX_' reactions are spontaneous (no gene needed) since they are transport between an imaginary systems boundary and the extracellular space around the cell. Additionally, all transport reactions between the extracellular space and periplasm with no gene association are assumed to diffuse through the outer membrane porins (reactions usually marked with a lower case 'tex', standing for tRANSPORT exTRACELLULAR. 
One more class of transport reactions without an association are metabolites that diffuse through the inner membrane: h2o, co2, etc. have no associated gene since they diffuse. Now, the remaining reactions in the model without association are reactions were the gene(s) responsible have not been characterized (I think you call them orphan reactions). These can be identified by isolating only non-gene associated reactions occuring in one compartment and excluding the listed spontaneous reactions. The transport reactions have to be examined manually. > Also, were > you able to find > kegg compound ids, or were the CAS numbers the only > Xref you could > extract? I need to contact Genomatica to get the KEGG IDs, but I will get them. Hopefully soon. ------ On the topic of reaction reversibility, I think that it is becoming evident that we need to review the reversibility of reactions in the model. In the past, we have used the reversibility data from primary literature and have been conservative (preferring irreversible reactions) when no evidence is prsented. As a start to establishing evidence for reversibility, Jennie suggested identifying the discrepancies between EcoCyc and the model and then letting her have a look at them to see if there was evidence for physiological irreversibility. I would be interested in discussing a method to provide evidence for reaction reversibility for all reactions in the model. Could you let me know what data EcoCyc (or BioPAX) has on reversibility and I can consolidate data the we have in our database as well. Do any of you have further suggestions or insight? Talk to you soon, Adam ||# #|| From: zucker@research.dfci.harvard.edu To: , Cc: , , , Subject: Re: FBA model dataset 11-4-05 Date: Fri, 4 Nov 2005 17:15:25 -0500 (EST) ------=_20051104171525_43569 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: 8bit Hi Adam, Thanks for getting back to me so quickly. 
Enclosed is a spreadsheet containing all small-molecule reactions in EcoCyc with reaction-direction and gene-protein-reaction associations. I used Blattner numbers to name the genes, but some genes contained more than one Blattner number, and others didn't have Blattner number at all. The reaction-direction requires some interpretation. There is a flag on the reaction frame that asks whether it is spontaneous. If the flag is true, I put "spontaneous" If the flag is false *and* there is no enzyme that catalyzes the reaction, I put "not spontaneous" If the enzyme that catalyzes the reaction does not have a reaction direction, I put NIL (unknown). Otherwise, it is REVERSIBLE, IRREVERSIBLE-LEFT-TO-RIGHT, or PHYSIOLOGICAL-LEFT-TO-RIGHT Also, I have a few questions about the iJR904 reaction prefixes and suffixes. Is a prefix of EX_ redundant with the suffix (e)? And as far as suffixes, how should I interpret the following? r - rEVERSIBLE? tpp - tRANSPORT DOUBLE pHOSpPHORYLATION? tex - tRANSPORT eXTRACELLULAR texi - tRANSPORT eXTRACELLULAR iRREVERSIBLE? t2rpp - pp x y abcpp - abc TRANSPORT with 2 pHOSpHORYLATIONS? 1 2 3 4 5 s 1r ptspp 1f 2f 3f 4f 5f 6f 7f 8f tonex t2pp _D2 _L L2 19 2x 19x 2y 3y 4 _2 s exs t3ipp 3t8pp 3tex t2_2pp t2_3pp 4rpp 3abcp uabcpp or is it 1uabcpp? tonex or is it 1tonex? t7pp 2abcpp exs abc2pp _EC _1.3 (e) Thanks, Jeremy ||# #|| From: Adam Feist To: zucker@research.dfci.harvard.edu, afeist@ucsd.edu, ght2@york.ac.uk Cc: kr@AI.SRI.COM, keseler@AI.SRI.COM, jlreed@ucsd.edu Subject: Re: FBA model dataset 11-4-05 Date: Fri, 4 Nov 2005 14:53:47 -0800 (PST) --0-1395104267-1131144827=:69444 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: 8bit Content-Id: Content-Disposition: inline just the question answers: > > Also, I have a few questions about the iJR904 > reaction prefixes and suffixes. > Is a prefix of EX_ redundant with the suffix (e)? 
EX_ is the prefex for an exchange reaction (a rection property), (e) denotes the loaction of a metabolite, the extracellular space. Some reactions have metabolites from (e) that are not 'EX_' reactions. > And as far as suffixes, how should I interpret the > following? These are general rules that we used, but there are exceptions. Such as some reversible reactions do not have an 'r' after them. I would take some of these distinctions lightly. > r - rEVERSIBLE > tpp - tRANSPORT / pERIpLASM > tex - tRANSPORT eXTRACELLULAR > texi - tRANSPORT / eXTRACELLULAR / iRREVERSIBLE > t2rpp - tRANSPORT / 2 - usually proton antiport / rEVERSIBLE / pERIpLASM > pp - pERIpLASM > x - involves NAD(H) > y - involved NADP(H) > abcpp - abc TRANSPORT / pERIpLASM > s - sPONTANEOUS > ptspp - pts transport / pERIpLASM > tonex - transfer involving the ton complex / exTERNAL > _D - uses D form > _L - uses L form > exs exTRACELLULAR / sPONTANEOUS > u - uptake > _EC - means specific for E. coli (i.e. fatty acids) > _1.3 - a stoichiometry for a reaction - numbers for reactions usually denote different substrates for a similar reaction (such as a nucleoside-triphosphatase using ATP or GTP or TTP ...) - numbers following a lowercase 't' usually denote a different type of transport 1-diffusion 2-antiport 3-simport (these are less reliable) > 1 > 2 > 3 > 4 > 5 The rest are a combination of those given above. I should have sent this before. Adam ||# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Fixed bugs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; #|| ;;;kr:May-15-2006 should resolve to: VALINE--TRNA-LIGASE-RXN (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : atp + trnaval + val-L --> amp + ppi + valtrna" "VALTRS")) ==> NIL ??? ;; all metabolites resolved ok... EC(40): (FIND-RXN-BY-SUBSTRATES '(ATP |VAL-tRNAs| VAL) '(AMP PPI |Charged-VAL-tRNAs| ) :EXACT-SUBSTRATES? 
nil) NIL EC(41): ;;;kr:Aug-24-2006 Confirmed that this is now ok: EC(74): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : atp + trnaval + val-L --> amp + ppi + valtrna" "VALTRS")) (VALINE--TRNA-LIGASE-RXN) EC(75): ||# #|| ;;;kr:Mar-14-2006 Why does this not match the ecocyc rxn DSERDEAM-RXN ? (parse-palsson-rxn-eqn "[c] : ser-D --> nh4 + pyr" "SERD_D") ;;; ==> because it is AMMONIUM vs. AMMONIA !! ;;;kr:Aug-24-2006 Confirmed that this is now ok: EC(84): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : ser-D --> nh4 + pyr" "SERD_D")) (DSERDEAM-RXN) EC(85): ;;; kr:Mar-20-2006 transport rxns not found because compartments needed to be specified. (parse-palsson-rxn-eqn "pep[c] + sbt-D[p] --> pyr[c] + sbt6p[c]" "SBTptspp") ;;; should be: TRANS-RXN-169 ;;;kr:Aug-24-2006 Confirmed that this is now ok: EC(87): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "pep[c] + sbt-D[p] --> pyr[c] + sbt6p[c]" "SBTptspp")) (TRANS-RXN-169) EC(88): ;;;kr:May-12-2006 Work on fuzzy strategies that also take compartments into account. (trace find-rxn-by-substrates fuzzy-find-rxn-by-substrates find-rxn-by-substrates-w-fuzziness-strategies) ;;; a typical case: (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "ala-L[p] + atp[c] + h2o[c] --> adp[c] + ala-L[c] + h[c] + pi[c]" "ALAabcpp")) ;; ==> i think we don't have the one with ala ! ;;;kr:May-12-2006 After adding (modify-cpd-list-by-strategy ...) which now can apply strategies even to cpd-specs ! 
EC(50): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "atp[c] + h2o[c] + ile-L[p] --> adp[c] + h[c] + ile-L[c] + pi[c]" "ILEabcpp")) 0[1]: (FUZZY-FIND-RXN-BY-SUBSTRATES ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL))) 1[1]: (FIND-RXN-BY-SUBSTRATES ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL)) :EXACT-SUBSTRATES? T) 1[1]: returned NIL 1[1]: (FIND-RXN-BY-SUBSTRATES-W-FUZZINESS-STRATEGIES (NOOP) (NOOP) ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL))) 2[1]: (FIND-RXN-BY-SUBSTRATES ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL)) :EXACT-SUBSTRATES? T) 2[1]: returned NIL 1[1]: returned NIL 1[1]: (FIND-RXN-BY-SUBSTRATES-W-FUZZINESS-STRATEGIES (NOOP) (REMOVE PROTON) ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL))) 2[1]: (FIND-RXN-BY-SUBSTRATES ((:INSTANCE ATP CCO-CYTOSOL) (:INSTANCE WATER CCO-CYTOSOL) (:INSTANCE ILE CCO-PERI-BAC)) ((:INSTANCE ADP CCO-CYTOSOL) (:INSTANCE ILE CCO-CYTOSOL) (:INSTANCE |Pi| CCO-CYTOSOL)) :EXACT-SUBSTRATES? 
T) 2[1]: returned (ABC-15-RXN) 1[1]: returned (ABC-15-RXN) ((RIGHT REMOVE PROTON)) 0[1]: returned (ABC-15-RXN) ((RIGHT REMOVE PROTON)) (ABC-15-RXN) EC(51): EC(56): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "atp[c] + h2o[c] + leu-L[p] --> adp[c] + h[c] + leu-L[c] + pi[c]" "LEUabcpp")) (ABC-35-RXN) EC(57): ;;;kr:Aug-24-2006 Should map to PYRROLINECARBREDUCT-RXN 1.5.1.2 (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : 1pyr5c + (2) h + nadph --> nadp + pro-L" "P5CR")) ;;; PYRROLINECARBREDUCT-RXN is written in the opposite direction! ;;; but also in terms of the |NAD(P)H| and |NAD(P)| classes... ;;;kr:Aug-25-2006 Including a second pass that deals with instance-class mappings fixed the problem: EC(57): (palsson-rxn->ecocyc (parse-palsson-rxn-eqn "[c] : 1pyr5c + (2) h + nadph --> nadp + pro-L" "P5CR")) 0[2]: (FUZZY-FIND-RXN-BY-SUBSTRATES ((:INSTANCE |l-delta(1)-pyrroline_5-carboxylate| CCO-CYTOSOL) (:INSTANCE PROTON CCO-CYTOSOL) (:INSTANCE NADPH CCO-CYTOSOL)) ((:INSTANCE NADP CCO-CYTOSOL) (:INSTANCE PRO CCO-CYTOSOL))) 0[2]: returned (PYRROLINECARBREDUCT-RXN) (:EXPANDED-RXN-MATCH) (PYRROLINECARBREDUCT-RXN) EC(58): ||#