#!/usr/bin/python
"""Helpers for filtering, annotating and matching tab-delimited reaction files.

Every table is read with ``fluxor2.reader.FFParser``.  Throughout this module
element 0 of a parsed table is treated as the list of column headers and the
remaining elements as per-row records that support ``in`` and item access.
"""

import sets

import fluxor2.reader


def fill_empty( rxn, headers ):
    """Give ``rxn`` an empty-string value for every header it lacks.

    ``rxn`` is modified in place and also returned, so that it can be used
    directly as the mapping operand of a ``%(name)s`` format string.
    """
    for header in headers:
        if header not in rxn:
            rxn[header] = ''
    return rxn


def filter_file( superclass_file, filter_p, args, subclass_file ):
    """Write the rows of ``superclass_file`` accepted by ``filter_p``.

    ``filter_p`` is called as ``filter_p(rxn, args)`` for each data row; rows
    for which it returns a true value are written to ``subclass_file`` under
    the same header line.  Returns the parsed contents of ``subclass_file``.

    NOTE: earlier revisions looped over a name ``rxns`` that is only assigned
    in the commented-out script section at the bottom of this file, so the
    function raised NameError when called.  It now iterates over the data rows
    of ``superclass_file`` itself.
    """
    rp = fluxor2.reader.FFParser()
    superclass = rp.parse( superclass_file )
    headers = superclass[0]
    row_format = '\t'.join( get_header_template( headers ) )
    with open( subclass_file, 'w' ) as out:
        out.write( '\t'.join( headers ) + '\n' )
        for rxn in superclass[1:]:
            if filter_p( rxn, args ):
                out.write( row_format % fill_empty( rxn, headers ) + '\n' )
    return rp.parse( subclass_file )


def unmapped_wo_genes_p( rxn, mapped_rxns ):
    """Return True if ``rxn`` is not in ``mapped_rxns`` and has no gene association.

    A missing ``'geneAssociation'`` entry and an empty-string one are treated
    alike.
    """
    if rxn['abbreviation'] in mapped_rxns:
        return False
    return 'geneAssociation' not in rxn or rxn['geneAssociation'] == ''


def unmatched_genes_to_lisp( superclass_file, lispdir, lisp_name ):
    """Write a Lisp ``setq`` form listing each reaction with its gene literals.

    The output path is ``lispdir`` and ``lisp_name`` concatenated directly
    (no separator is inserted) followed by ``.lisp``.  Only rows that have a
    ``'geneAssociation'`` entry are emitted.  Returns the output path.
    """
    rp = fluxor2.reader.FFParser()
    gp = fluxor2.reader.GeneParser()
    superclass = rp.parse( superclass_file )
    lisp_file = '%s%s.lisp' % ( lispdir, lisp_name )
    with open( lisp_file, 'w' ) as out:
        out.write( "(setq *%s* '(" % lisp_name )
        for rxn in superclass[1:]:
            if 'geneAssociation' in rxn:
                genepredicate = gp.parse( rxn['geneAssociation'] )
                out.write( '("%s" ( "%s" ) )'
                           % ( rxn['abbreviation'],
                               '" "'.join( genepredicate.get_literals() ) ) )
        out.write( "))\n" )
    return lisp_file


def print_comment_and_rxns_of_gene( comment_of_rxn_gene, comment_and_rxns_of_gene_file ):
    """Write one ``gene / reactions / comment`` row per gene, sorted by gene.

    ``comment_of_rxn_gene`` maps ``(gene, rxn)`` pairs to a comment.  For each
    gene the comment of the first pair encountered is kept and every reaction
    is collected.  Returns the parsed contents of the written file.
    """
    fp = fluxor2.reader.FFParser()
    # gene -> [comment, rxn, rxn, ...]; slot 0 holds the comment.
    comment_and_rxns_of_gene = {}
    for gene, rxn in comment_of_rxn_gene:
        if gene in comment_and_rxns_of_gene:
            comment_and_rxns_of_gene[gene].append( rxn )
        else:
            comment_and_rxns_of_gene[gene] = [comment_of_rxn_gene[(gene, rxn)], rxn]
    with open( comment_and_rxns_of_gene_file, 'w' ) as out:
        out.write( 'gene\treactions\tcomment\n' )
        for gene in sorted( comment_and_rxns_of_gene ):
            entry = comment_and_rxns_of_gene[gene]
            out.write( '%s\t%s\t%s\n' % ( gene, ','.join( entry[1:] ), entry[0] ) )
    return fp.parse( comment_and_rxns_of_gene_file )


def print_unmapped_rxns_w_genes_comments( genes_wo_ecocyc_rxns_file,
                                          unmapped_rxns_w_genes_file,
                                          unmapped_rxns_w_genes_comments_file ):
    """Copy the unmapped-reactions table, attaching gene comments as ``notes``.

    Each row of ``genes_wo_ecocyc_rxns_file`` lists, in its ``'reactions'``
    field, a comma-separated set of reaction abbreviations.  For every
    reaction named there, ``'<gene> : <comment>'`` strings are joined with
    tabs and stored under ``rxn['notes']`` before the row is written.
    Returns the parsed contents of the written file.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-mangled source; here every reaction row is written, whether or
    not it received notes -- confirm against the original layout.
    """
    rp = fluxor2.reader.FFParser()
    genes_wo_ecocyc_rxns = rp.parse( genes_wo_ecocyc_rxns_file )
    unmapped_rxns_w_genes = rp.parse( unmapped_rxns_w_genes_file )

    # reaction abbreviation -> list of gene rows mentioning it
    unmapped_rxns_w_genes_comments = {}
    for gene in genes_wo_ecocyc_rxns[1:]:
        for rxn in gene['reactions'].split( ',' ):
            unmapped_rxns_w_genes_comments.setdefault( rxn, [] ).append( gene )

    headers = unmapped_rxns_w_genes[0]
    row_format = '\t'.join( get_header_template( headers ) )
    with open( unmapped_rxns_w_genes_comments_file, 'w' ) as out:
        out.write( '\t'.join( headers ) + '\n' )
        for rxn in unmapped_rxns_w_genes[1:]:
            if rxn['abbreviation'] in unmapped_rxns_w_genes_comments:
                notes = ['%s : %s' % ( gene['gene'], gene['comment'] )
                         for gene in unmapped_rxns_w_genes_comments[rxn['abbreviation']]]
                rxn['notes'] = '\t'.join( notes )
            out.write( row_format % fill_empty( rxn, headers ) + '\n' )
    return rp.parse( unmapped_rxns_w_genes_comments_file )


def get_header_template( headers ):
    """Return one ``%(header)s`` format fragment per header, in order."""
    return ['%%(%s)s' % header for header in headers]


def print_unmapped_rxns_wo_cpd_mappings( rxns_wo_cpds_file,
                                         mapped_rxns_wo_cpds_file,
                                         unmapped_rxns_wo_cpd_mappings_file ):
    """Write the rows of the first file whose abbreviation is absent from the second.

    Rows are matched on ``'abbreviation'``.  Nothing is returned.
    """
    rp = fluxor2.reader.FFParser()
    cat3 = rp.parse( rxns_wo_cpds_file )
    # The original read ``cat3[0] = headers`` -- a reversed assignment that
    # raised NameError because ``headers`` was never defined.
    headers = cat3[0]
    mapped_cat3 = rp.parse( mapped_rxns_wo_cpds_file )
    mapped_cat3_rxns = {}
    for rxn in mapped_cat3[1:]:
        mapped_cat3_rxns[rxn['abbreviation']] = rxn
    row_format = '\t'.join( get_header_template( headers ) )
    with open( unmapped_rxns_wo_cpd_mappings_file, 'w' ) as out:
        out.write( '\t'.join( headers ) + '\n' )
        for rxn in cat3[1:]:
            if rxn['abbreviation'] not in mapped_cat3_rxns:
                out.write( row_format % fill_empty( rxn, headers ) + '\n' )


def match_rxns_by_gene( gene_ecocyc_rxns_file, unmatched_ucsd_rxns_file,
                        ecocyc_ucsd_map, matched_ucsd_rxns_file, all_p=True ):
    """Pair unmatched UCSD reactions with EcoCyc reactions that share a gene row.

    For each UCSD reaction that has an ``'equation'``, every gene row whose
    ``'ucsd-rxn'`` equals the reaction abbreviation (case-insensitively) and
    that has ``'ecocyc-substrates'`` is scored by the number of substrates the
    two reactions have in common.

    With ``all_p`` true every candidate is written; otherwise only the
    best-scoring one (ties go to the candidate seen last).  Each written row
    is the UCSD reaction's columns followed by the EcoCyc reaction, the
    literal ``:gene-match``, the EcoCyc equation, the two substrate set
    differences and the score.  Progress is printed to standard output.

    ``ecocyc_ucsd_map`` is accepted for interface compatibility but is not
    used.  Returns the parsed contents of ``matched_ucsd_rxns_file``.

    NOTE(review): no separator is written between the last UCSD column and
    the EcoCyc reaction id, so the two share one field.  This is preserved
    from the original; confirm whether it is intentional.
    """
    rp = fluxor2.reader.FFParser()
    gene_ecocyc_rxns = rp.parse( gene_ecocyc_rxns_file )
    unmatched_ucsd_rxns = rp.parse( unmatched_ucsd_rxns_file )
    headers = unmatched_ucsd_rxns[0]
    row_format = '\t'.join( get_header_template( headers ) )

    with open( matched_ucsd_rxns_file, 'w' ) as matched_ucsd_rxns:
        matched_ucsd_rxns.write( '\t'.join( headers ) + '\n' )
        for rxn in unmatched_ucsd_rxns[1:]:
            if 'equation' not in rxn:
                # Without an equation there are no substrates to compare.
                continue
            ucsd_substrates = parseEqn( rxn['equation'] )
            print( "UCSD substrates: %s" % ', '.join( ucsd_substrates ) )

            best_score = 0
            best_gene = {}
            best_emu = sets.Set()
            best_ume = sets.Set()
            rxn_abbreviation = rxn['abbreviation'].lower()

            for gene in gene_ecocyc_rxns[1:]:
                if rxn_abbreviation != gene['ucsd-rxn'].lower():
                    continue
                if 'ecocyc-substrates' not in gene:
                    continue
                # [:-1] drops the final character of the field before
                # splitting (presumably a trailing comma -- TODO confirm).
                ecocyc_substrates = sets.Set( gene['ecocyc-substrates'][:-1].split( ',' ) )
                print( "EcoCyc substrates: %s" % ', '.join( ecocyc_substrates ) )
                score = len( ucsd_substrates.intersection( ecocyc_substrates ) )
                print( "Score %d" % score )
                ecocyc_minus_ucsd = ecocyc_substrates.difference( ucsd_substrates )
                ucsd_minus_ecocyc = ucsd_substrates.difference( ecocyc_substrates )
                if score >= best_score:
                    best_score = score
                    best_gene = gene
                    best_emu = ecocyc_minus_ucsd
                    best_ume = ucsd_minus_ecocyc
                if all_p:
                    matched_ucsd_rxns.write( row_format % fill_empty( rxn, headers ) )
                    matched_ucsd_rxns.write( '%(ecocyc-rxn)s\t:gene-match\t%(ecocyc-eqn)s' % gene )
                    matched_ucsd_rxns.write( '\t%s\t%s\t%d\n'
                                             % ( ', '.join( ecocyc_minus_ucsd ),
                                                 ', '.join( ucsd_minus_ecocyc ),
                                                 score ) )

            if not all_p and best_gene != {}:
                matched_ucsd_rxns.write( row_format % fill_empty( rxn, headers ) )
                matched_ucsd_rxns.write( '%(ecocyc-rxn)s\t:gene-match\t%(ecocyc-eqn)s' % best_gene )
                matched_ucsd_rxns.write( '\t%s\t%s\t%d\n'
                                         % ( ', '.join( best_emu ),
                                             ', '.join( best_ume ),
                                             best_score ) )
    return rp.parse( matched_ucsd_rxns_file )


def print_complement_file( superclass_file, subclass_file, complement_file ):
    """Write the rows of ``superclass_file`` that are absent from ``subclass_file``.

    Rows are matched on ``'abbreviation'``.  Returns the parsed contents of
    ``complement_file``.
    """
    rp = fluxor2.reader.FFParser()
    superclass = rp.parse( superclass_file )
    subclass = rp.parse( subclass_file )
    headers = superclass[0]
    row_format = '\t'.join( get_header_template( headers ) )
    subclass_rxns = {}
    for rxn in subclass[1:]:
        subclass_rxns[rxn['abbreviation']] = rxn
    with open( complement_file, 'w' ) as out:
        out.write( '\t'.join( headers ) + '\n' )
        for rxn in superclass[1:]:
            if rxn['abbreviation'] not in subclass_rxns:
                out.write( row_format % fill_empty( rxn, headers ) + '\n' )
    return rp.parse( complement_file )


def make_ecocyc_ucsd_map( cpd_filename ):
    """Map each ``'ecocyc-id'`` in the compound file to its list of abbreviations.

    Rows without an ``'ecocyc-id'`` entry are ignored.
    """
    rp = fluxor2.reader.FFParser()
    cpds = rp.parse( cpd_filename )
    ecocyc_ucsd_map = {}
    for cpd in cpds[1:]:
        if 'ecocyc-id' in cpd:
            ecocyc_ucsd_map.setdefault( cpd['ecocyc-id'], [] ).append( cpd['abbreviation'] )
    return ecocyc_ucsd_map


def parseEqn( eqn ):
    """Return the set of entities appearing on either side of ``eqn``.

    The equation is parsed with ``fluxor2.reader.ReactionParser.parseEqn``;
    coefficients and compartments are discarded.
    """
    rp = fluxor2.reader.ReactionParser()
    (left, right, direction, conversion_type) = rp.parseEqn( eqn, '' )
    substrates = sets.Set()
    for side in (left, right):
        for physEnt, _coefficient, _compartment in side:
            substrates.add( physEnt )
    return substrates


# NOTE: the disabled driver below references names that are not defined in
# this file (``comments_of_protein_by_blattner`` and
# ``print_unmapped_w_genes_comments``).
#
# if __name__ == "__main__":
#     iafdir = '/home/zucker/src/lsw/trunk/bug/iAF1261/'
#     rp = fluxor2.reader.FFParser(second_delimiter='')
#     cat4 = rp.parse( iafdir + 'iAF1261-ecocyc-rxn-unmapped-otherwise.txt' )
#     mapped_cat4 = rp.parse( iafdir + 'mapped-other-rxns2.txt')
#     mapped_rxns = {}
#     for rxn in mapped_cat4[1:]:
#         mapped_rxns[rxn['abbreviation']] = rxn
#     headers = cat4[0]
#     rxns = cat4[1:]
#     filter_file(iafdir + 'mapped-other-rxns2.txt', unmapped_wo_genes_p, mapped_rxns, iafdir + 'category4-unmapped-wo-genes.txt')
#     print_comment_and_rxns_of_gene( comments_of_protein_by_blattner, iafdir + 'category4-genes-wo-ecocyc-rxns.txt' )
#     print_unmapped_w_genes_comments( iafdir + 'category4-genes-wo-ecocyc-rxns.txt', iafdir + 'category4-unmapped-w-genes.txt', iafdir + 'category4-unmapped-w-genes-comments.txt' )