#!/usr/bin/env python ############################################################################# ### frankenmol.py -- combine parts at correspondingly mapped atoms ### ### Input is one template mol followed by one or more sidechain mols ### as space-delimited ampersand-notation smiles. Each sidechain should have ### only one attachment point, indicated by mapped atoms. Template ### may have multiple attachment points. All permutations of sidechains ### are generated. Poor man's combichem enumerator. ### ### Sample input: ### S&1&2&3 [H]&1 C&1 F&2 Br&2 N&3 O&3 ### C&1CCN&2 [H]&1 N&1 O&1 F&2 Br&2 C&2(=O)O ### N&11C(=O)N&2C(=O)C(N&3C=N2)=C12 [H]&1 C&1 [H]&2 C&2 C&3 C&3(=O)O ### ############################################################################# ### Note: the OpenEye smiles parser can do some of this automatically, ### e.g., C&1CN.O&1 --> OCCN ### But, ### e.g., C&1CN&2.O&1 --> OCCN[R2] ### and "R-notation" smiles will not "react" in the same way. ############################################################################# ### author: Jeremy Yang ### date: 11 Nov 2003 ############################################################################# import os,sys,getopt,re from openeye.oechem import * PROG=os.path.basename(sys.argv[0]) ############################################################################# ### Combine() - attach template mol (tmol) and list of side-mols (smols) in ### all allowed ways. Find matching tmol/smol mapindex and attach. ### Then call function recursively with newmol and reduced smols. ### Attachment points are mapped wildcard atoms: [*:1],[*:2], etc. ############################################################################# def Combine(tmol,smols,frags=0,title=0,verbose=0): outmols=[] midxes=[] for atom in tmol.GetAtoms(OEHasAtomicNum(0)): midx=atom.GetMapIdx() if not midx==0 and midx not in midxes: midxes.append(midx) tmp=[] for smol in smols: midx=MyMapIdx(smol) if not midx==0 and midx not in tmp: tmp.append(midx) for midx in midxes: if midx not in tmp: midxes.remove(midx) if len(midxes)==0: return [] midx=midxes[0] tatom=tmol.GetAtom(OEHasMapIdx(midx)) ## wildcard atom if not tatom: return [] idxt=tatom.GetIdx() for smol in smols: satom=smol.GetAtom(OEHasMapIdx(midx)) ## wildcard atom if not satom: continue idxs=satom.GetIdx() newmol=OEGraphMol() as,bs=OEAddMols(newmol,tmol) newmol.SetTitle(tmol.GetTitle()) aw1=as[idxt] ## wildcard atom as,bs=OEAddMols(newmol,smol) aw2=as[idxs] ## wildcard atom for bond in aw1.GetBonds(): break a1=bond.GetNbr(aw1) for bond in aw2.GetBonds(): break a2=bond.GetNbr(aw2) bond=newmol.NewBond(a1,a2,1) newmol.DeleteAtom(aw1) newmol.DeleteAtom(aw2) if MolIsDone(newmol) or frags: outmols.append(OEGraphMol(newmol)) if not MolIsDone(newmol): smols2=smols[:] smols2.remove(smol) if len(smols2)==0: continue outmols.extend(Combine(newmol,smols2,frags,title,verbose)) return outmols ############################################################################# ### PrintMol() - output nicely ############################################################################# def PrintMol(oms,mol,title=0): for a in mol.GetAtoms(): a.SetMapIdx(0) OEAssignImplicitHydrogens(mol) OEAssignFormalCharges(mol) if not title: mol.SetTitle('') OEWriteMolecule(oms,mol) ############################################################################# ### MolIsDone() - has no open attachment point[s] ############################################################################# def MolIsDone(mol): for a in mol.GetAtoms(OEHasAtomicNum(0)): midx=a.GetMapIdx() if midx>0: return 0 return 1 ############################################################################# ### MyMapIdx() ############################################################################# def MyMapIdx(smol): for a in smol.GetAtoms(OEHasAtomicNum(0)): midx=a.GetMapIdx() if midx>0: return midx return 0 ############################################################################# def CheckSmol(smol,ssmi): midxes=[] for atom in smol.GetAtoms(OEHasAtomicNum(0)): midx=atom.GetMapIdx() if not midx==0: midxes.append(midx) if not len(midxes)==1: sys.stderr.write('ERROR: illegal smol; exactly 1 attachment required: %s\n'%ssmi) return 0 return 1 ############################################################################# if __name__=='__main__': usage=''' %(P)s [options] options: --i= ... file of templates plus sidechains, space delimited e.g.: N&11C(=O)N&2C(=O)C(N&3C=N2)=C12 [H]&1 C&1 [H]&2 C&2 C&3 C&3(=O)O --o= ... new mols --ofmt= --frags ... include incomplete mols (w/ attachment points) --title ... add mol titles reflecting composition --v ... verbose --h ... help '''%{'P':PROG} verbose=0; infile=None; ofmt=OEFormat_ISM; wegot_ofile=0; oms=None; frags=0; title=0; opts,pargs = getopt.getopt(sys.argv[1:],'h',['h','v','i=','o=', 'ofmt=','frags','title' ]) for (opt,val) in opts: if opt=='--h': OEThrow.Usage(usage) elif opt=='--i': infile=open(val) elif opt=='--o': oms=oemolostream() if not oms.open(val): OEThrow.Fatal('Cannot open: %s'%val) wegot_ofile=1 elif opt=='--ofmt': ofmt=globals()['OEFormat_'+val.upper()] elif opt=='--frags': frags=1 elif opt=='--title': title=1 elif opt=='--v': verbose=1 else: OEThrow.Fatal('Illegal option: %s\n%s' % (opt,usage)) if not infile: OEThrow.Fatal('Input file required.\n%s'%usage) if not oms: oms=oemolostream() oms.open() oms.SetFormat(ofmt) incount=0 outcount=0 tmol=OEGraphMol() # template mol smol=OEGraphMol() # sidechain mol while 1: tmol.Clear() line=infile.readline() if not line: break if re.match('^\s*#.*$',line): continue incount+=1 line=line.rstrip() parts = line.split() tsmi=parts[0] ssmis=parts[1:] if verbose: sys.stderr.write('template: %s\n'%tsmi) sys.stderr.write('sidechains: %s\n'%' '.join(ssmis)) if not OEParseSmiles(tmol,tsmi): sys.stderr.write('ERROR: bad template-mol (line %d)\n'%incount) continue if title: tmol.SetTitle('%d'%incount) if frags: PrintMol(oms,OEGraphMol(tmol),title) if len(ssmis)==0: sys.stderr.write('ERROR: no sub-mols (line %d)\n'%incount) continue smols=[] i=0 for ssmi in ssmis: i+=1 smol.Clear() OEParseSmiles(smol,ssmi) CheckSmol(smol,ssmi) if title: smol.SetTitle('%d:%d'%(MyMapIdx(smol),i)) smols.append(OEGraphMol(smol)) outmols=Combine(tmol,smols,frags,title,verbose) if verbose: sys.stderr.write('outmols[%d]: %d\n'%(incount,len(outmols))) for outmol in outmols: outcount+=1 PrintMol(oms,outmol,title) sys.stderr.write('%s results: in: %d out: %d\n'%(PROG,incount,outcount))