#!/usr/bin/env python ############################################################################# ## mcss.py - max common substructure search ## ## Authors: Jeremy Yang, Bob Tolbert ## 21 Aug 2003 ############################################################################# import os,sys,re,getopt from openeye.oechem import * import formats PROG=os.path.basename(sys.argv[0]) usage=''' %(PROG)s [options] [outfile] required options: --qsmi= or --qmol= options: --mcsfunc=MaxBonds|MaxAtoms|MaxAtomsCompleteCycles --all ... default is best (or first) only --kekule ... use kekule forms; no aromaticity --minatoms= --i= --ifmt= --v ... verbose --vv ... very verbose --h ... help --hh ... help a lot --atom_expr= --bond_expr= '''%{'PROG':PROG} expr_help=''' atom/bond expressions are |'ed list of: atom expressions primitives: Mass HCount ImplicitHCount FormalCharge StrictFormalCharge Degree ExplicitDegree Valence Hybridization AtomicNumber EqMetal EqHalogen EqON EqONS EqPS EqAromatic EqCHalogen EqCAliphaticONS EqCPSAcidRoot EqKetoneSulfoneRoot bond expressions primitives: BondOrder EqSingleDouble EqDoubleTriple EqNotAromatic atom/bond expressions primitives: Aromaticity RingMember Chiral canned expressions: DefaultAtoms = AtomicNumber|Aromaticity|FormalCharge DefaultBonds = BondOrder|Aromaticity ExactAtoms = AtomicNumber|Aromaticity|StrictFormalCharge|Degree|\ HCount|Chiral|Mass|RingMember ExactBonds = BondOrder|Aromaticity|RingMember|Chiral AutomorphAtoms = AtomicNumber|Aromaticity|Degree|Chiral|HCount AutomorphBonds = Aromaticity AnyAtom AnyBond ''' ############################################################################# def ShowMCSS(mcss,qmol,mol,verbose,all,minatoms=0): print 'qmol:', OECreateCanSmiString(OEGraphMol(qmol)) print ' mol:', OECreateCanSmiString(OEGraphMol(mol)) OETriposAtomNames(mol) matcount=0 tmcsmol=OEGraphMol() qmcsmol=OEGraphMol() mats=[] mcsmols=[] rmtx=DoubleArray(9) tmtx=DoubleArray(3) for mat in mcss.Match(mol): if mat.NumAtoms()1.1b5) for mp in mat.GetBonds(): revmat.AddPair(mp.target,mp.pattern) ############################################################## OESubsetMol(qmcsmol,revmat) print OECreateCanSmiString(qmcsmol), '->', OESubsetMol(tmcsmol,mat) mcsmols.append(OEGraphMol(tmcsmol)) print OECreateCanSmiString(tmcsmol) if verbose>1: for mp in mat.GetAtoms(): print '%s -> %s'%(mp.pattern.GetName(),mp.target.GetName()), (x,y,z) = qmol.GetCoords(mp.pattern) print '%.3f,%.3f,%.3f' % (x,y,z), '->', (x,y,z) = mol.GetCoords(mp.target) print '%.3f,%.3f,%.3f' % (x,y,z) rms=OERMSD(tmcsmol,qmcsmol,mat,1,rmtx,tmtx) print 'MCS RMSD = %.3f\n' % rms tmcsmol.Clear() qmcsmol.Clear() i+=1 if not all: break return mcsmols ############################################################################# if __name__=='__main__': ifmt=0; wegot_ifile=0; verbose=0; qmol=None; all=0; kekule=0; minatoms=0; atom_expr=OEExprOpts_DefaultAtoms aexpstr='DefaultAtoms' bond_expr=OEExprOpts_DefaultBonds bexpstr='DefaultBonds' mcsfunc=None ifs=oemolistream() opts,pargs = getopt.getopt(sys.argv[1:],'hv',['h','v','vv','i=', 'qsmi=','qmol=','atom_expr=','bond_expr=','minatoms=', 'all','kekule','h','hh','ifmt=','mcsfunc=']) for (opt,val) in opts: if opt=='--h': OEThrow.Usage(usage) elif opt=='--hh': OEThrow.Usage(usage+expr_help+'\nfile formats:\n'+formats.Sprintformats()) elif opt=='--i': if not ifs.open(val): OEThrow.Fatal('Cannot open: %s' % val) wegot_ifile=1 elif opt=='--ifmt': ifmt=globals()['OEFormat_'+val.upper()] elif opt=='--qsmi': qmol=OEGraphMol() OEParseSmiles(qmol,val) elif opt=='--qmol': qifs=oemolistream() if not qifs.open(val): OEThrow.Fatal('Cannot open: %s' % val) qmol=OEGraphMol() OEReadMolecule(qifs,qmol) elif opt=='--v': verbose=1 elif opt=='--vv': verbose=2 elif opt=='--all': all=1 elif opt=='--kekule': kekule=1 elif opt=='--minatoms': minatoms=int(val) elif opt=='--atom_expr': aexpstr = re.sub('(?<=\|)\s*(\w+)',r'OEExprOpts_\1',val) aexpstr = re.sub('^\s*(\w+)',r'OEExprOpts_\1',aexpstr) aexpstr = re.sub('OEExprOpts_AnyAtom','0x0',aexpstr) atom_expr=eval(aexpstr) elif opt=='--bond_expr': bexpstr = re.sub('(?<=\|)\s*(\w+)',r'OEExprOpts_\1',val) bexpstr = re.sub('^\s*(\w+)',r'OEExprOpts_\1',bexpstr) bexpstr = re.sub('OEExprOpts_AnyBond','0x0',bexpstr) bond_expr=eval(bexpstr) elif opt=='--mcsfunc': if val not in ['MaxAtoms','MaxBonds','MaxAtomsCompleteCycles']: OEThrow.Fatal('Illegal mcsfunc: %s\n%s' % (val,usage)) mcsfunc=eval('OEMCS'+val) else: OEThrow.Fatal('Illegal option: %s\n%s' % (opt,usage)) if verbose: sys.stderr.write('atom expression: %s (%x)\n'%(aexpstr,atom_expr)) sys.stderr.write('bond expression: %s (%x)\n'%(bexpstr,bond_expr)) if mcsfunc: sys.stderr.write('mcsfunc: %s\n'%'OEMCS'+mcsfunc) if not wegot_ifile: if ifmt: ifs.open() else: OEThrow.Fatal('Input fmt required w/ stdin.\n'+usage) if ifmt: ifs.SetFormat(ifmt) else: ifmt=ifs.GetFormat() if not qmol: OEThrow.Fatal('No query molecule.\n'+usage) OETriposAtomNames(qmol) if kekule: OEClearAromaticFlags(qmol) OEKekulize(qmol) else: OEAssignAromaticFlags(qmol) mcss=OEMCSSearch(qmol,atom_expr,bond_expr) if mcsfunc: f=eval('OEMCS'+mcsfunc) mcss.SetMCSFunc(f()) nmol=0 for mol in ifs.GetOEMols(): nmol+=1 if verbose: print 'molecule %d:' % nmol if kekule: OEClearAromaticFlags(mol) OEKekulize(mol) else: OEAssignAromaticFlags(mol) ShowMCSS(mcss,qmol,mol,verbose,all,minatoms)