#!/usr/bin/env python ############################################################################# ### rings.py; find all rings in mols ### ### Author: Jeremy Yang ### Rev: 31 Jul 2003 ############################################################################# import os,sys,re,getopt import formats from openeye.oechem import * usage=''' rings.py [options] [ ... max ringsize [default=10] --minsize= ... min ringsize [default=3] --i= --ifmt= --v ... verbose/debug --h ... this help --hh ... more help ''' ############################################################################# ### AttachList should return a list of list of [0123]s indicating whether ### each atom is a ringsystem atom and has any non-ring bonds, if yes ### what are the bond orders. ### Atom indices are consecutive, begin with 0, and the ### ringsystem list returned by OEDetermineRingSystems() is in index order. ############################################################################# def AttachList(mol): alist=[[] for i in range(mol.NumAtoms())] for atom in mol.GetAtoms(): if atom.IsInRing(): for bond in atom.GetBonds(): if not bond.IsInRing(): alist[atom.GetIdx()].append(bond.GetOrder()) return alist ############################################################################# ### main ############################################################################# if __name__=='__main__': ifmt = wegot_ifile = 0 maxsize = 10 minsize = 3 verbose = 0 ifs = oemolistream() ### parse command line opts,pargs = getopt.getopt(sys.argv[1:],'',['h','hh','v','i=', 'input_format=','ifmt=','maxsize=','minsize=']) for (opt,val) in opts: if opt=='--h': OEThrow.Usage(usage) elif opt=='--hh': OEThrow.Usage(usage+formats.Sprintformats()) elif opt=='--i': if not ifs.open(val): OEThrow.Fatal('Cannot open: %s' % val) wegot_ifile=1 elif opt=='--ifmt': ifmt = globals()['OEFormat_'+val.upper()] elif opt=='-maxsize': maxsize=int(val) elif opt=='-minsize': minsize=int(val) elif opt=='--v': verbose=1 else: OEThrow.Fatal('Illegal option: %s ' % val) if not wegot_ifile: ## using stdin if ifmt: ifs.open() else: OEThrow.Fatal('Input fmt required w/ stdin.\n'+usage) if ifmt: ifs.SetFormat(ifmt) else: ifmt = ifs.GetFormat() ###################################################### ### define ring smarts up to maxsize ### Set up subsearch and parse/check SMARTS. ###################################################### ringsmarts = ['' for i in range(minsize)] ringsmarts.extend(['*1'+((i-1)*'~*')+'1' for i in range(minsize,maxsize)]) ringpats = { } for size in range (minsize,maxsize): smarts = ringsmarts[size] ringpats[smarts] = OESubSearch() if not ringpats[smarts].Init(smarts): OEThrow.Fatal('Bad SMARTS: %s' % smarts) ####################################################### ### Loop over mols, find rings. ####################################################### ringmol = OEGraphMol() for mol in ifs.GetOEMols(): ir=0 ringatoms = {} ringbonds = {} OETriposAtomNames(mol) print OECreateCanSmiString(mol), mol.GetTitle() ######################################################################## ### Rings... ######################################################################## for size in range(minsize,maxsize): pat = ringpats[ringsmarts[size]] for matchbase in pat.Match(mol,1): newring = 0 str = '' ################################################################### ### (SASR = "set of all smallest rings") ### If any atom not in a ring, it's a new SASR ring. ### If all atoms are already in rings, but at least one ### atom's smallest ring is equal size, then it's a new SASR ring. ### If a bond is not yet in a ring, it's a new ring. ### If all bonds in rings, but at least one bond's smallest ### ring is same sized, it's a new ring. ### This includes such as C12CC1C3CC31. ### ### But how about buckyball-like ringsystems, where ### only the 5-membered rings will be found, not the 6? ### Conclusion: SASR will contain all ring atoms and bonds, ### but not necessarily a spanning set of rings. ################################################################### aids=[] for matchpair in matchbase.GetAtoms(): atom = matchpair.target aid = atom.GetIdx() aids.append(aid) if not ringatoms.has_key(aid): ringatoms[aid] = size newring = 1 elif ringatoms[aid]==size: newring = 1 str += "%s " % matchpair.target.GetName() bids=[] for matchpair in matchbase.GetBonds(): bond = matchpair.target bid = bond.GetIdx() bids.append(bid) if not ringbonds.has_key(bid): ringbonds[bid] = size newring = 1 elif ringbonds[bid]==size: newring = 1 if newring: ir+=1 rlist=[] for i in range(mol.NumAtoms()): rlist.append(0) for j in aids: rlist[j]=1 pred = PartPred(rlist) pred.SelectPart(1) OESubsetMol(ringmol,mol,pred) OEAssignImplicitHydrogens(ringmol) print "\t%d. SASR ring(%d): %s %s" % (ir,size,str ,OECreateCanSmiString(ringmol)) ringmol.Clear() ######################################################################## ### Ringsystems... ### rslist is list of natom integers indicating ringsys membership ### Atom indices start with 0. ######################################################################## OEFindRingAtomsAndBonds(mol) count,rslist = OEDetermineRingSystems(mol) print "\t%d ringsystems found" % count alist = AttachList(mol) ## identify substituent attachment points ### rsids is list of rs atomid lists rsids = [] for i in range(count+1): rsids.append([]) for i in range(len(rslist)): if rslist[i]: rsids[rslist[i]].append(i) ####################################################### ### loop over ringsystems, copy mol->mol2, delete other atoms ### and add substituent wildcards. ### Assumption: ids correspond between mol&mol2 copy. ### Note: attached H's become attachment points. ####################################################### for i in range(1,count+1): mol2 = OEMol(mol) for atom in mol2.GetAtoms(): idx = atom.GetIdx() if idx not in rsids[i] and idx [Rn] ######################################## r=0 for atom in partmol.GetAtoms(): if atom.GetAtomicNum()==0: ## wildcard r+=1 atom.SetMapIdx(r) i_rsys+=1 print '\tringsystem %d: %s'%(i_rsys,OECreateCanSmiString(partmol))