#!/usr/bin/env python
#############################################################################
### convert.py - OEChem molecule file format converter.  Lots of options.
###
### Author: Jeremy Yang
### Rev: 27 Jul 2004
#############################################################################
import os
import sys
import re
import getopt

from openeye.oechem import *

PROG = os.path.basename(sys.argv[0])

usage = '''
  %(PROG)s [options] [outfile]
  options:
    --h            ... help
    --hh           ... more help
    --i=
    --o=
    --ifmt=        ... (or inferred from infile)
    --ofmt=        ... (or inferred from outfile)
    --n=           ... how many mols to convert
    --skip=N       ... skip how many mols
    --canon        ... canonicalize atom/bond order
    --mc           ... handle as multi-conformer molecules (absolute)
    --mc_isomer    ... handle as multi-conformer molecules (isomeric)
    --sc           ... handle as single-conformer molecules
    --igz          ... input is gzipped (or inferred from infile)
    --ogz          ... gzip output (or inferred from outfile)
    --residues     ... perceive residues
    --no_hyd       ... suppress hydrogens
    --add_hyd      ... add hydrogens
    --major=N      ... major progress report
    --minor=N      ... minor progress report
    --v            ... verbose
    --q            ... quiet (supress errors)
''' % {'PROG': PROG}

usage2 = '''
  format-specific input options:
    --mmodiDefault
    --mmodiFormalCrg
    --mol2iDefault
    --mol2iM2H
    --pdbiTER            ... read TER as separator
    --pdbiEND            ... read END as separator
    --pdbiENDM           ... read ENDM as separator
    --pdbiTerMask
    --pdbiALL = 0x008;
    --pdbiDATA = 0x010;
    --pdbiCHARGE = 0x020;
    --pdbiRADIUS = 0x040;
    --pdbiDELPHI = 0x060;
    --pdbiBasicMask = 0x07F;
    --pdbiFormalCrg
    --pdbiImplicitH
    --pdbiBondOrder
    --pdbiRings
    --pdbiConnect
    --pdbiExtraMask
    --pdbiAllMask
    --pdbiDefault
    --smiiCanon
    --smiiStrict
    --smiiDefault
    --xyziFormalCrg
    --xyziImplicitH
    --xyziBondOrder
    --xyziRings
    --xyziConnect
    --xyziExtraMask
    --xyziDefault
  format-specific output options:
    --mdloMCHG           ... write MCHG field
    --mdloMISO           ... write MISO field
    --mdloMRGP           ... write MRGP field
    --mdloMDLParity      ... write MDL parity
    --mdloNoParity       ... write no parity
    --mdloCurrentParity  ... write internal parity
    --mdloMMask
    --mdloPMask
    --mdloDefault
    --mfoTitle
    --mfoDefault
    --mmodoDefault
    --mmodoAtomTypes
    --mol2oAtomTypeNames
    --mol2oBondTypeNames
    --mol2oAtomNames
    --mol2oNameMask
    --mol2oDefault
    --mopacoXYZ          ... write xyz
    --mopacoCHARGES      ... write charges
    --mopacoDefault
    --pdboBONDS          ... write BONDS
    --pdboORDERS         ... write ORDERS
    --pdboBOTH           ... write BOTH
    --pdboCHARGE         ... write CHARGE
    --pdboDefault
    --smioIsotopes
    --smioHydrogens
    --smioRGroups
    --smioAtomStereo
    --smioBondStereo
    --smioAtomMaps
    --smioCanonical
    --smioKekule
    --smioSuperAtoms
    --smioSmiMask
    --smioDefault
  input options (generic):
    --iOEAroModelDaylight
    --iOEAroModelOpenEye
    --iOEAroModelTripos
    --iOEAroModelMMFF
    --iOEAroModelMDL
    --iAroMask
    --iRings
    --iGenericMask
    --iDefault
  output options (generic):
    --oOEAroModelDaylight
    --oOEAroModelOpenEye
    --oOEAroModelTripos
    --oOEAroModelMMFF
    --oOEAroModelMDL
    --oAroMask
    --oRings
    --oGenericMask
    --oDefault
'''

# Options that take no flavor constant.  A trailing '=' marks an option that
# requires a value (getopt convention).
_SIMPLE_LONGOPTS = [
    'h', 'hh', 'v', 'q', 'i=', 'o=', 'n=', 'skip=',
    'mc', 'mc_isomer', 'sc', 'igz', 'ogz',
    'residues', 'no_hyd', 'add_hyd',
    'ifmt=', 'ofmt=', 'canon', 'major=', 'minor=',
]

_GENERIC_SUFFIXES = (
    'OEAroModelDaylight', 'OEAroModelOpenEye', 'OEAroModelTripos',
    'OEAroModelMMFF', 'OEAroModelMDL', 'AroMask', 'Rings', 'GenericMask',
    'Default',
)

# Flavor options.  Each group is
#   (option prefix, module-level accumulator name, OEChem constant prefix,
#    suffixes)
# so that e.g. '--mdloMCHG' ORs OEOFlavor_MDL_MCHG into mdloflags.  Constants
# are referenced by name and resolved only when the option is actually used.
_FLAVOR_GROUPS = (
    ('mdlo', 'mdloflags', 'OEOFlavor_MDL_',
     ('MCHG', 'MISO', 'MRGP', 'MDLParity', 'NoParity', 'CurrentParity',
      'MMask', 'PMask', 'Default')),
    ('pdbi', 'pdbiflags', 'OEIFlavor_PDB_',
     ('TER', 'END', 'ENDM', 'TerMask', 'ALL', 'DATA', 'CHARGE', 'RADIUS',
      'DELPHI', 'BasicMask', 'FormalCrg', 'ImplicitH', 'BondOrder', 'Rings',
      'Connect', 'ExtraMask', 'AllMask', 'Default')),
    ('pdbo', 'pdboflags', 'OEOFlavor_PDB_',
     ('BONDS', 'ORDERS', 'BOTH', 'CHARGE', 'Default')),
    ('mopaco', 'mopacoflags', 'OEOFlavor_MOPAC_',
     ('XYZ', 'CHARGES', 'Default')),
    ('mfo', 'mfoflags', 'OEOFlavor_MF_',
     ('Title', 'Default')),
    ('mmodi', 'mmodiflags', 'OEIFlavor_MMOD_',
     ('Default', 'FormalCrg')),
    ('mmodo', 'mmodoflags', 'OEOFlavor_MMOD_',
     ('Default', 'AtomTypes')),
    ('mol2i', 'mol2iflags', 'OEIFlavor_MOL2_',
     ('Default', 'M2H')),
    ('mol2o', 'mol2oflags', 'OEOFlavor_MOL2_',
     ('AtomTypeNames', 'BondTypeNames', 'AtomNames', 'NameMask', 'Default')),
    ('smii', 'smiiflags', 'OEIFlavor_SMI_',
     ('Canon', 'Strict', 'Default')),
    ('smio', 'smioflags', 'OEOFlavor_SMI_',
     ('Isotopes', 'Hydrogens', 'RGroups', 'AtomStereo', 'BondStereo',
      'AtomMaps', 'Canonical', 'Kekule', 'SuperAtoms', 'SmiMask', 'Default')),
    ('xyzi', 'xyziflags', 'OEIFlavor_XYZ_',
     ('FormalCrg', 'ImplicitH', 'BondOrder', 'Rings', 'Connect', 'ExtraMask',
      'Default')),
    ('i', 'iflags', 'OEIFlavor_Generic_', _GENERIC_SUFFIXES),
    ('o', 'oflags', 'OEOFlavor_Generic_', _GENERIC_SUFFIXES),
)


def _FlavorOptionTable():
    """Map each '--<flavor option>' to (accumulator name, constant name)."""
    table = {}
    for (optprefix, varname, constprefix, suffixes) in _FLAVOR_GROUPS:
        for suffix in suffixes:
            table['--' + optprefix + suffix] = (varname, constprefix + suffix)
    return table


def _ParseFormat(name):
    """Return the OEFormat_* constant named by *name* (case-insensitive).

    Reports an unknown format through OEThrow.Fatal instead of letting a
    bare KeyError escape.
    """
    key = 'OEFormat_' + name.upper()
    if key not in globals():
        OEThrow.Fatal('Unknown format: %s' % name)
    return globals().get(key, 0)


#############################################################################
def ParseCommandLine():
    """Parse sys.argv[1:] and store the results in module-level variables.

    Simple switches and valued options are assigned to the globals named in
    the ``global`` statement below.  Every flavor option ORs the matching
    OEChem flavor constant into its module-level accumulator (mdloflags,
    pdbiflags, ..., iflags, oflags); an accumulator that has not been
    initialised yet is treated as 0.

    Positional arguments returned by getopt are not used.
    Help options, malformed options and unknown options are reported through
    OEThrow.Usage / OEThrow.Fatal.
    """
    global ifmt, ifile, ofmt, ofile
    global n, skip, verbose, quiet, canon, mc, sc, mc_isomer, igz, ogz
    global residues, no_hyd, add_hyd
    global minor, major

    flavor_opts = _FlavorOptionTable()
    longopts = list(_SIMPLE_LONGOPTS)
    longopts.extend([name[2:] for name in flavor_opts])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'hv', longopts)
    except getopt.GetoptError:
        # sys.exc_info() keeps this handler valid on old and new interpreters.
        OEThrow.Fatal('%s\n%s' % (sys.exc_info()[1], usage))
        return

    if not opts:
        OEThrow.Usage(usage)

    module_vars = globals()
    for (opt, val) in opts:
        # The short forms -h / -v are accepted by getopt ('hv'), so honour
        # them as aliases of --h / --v.
        if opt in ('--h', '-h'):
            OEThrow.Usage(usage)
        elif opt == '--hh':
            import formats
            OEThrow.Usage(usage + usage2 + formats.Sprintformats())
        elif opt == '--i':
            ifile = val
        elif opt == '--o':
            ofile = val
        elif opt == '--ifmt':
            ifmt = _ParseFormat(val)
        elif opt == '--ofmt':
            ofmt = _ParseFormat(val)
        elif opt == '--n':
            n = int(val)
        elif opt == '--skip':
            skip = int(val)
        elif opt == '--minor':
            minor = int(val)
        elif opt == '--major':
            major = int(val)
        elif opt == '--canon':
            canon = 1
        elif opt == '--mc':
            mc = 1
        elif opt == '--mc_isomer':
            mc_isomer = 1
        elif opt == '--sc':
            sc = 1
        elif opt == '--igz':
            igz = 1
        elif opt == '--ogz':
            ogz = 1
        elif opt == '--residues':
            residues = 1
        elif opt == '--no_hyd':
            no_hyd = 1
        elif opt == '--add_hyd':
            add_hyd = 1
        elif opt in ('--v', '-v'):
            verbose = 1
        elif opt == '--q':
            quiet = 1
        elif opt in flavor_opts:
            (varname, constname) = flavor_opts[opt]
            if constname not in module_vars:
                OEThrow.Fatal('Option %s not supported: %s is not defined'
                              % (opt, constname))
            else:
                module_vars[varname] = (module_vars.get(varname, 0)
                                        | module_vars[constname])
        else:
            OEThrow.Fatal('Illegal option: %s\n%s' % (opt, usage))


#############################################################################
def HandleOEErrors(oeerrs, verbose):
    """Copy buffered OEChem messages to stderr, then empty the buffer.

    oeerrs  -- stream object providing str() and clear()
    verbose -- when false, lines containing 'Warning' (any case) are dropped

    Blank lines are always skipped.
    """
    errstr = oeerrs.str()
    # NOTE(review): some toolkit/Python combinations may hand back bytes
    # rather than text here; decode defensively so split() below works.
    if not isinstance(errstr, str):
        errstr = errstr.decode('utf-8', 'replace')
    warning_rx = re.compile('Warning', re.I)
    for line in errstr.split('\n'):
        if not line.rstrip():
            continue
        if warning_rx.search(line) and not verbose:
            continue
        sys.stderr.write("%s\n" % line)
    oeerrs.clear()


def _CheckFlavorFormat(label, fmt, compatible):
    """Abort when format-specific flags were given for a different format."""
    if not compatible:
        OEThrow.Fatal('%s flags incompatible w/ %s'
                      % (label, OEGetFormatString(fmt)))


def _ReportErrors(oeerrs, verbose, quiet):
    """Discard buffered messages when quiet, otherwise print them."""
    if quiet:
        oeerrs.clear()
    else:
        HandleOEErrors(oeerrs, verbose)


#############################################################################
if __name__ == '__main__':
    ifmt = 0; ifile = ""; ofmt = 0; ofile = ""
    n = 0; skip = 0; verbose = 0; quiet = 0; canon = 0
    mc = 0; mc_isomer = 0; sc = 0; igz = 0; ogz = 0
    residues = 0; no_hyd = 0; add_hyd = 0
    mdloflags = 0x0
    mfoflags = 0x0
    mol2iflags = 0x0
    mol2oflags = 0x0
    mopacoflags = 0x0
    pdbiflags = 0x0
    pdboflags = 0x0
    smiiflags = 0x0
    smioflags = 0x0
    xyziflags = 0x0
    mmodiflags = 0x0
    mmodoflags = 0x0
    iflags = 0x0
    oflags = 0x0
    minor = 50
    major = 1000

    ParseCommandLine()

    ############################################
    ### specify i/o streams
    ############################################
    ims = oemolistream()
    if ifile and not ims.open(ifile):
        OEThrow.Fatal('Cannot open: %s' % ifile)
    oms = oemolostream()
    if ofile and not oms.open(ofile):
        OEThrow.Fatal('Cannot open: %s' % ofile)

    # An explicit --ifmt/--ofmt overrides whatever the stream reports.
    if ifmt:
        ims.SetFormat(ifmt)
    else:
        ifmt = ims.GetFormat()
    if ofmt:
        oms.SetFormat(ofmt)
    else:
        ofmt = oms.GetFormat()

    if not OEIsReadable(ifmt):
        OEThrow.Fatal('%s format not readable' % OEGetFormatString(ifmt))
    if not OEIsWriteable(ofmt):
        OEThrow.Fatal('%s format not writeable' % OEGetFormatString(ofmt))

    if igz:
        ims.Setgz(1)
    if ogz:
        oms.Setgz(1)

    ############################################
    ### input-flavoring
    ############################################
    if pdbiflags:
        _CheckFlavorFormat('PDB', ifmt, ifmt == OEFormat_PDB)
        iflags |= pdbiflags
    if smiiflags:
        _CheckFlavorFormat(
            'SMI', ifmt, ifmt in (OEFormat_SMI, OEFormat_CAN, OEFormat_ISM))
        iflags |= smiiflags
    if xyziflags:
        _CheckFlavorFormat('XYZ', ifmt, ifmt == OEFormat_XYZ)
        iflags |= xyziflags
    # The --mol2i* and --mmodi* options are parsed above, so apply them too.
    if mol2iflags:
        _CheckFlavorFormat(
            'MOL2', ifmt, ifmt in (OEFormat_MOL2, OEFormat_MOL2H))
        iflags |= mol2iflags
    if mmodiflags:
        _CheckFlavorFormat('MMOD', ifmt, ifmt == OEFormat_MMOD)
        iflags |= mmodiflags
    if iflags:
        if verbose:
            sys.stderr.write('Input flags: %d\n' % iflags)
        ims.SetFlavor(ifmt, iflags)

    ############################################
    ### output-flavoring
    ############################################
    if pdboflags:
        _CheckFlavorFormat('PDB', ofmt, ofmt == OEFormat_PDB)
        oflags |= pdboflags
    if mdloflags:
        _CheckFlavorFormat(
            'MDL', ofmt, ofmt in (OEFormat_MDL, OEFormat_SDF))
        oflags |= mdloflags
    if smioflags:
        _CheckFlavorFormat(
            'SMI', ofmt, ofmt in (OEFormat_SMI, OEFormat_CAN, OEFormat_ISM))
        oflags |= smioflags
    if mol2oflags:
        _CheckFlavorFormat(
            'MOL2', ofmt, ofmt in (OEFormat_MOL2, OEFormat_MOL2H))
        oflags |= mol2oflags
    if mfoflags:
        _CheckFlavorFormat('MF', ofmt, ofmt == OEFormat_MF)
        oflags |= mfoflags
    if mmodoflags:
        _CheckFlavorFormat('MMOD', ofmt, ofmt == OEFormat_MMOD)
        oflags |= mmodoflags
    if mopacoflags:
        _CheckFlavorFormat('MOPAC', ofmt, ofmt == OEFormat_MOPAC)
        oflags |= mopacoflags
    if oflags:
        if verbose:
            sys.stderr.write('Output flags: %d\n' % oflags)
        oms.SetFlavor(ofmt, oflags)

    ############################################
    ### single/multi-conformer setting
    ############################################
    multiconf = mc or mc_isomer
    if mc:
        ims.SetConfTest(OEAbsoluteConfTest(0))
    if mc_isomer:
        ims.SetConfTest(OEIsomericConfTest(0))
    # --sc selects a graph molecule only when no multi-conformer mode is on.
    if sc and not multiconf:
        mol = OEGraphMol()
    else:
        mol = OEMol()

    #######################################################
    ### Molecule loop
    #######################################################
    # Route OEChem messages into a buffer so they can be filtered.
    oeerrs = oeosstream()
    OEThrow.SetOutputStream(oeerrs)
    iin = 0; iout = 0; iconfin = 0; iconfout = 0
    dots = OEDots(major, minor, 'mols')
    while OEReadMolecule(ims, mol):
        if not quiet:
            dots.Update()
        iin += 1
        if multiconf:
            iconfin += mol.NumConfs()
        if iin <= skip:
            mol.Clear()
            continue
        if canon:
            OECanonicalOrderAtoms(mol)
            OECanonicalOrderBonds(mol)
        if residues:
            OEPerceiveResidues(mol)
        if no_hyd:
            for atom in mol.GetAtoms(OEHasAtomicNum(1)):
                mol.DeleteAtom(atom)
            OESuppressHydrogens(mol)
        elif add_hyd:
            OEAddExplicitHydrogens(mol)
        OEWriteMolecule(oms, mol)
        if multiconf:
            iconfout += mol.NumConfs()
        mol.Clear()
        iout += 1
        # Report before the limit check so the last molecule's messages
        # are not lost when the loop breaks.
        _ReportErrors(oeerrs, verbose, quiet)
        if iout == n:
            sys.stderr.write('NOTE: limit %d reached\n' % n)
            break

    # Flush anything still buffered (e.g. from skipped molecules or from the
    # read that ended the loop).
    _ReportErrors(oeerrs, verbose, quiet)

    if not quiet:
        sys.stderr.write('\n%s (' % PROG)
        if ims.Getgz():
            gzlabel = 'gzipped '
        else:
            gzlabel = ''
        sys.stderr.write('%s%s -> ' % (gzlabel, OEGetFormatString(ifmt)))
        if oms.Getgz():
            gzlabel = 'gzipped '
        else:
            gzlabel = ''
        sys.stderr.write('%s%s):\n' % (gzlabel, OEGetFormatString(ofmt)))
        sys.stderr.write('\tmols in: %d' % (iin))
        sys.stderr.write('\tmols out: %d\n' % (iout))
        if multiconf:
            sys.stderr.write('\tconfs in: %d' % (iconfin))
            sys.stderr.write('\tconfs out: %d\n' % (iconfout))
        # The progress counter is only updated when not quiet, so its total
        # is only meaningful here.
        dots.Total()

    ims.close()
    oms.close()