///////////////////////////////////////////////////////////////////////////// /// rmsd_selftest.cpp - analyze multiconformer molecule in rmsd space. /// /// NOTES: In OEChem 1.3.1 OERMSD(OEMCMolBase,OEMCMolBase,...) is missing /// so OERMSD(OESCMolBase,OESCMolBase,...) can be used by iterating over /// confs in the first mol. Also in OCEhem 1.3.1 overlay=true crashes if /// the rot and trans matricies are not parameters. /// OERMSD(c,mol,rmsds,automorph,heavyonly,true); //crashes /// /// author: Jeremy Yang /// rev: 6 Oct 2004 ///////////////////////////////////////////////////////////////////////////// #include "openeye.h" #include #include #include #include "oeplatform.h" #include "oesystem.h" #include "oechem.h" using namespace OESystem; using namespace OEChem; using namespace std; static string ifile; static string progname; static int verbose=0; static bool overlay=false; static bool automorph=false; static bool pageout=false; static bool heavyonly=true; static float rmsdstep=0.2; static float rmsdmax=3.0; void Help(string msg) { cerr << msg << endl <<" "< rmsdses) { unsigned int i,j,size=rmsdses.size(); printf(" "); for (j=0; j rmsdses) { unsigned int i,j,jj,step=12; unsigned int size=rmsdses.size(); for (jj=0; jj &rmsdses, unsigned int *imin, unsigned int *jmin) { double rmsd,rmsd_min=999.9; for (unsigned int i=0;i rmsdses) { unsigned int i,j,imin=0,jmin=0,imax=0,jmax=0; unsigned int size=rmsdses.size(); vector freq; vector freqmin; vector freqmax; double rmsd; double rmsd_sum=0.0; double rmsd_min=999.0; double rmsd_max=0.0; double rmsd_min_con; // min rmsd for current con double rmsd_min_con_sum; // avg rmsd_min_con double rmsd_max_con; // max rmsd for current con double rmsd_max_con_sum; // avg rmsd_max_con unsigned int freqrange = (int)ceil(rmsdmax/rmsdstep); unsigned int nrmsd_sum,nrmsd_min_sum,nrmsd_max_sum; unsigned int nrmsd_total=size*(size-1)/2; ////////////////////////////////////////////////////////////////// // #confs vs. global minrmsd. // Using rmsdses[] array, // - find current rmsd_min // - signify deletion of 2nd conf by setting rmsds[j][j]=-1.0 // - for each rmsd step, list current #confs // - restore rmsds to original state ////////////////////////////////////////////////////////////////// // Note that removing conformations to diversify a conf ensemble // can be done in several ways. Which conf of a near pair to // delete can make a difference in the eventual number of // conformations for a given global rmsd_min. ////////////////////////////////////////////////////////////////// printf("\nMinimum global rmsd vs number of conformations\n"); unsigned int nconfs=size; printf(" rmsd min: nconfs\n"); for (i=0;i<=freqrange;++i) { while (FindMinRmsd(rmsdses,&imin,&jmin) < rmsdstep*i) { rmsdses[jmin][jmin]=-1.0; --nconfs; } printf("\t%.2f: %5d\n",rmsdstep*i,nconfs); if (nconfs==1) break; } for (i=0;irmsd_max) rmsd_max=rmsdses[imax=i][jmax=j]; if ((unsigned int)(rmsd/rmsdstep)>freqrange) ++freq[freqrange]; else ++freq[(unsigned int) (rmsd/rmsdstep)]; } } printf(" global avg rmsd: %5.2f\n",rmsd_sum/nrmsd_total); printf(" global min rmsd[%d][%d]: %5.2f\n",imin,jmin,rmsd_min); printf(" global max rmsd[%d][%d]: %5.2f\n",imax,jmax,rmsd_max); printf(" frequencies:\n"); for (i=0,nrmsd_sum=0;i%5.1f%%)\n", rmsdstep*i,rmsdstep*(i+1),freq[i], (float)freq[i]*100.0/(float)nrmsd_total, (float)nrmsd_sum*100.0/(float)nrmsd_total); } nrmsd_sum+=freq[freqrange]; printf("\t%.2f+: %5d (%5.1f%% ->%5.1f%%)\n", rmsdstep*(freqrange),freq[freqrange], (float)freq[freqrange]*100.0/(float)nrmsd_total, (float)nrmsd_sum*100.0/(float)nrmsd_total); printf(" total rmsds: %3d\n\n",nrmsd_total); ////////////////////////////////////////////////////////////////// // Analysis of min and max rmsds for each conformer. ////////////////////////////////////////////////////////////////// printf("Max/min rmsd analysis (near- and far-conformer)\n"); rmsd_min_con_sum=0.0; rmsd_max_con_sum=0.0; freqmin.assign(freqrange+1,0); freqmax.assign(freqrange+1,0); for (i=0;irmsd_max_con) rmsd_max_con=rmsd; } rmsd_min_con_sum+=rmsd_min_con; rmsd_max_con_sum+=rmsd_max_con; if ((unsigned int)(rmsd_min_con/rmsdstep)>freqrange) ++freqmin[freqrange]; else ++freqmin[(unsigned int) (rmsd_min_con/rmsdstep)]; if ((unsigned int)(rmsd_max_con/rmsdstep)>freqrange) ++freqmax[freqrange]; else ++freqmax[(unsigned int) (rmsd_max_con/rmsdstep)]; } printf(" avg min rmsd: %5.2f\n",rmsd_min_con_sum/(size-1)); printf(" avg max rmsd: %5.2f\n",rmsd_max_con_sum/(size-1)); printf(" frequencies: min max\n"); for (i=0,nrmsd_min_sum=0,nrmsd_max_sum=0;i%5.1f%%) %5d (%5.1f%% ->%5.1f%%)\n", rmsdstep*i,rmsdstep*(i+1), freqmin[i], (float)freqmin[i]*100.0/(float)size, (float)nrmsd_min_sum*100.0/(float)size, freqmax[i], (float)freqmax[i]*100.0/(float)size, (float)nrmsd_max_sum*100.0/(float)size); } nrmsd_min_sum+=freqmin[freqrange]; nrmsd_max_sum+=freqmax[freqrange]; printf("\t%.2f+: %5d (%5.1f%% ->%5.1f%%) %5d (%5.1f%% ->%5.1f%%)\n", rmsdstep*(freqrange), freqmin[freqrange], (float)freqmin[freqrange]*100.0/(float)size, (float)nrmsd_min_sum*100.0/(float)size, freqmax[freqrange], (float)freqmax[freqrange]*100.0/(float)size, (float)nrmsd_max_sum*100.0/(float)size); } } ///////////////////////////////////////////////////////////////////////////// int main(int argc,char *argv[]) { OEMol mol; OEIter c; OEIter m; oemolistream ims; double *rmsds=NULL; double *trans=NULL; double *rot=NULL; string smi; vector rmsdses; unsigned int maxconfidx,i,molcount,numconfs; ProcessArgs(argc,argv); if (!ims.open(ifile.c_str())) OEThrow.Fatal("Unable to open \"%s\".\n",ifile.c_str()); ims.SetConfTest(OEAbsoluteConfTest(false)); molcount=0; for (m=ims.GetMCMolBases();m;++m) { ++molcount; OESetDimensionFromCoords(m); if (m->GetDimension()==0) numconfs=0; else numconfs=m->NumConfs(); printf("%s: NumConfs: %d\n",m->GetTitle(),numconfs); if (verbose) { OECreateIsoSmiString(smi,m); printf("%s\n",smi.c_str()); } if (!*m || m->GetDimension()<2) { OEThrow.Warning("%s: mol empty or has insufficient coords.",m->GetTitle()); continue; } maxconfidx=m->GetMaxConfIdx(); if (maxconfidx!=m->NumConfs()) { OEThrow.Warning("%s: NumConfs disagreement: %d!=%d",m->GetTitle(),maxconfidx,m->NumConfs()); continue; } if (m->NumConfs()<2) { OEThrow.Info("%s: NumConfs<2; no rmsds possible.",m->GetTitle()); continue; } for (c=m->GetConfs();c;++c) { rmsds=(double *)malloc(maxconfidx*sizeof(double)); for (i=0;iGetTitle(),maxconfidx,rmsdses.size()); continue; } if (verbose>1) { if (pageout) PrintPagedTable(rmsdses); else PrintTable(rmsdses); } ShowStats(rmsdses); for (i=0;i