/// NOTE(review): in this copy of the file the original line breaks are gone
/// and text that sat between '<' and '>' appears to be missing: the targets
/// of the first four #include lines, the template arguments of OEIter and
/// vector, most of the Help() usage text, the opening of main(), and the
/// loops of the asym comparison ("for (j=0;j0) {"). The last statement is
/// also cut short. Restore from the original source before building.
///
/// Flow of the part of main() that is still visible:
///  - ParseCommandLine(argc,argv) runs first. Without an input file, ims is
///    opened with no argument and an input format is mandatory (otherwise
///    Help("Input fmt required w/ stdin.") is called). Without an output
///    file, oms is opened with no argument and the format falls back to
///    OEFormat::SMI when ofmt is unset.
///  - When smarts is unset, a pattern is read from cin after the prompt
///    "Enter smarts> ".
///  - pat.Init(smarts) is checked; on failure "bad smarts" is printed to
///    stderr and Help("") is called.
///  - A non-zero maxmatches is passed to pat.SetMaxMatches().
///  - For every molecule from ims.GetMolBases(): explicit hydrogens are added
///    when exph is set, then OEAssignHybridization and OETriposAtomNames run,
///    and OEPerceiveSymmetry runs when asym is set. Each match returned by
///    pat.Match(mol,usa) increments matchcount, and the symmetry classes of
///    its target atoms are pushed into idxll as one idxl vector.
///  - With verbose set, the molecule SMILES/title, each match's atom names
///    and the SMILES of the matched subset are written to stderr/cerr.
///  - OEWriteMolecule(oms,mol) and ++hitcount sit inside a guard whose
///    condition is partly lost ("...0) {"); presumably matchcount>0, or the
///    asym-reduced count -- TODO confirm against the original source.
///  - Finally the molecule and hit totals are written to cerr.
///////////////////////////////////////////////////////////////////////////// /// /// match.cpp - smarts matcher w/ different modes /// /// default - all possible matches. c1ccccc1 matches itself 12 ways. /// usa - unique-sets-of-atoms matches. c1ccccc1 matches Oc1ccccc1 once. /// asym - non symmetrical matches. Brc1ccccc1 matches Brc1cc(O)ccc1 /// 2 ways but Brc1ccc(O)cc1 only 1 way. c1ccccc1 matches /// Oc1ccccc1 6 ways. /// exph (can be used together with one of the above) - explicit H mode /// Normally Hs are implicit so [#1] will not match. Turn on /// exph and it will match all Hs. Beware that smarts [H] does not /// mean a hydrogen -- rather an atom with 1 H. /// ///////////////////////////////////////////////////////////////////////////// /// author: Jeremy Yang /// date: 17 Feb 2004 ///////////////////////////////////////////////////////////////////////////// #include #include #include #include #include "openeye.h" #include "oeplatform.h" #include "oesystem.h" #include "oechem.h" using namespace std; using namespace OEChem; using namespace OESystem; int ParseCommandLine(int argc, char* argv[]); void Help(string msg) { cerr << msg << endl <<"\t************************************************************"<outfile] |"< |"< (lowcase, e.g., smi,mdl) |"< |"< (lowcase, e.g., smi,mdl) |"< |"< ... 
match limit [1024] |"< mol; // molecule iterator OEIter match; // match iterator OEIter > mp; // matchpair iterator OEGraphMol targetmol; // matched submol int i,j,k; // generic ints unsigned int loc; char buff[1024]=""; // term input buffer string smi,errs; // returned strings vector idxl; // vector of match symclasses for a mol vector > idxll; // vector of idxl vectors vector syms; // vector of symmetric match-ids int matchcount=0; // match count int hitcount=0; // hit count ParseCommandLine(argc,argv); if (!wegot_ifile) { ims.open(); if (ifmt) ims.SetFormat(ifmt); else Help("Input fmt required w/ stdin."); } if (!wegot_ofile) { oms.open(); if (!ofmt) ofmt=OEFormat::SMI; oms.SetFormat(ofmt); } if (!smarts) { printf("Enter smarts> "); cin.getline(buff,1023); buff[strlen(buff)]='\0'; smarts=buff; } if (!pat.Init(smarts)) { fprintf(stderr,"bad smarts: \"%s\"\n",smarts); Help(""); } else if (verbose) fprintf(stderr,"smarts ok: \"%s\"\n",smarts); if (maxmatches) { pat.SetMaxMatches(maxmatches); if (verbose) fprintf(stderr,"max matches set to %d.\n",maxmatches); } if (usa && verbose) fprintf(stderr,"usa mode on.\n"); if (exph && verbose) fprintf(stderr,"explicit-H mode on.\n"); if (asym && verbose) fprintf(stderr,"asymmetric mode on.\n"); OEThrow.SetOutputStream(oeerrs); for (i=0,mol=ims.GetMolBases();mol;++mol) { ++i; idxll.clear(); if (verbose) { OECreateCanSmiString(smi,mol); cerr << smi << " " << mol->GetTitle() << endl; } if (exph) OEAddExplicitHydrogens(mol); OEAssignHybridization(mol); OETriposAtomNames(mol); matchcount=0; if (asym) OEPerceiveSymmetry(mol); for (match=pat.Match(mol,usa);match;++match) { if (verbose) fprintf(stderr,"\t%smatch (%d): ",usa?"USA ":"",matchcount+1); idxl.clear(); for (mp=match->GetAtoms();mp;++mp) { if (verbose) cerr << mp->target->GetName(); idxl.push_back(mp->target->GetSymmetryClass()); } idxll.push_back(idxl); ++matchcount; if (verbose) { OESubsetMol(targetmol,match); OECreateCanSmiString(smi,targetmol); cerr << "\t" << smi << 
endl; targetmol.Clear(); } } if (asym) { /////////////////////////////////////////////////////// // Compare all matches for symmetric equivalence // Keep vector (syms) of unique redundant matches. /////////////////////////////////////////////////////// syms.clear(); for (j=0;j0) { OEWriteMolecule(oms,mol); ++hitcount; } } cerr << i << " mols; " << hitcount << " hits" <