RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017-2022 Novartis Institutes for BioMedical Research and
3 // other RDKit contributors
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef RGROUP_DECOMP_DATA
12 #define RGROUP_DECOMP_DATA
13 
14 #include "RGroupCore.h"
15 #include "RGroupDecomp.h"
16 #include "RGroupMatch.h"
17 #include "RGroupScore.h"
18 #include "RGroupFingerprintScore.h"
19 #include "RGroupGa.h"
20 #include <vector>
21 #include <map>
22 
23 // #define VERBOSE 1
24 
25 namespace RDKit {
26 
27 extern const std::string _rgroupInputDummy;
28 
30  // matches[mol_idx] == vector of potential matches
31  std::map<int, RCore> cores;
32  std::map<std::string, int> newCores; // new "cores" found along the way
34  // this caches the running product of permutations
35  // across calls to process()
36  size_t permutationProduct = 1;
37  // this caches the size of the previous matches vector
38  // such that the size of the current chunk can be inferred
39  size_t previousMatchSize = 0;
40  // the default for Greedy/GreedyChunks is keeping only the best
41  // permutation after each call to process()
42  bool prunePermutations = true;
44 
45  std::vector<std::vector<RGroupMatch>> matches;
46  std::set<int> labels;
47  std::vector<size_t> permutation;
48  unsigned int pruneLength = 0U;
50  std::map<int, std::vector<int>> userLabels;
51 
52  std::vector<int> processedRlabels;
53 
54  std::map<int, int> finalRlabelMapping;
56 
57  RGroupDecompData(const RWMol &inputCore,
59  : params(std::move(inputParams)) {
60  addCore(inputCore);
61  prepareCores();
62  }
63 
64  RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
66  : params(std::move(inputParams)) {
67  for (const auto &core : inputCores) {
68  addCore(*core);
69  }
70  prepareCores();
71  }
72 
73  void addCore(const ROMol &inputCore) {
75  RWMol core(inputCore);
77  cores[cores.size()] = RCore(core);
78  } else {
79  cores[cores.size()] = RCore(inputCore);
80  }
81  }
82 
83  void prepareCores() {
84  for (auto &core : cores) {
85  RWMol *alignCore = core.first ? cores[0].core.get() : nullptr;
86  CHECK_INVARIANT(params.prepareCore(*core.second.core, alignCore),
87  "Could not prepare at least one core");
88  core.second.init();
89  core.second.labelledCore.reset(new RWMol(*core.second.core));
90  }
91  }
92 
93  void setRlabel(Atom *atom, int rlabel) {
94  PRECONDITION(rlabel > 0, "RLabels must be >0");
96  atom->setAtomMapNum(rlabel);
97  }
98 
100  std::string dLabel = "R" + std::to_string(rlabel);
101  atom->setProp(common_properties::dummyLabel, dLabel);
102  setAtomRLabel(atom, rlabel);
103  }
104 
106  atom->setIsotope(rlabel);
107  }
108  }
109 
110  int getRlabel(Atom *atom) const {
112  return atom->getAtomMapNum();
113  }
115  return atom->getIsotope();
116  }
117 
119  unsigned int label = 0;
121  return label;
122  }
123  }
124 
125  CHECK_INVARIANT(0, "no valid r label found");
126  }
127 
128  double scoreFromPrunedData(const std::vector<size_t> &permutation,
129  bool reset = true) {
130  PRECONDITION(
132  "Scoring method is not fingerprint variance!");
133 
135  "Illegal permutation prune length");
136  if (permutation.size() < pruneLength * 1.5) {
137  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
139  pos, permutation[pos], matches, labels);
140  }
141  double score =
143  if (reset) {
144  for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
146  pos, permutation[pos], matches, labels);
147  }
148  } else {
149  pruneLength = permutation.size();
150  }
151  return score;
152  } else {
153  if (reset) {
155  } else {
157  pruneLength = permutation.size();
160  }
161  }
162  }
163 
// Collapse the candidate list of every molecule in the current chunk down to
// the single match selected by `permutation`.
// The chunk is the trailing permutation.size() entries of `matches`
// (offset = matches.size() - permutation.size()); earlier entries are left
// untouched. Afterwards `permutation` is reset to all zeros, index 0 being the
// only match left for each of those molecules.
// NOTE(review): embedded source lines 176-178 are absent from this listing, so
// whatever runs between the reset and the closing braces is not visible here.
164  void prune() { // prune all but the current "best" permutation of matches
165  PRECONDITION(permutation.size() <= matches.size(),
166  "permutation.size() should be <= matches.size()");
167  size_t offset = matches.size() - permutation.size();
168  for (size_t mol_idx = 0; mol_idx < permutation.size(); ++mol_idx) {
169  std::vector<RGroupMatch> keepVector;
     // mi is the position in `matches` of the mol_idx-th molecule of the chunk
170  size_t mi = mol_idx + offset;
     // at() range-checks the selected match index
171  keepVector.push_back(matches[mi].at(permutation[mol_idx]));
172  matches[mi] = keepVector;
173  }
174 
175  permutation = std::vector<size_t>(permutation.size(), 0);
179  }
180  }
181 
182  // Return the RGroups with the current "best" permutation
183  // of matches.
// Builds one RGroupMatch per entry of `matches` for the currently selected
// permutation, then erases from those results every label whose R-groups are
// hydrogen-only across all results whose core carries that label.
// `results` holds copies, so the erasures do not modify `matches`.
// NOTE(review): embedded source lines 186-187 (the initializer of
// removeAllHydrogenRGroups) are absent from this listing.
184  std::vector<RGroupMatch> GetCurrentBestPermutation() const {
185  const bool removeAllHydrogenRGroups =
188 
189  std::vector<RGroupMatch> results; // std::map<int, RGroup> > result;
     // If `permutation` is shorter than `matches`, index 0 is taken for every
     // molecule; otherwise permutation[i] selects the match. Both lookups use
     // at(), so an out-of-range index throws rather than reading past the end.
190  bool isPruned = (permutation.size() < matches.size());
191  for (size_t i = 0; i < matches.size(); ++i) {
192  size_t pi = (isPruned ? 0 : permutation.at(i));
193  results.push_back(matches[i].at(pi));
194  }
195 
196  // * if a dynamically-added RGroup (i.e., when onlyMatchAtRGroups=false)
197  // is all hydrogens, remove it
198  // * if a user-defined RGroup is all hydrogens and either
199  // params.removeAllHydrogenRGroups==true or
200  // params.removeAllHydrogenRGroupsAndLabels==true, remove it
201 
202  // This logic is a bit tricky, find all labels that have common cores
203  // and analyze those sets independently.
204  // i.e. if core 1 doesn't have R1 then don't analyze it when looking
205  // at label 1
206  std::map<int, std::set<int>> labelCores; // map from label->cores
207  std::set<int> coresVisited;
     // Each core index that occurs in `results` is inspected once; every label
     // returned by getRlabels() for that core is recorded in labelCores.
208  for (auto &position : results) {
209  int core_idx = position.core_idx;
210  if (coresVisited.find(core_idx) == coresVisited.end()) {
211  coresVisited.insert(core_idx);
212  auto core = cores.find(core_idx);
213  if (core != cores.end()) {
214  for (auto rlabels : getRlabels(*core->second.core)) {
215  int rlabel = rlabels.first;
216  labelCores[rlabel].insert(core_idx);
217  }
218  }
219  }
220  }
221 
222  for (int label : labels) {
     // Positive labels are only examined when removeAllHydrogenRGroups is
     // true; labels <= 0 are always examined.
223  if (label > 0 && !removeAllHydrogenRGroups) {
224  continue;
225  }
     // allH stays true unless some result whose core carries this label has a
     // non-hydrogen R-group for it. Results whose core lacks the label are
     // ignored by this check.
226  bool allH = true;
227  for (auto &position : results) {
228  R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
229  bool labelHasCore = labelCores[label].find(position.core_idx) !=
230  labelCores[label].end();
231  if (labelHasCore && rgroup != position.rgroups.end() &&
232  !rgroup->second->is_hydrogen) {
233  allH = false;
234  break;
235  }
236  }
237 
     // The label is erased from every result, including those whose core did
     // not take part in the check above.
238  if (allH) {
239  for (auto &position : results) {
240  position.rgroups.erase(label);
241  }
242  }
243  }
244  return results;
245  }
246 
// Registry of the R-group label numbers that have already been handed out.
class UsedLabels {
 public:
  std::set<int> labels_used;

  // Records rlabel as used.
  // Returns true if the label was newly recorded, false if it was already
  // present (in which case the set is left unchanged).
  bool add(int rlabel) {
    // set::insert reports through .second whether the value was newly added
    return labels_used.insert(rlabel).second;
  }

  // Claims and returns the smallest integer >= 1 that is not yet recorded.
  int next() {
    for (int candidate = 1;; ++candidate) {
      // insertion only succeeds for a value that was not in the set yet
      if (labels_used.insert(candidate).second) {
        return candidate;
      }
    }
  }
};
267 
268  void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels) {
269  auto atoms = getRlabels(core);
270  for (const auto &p : atoms) {
271  if (p.first > 0) {
272  userLabels.insert(p.first);
273  }
274  }
275  }
276 
277  void addAtoms(RWMol &mol,
278  const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
279  for (const auto &i : atomsToAdd) {
280  mol.addAtom(i.second, false, true);
281  mol.addBond(i.first, i.second, Bond::SINGLE);
282  if (mol.getNumConformers()) {
283  MolOps::setTerminalAtomCoords(mol, i.second->getIdx(),
284  i.first->getIdx());
285  }
286  }
287  }
288 
// Rewrites the R labels on `core` into their final numbering.
//   core             - molecule edited in place (labels set, dummy atoms added)
//   mappings         - in/out: original label -> final label; extended here
//   used_labels      - in/out: registry that hands out unused label numbers
//   indexLabels      - labels to number via `mappings` / used_labels.next()
//   extraAtomRLabels - per label, one vector element for each additional
//                      dummy atom to attach to that label's atom
// Three passes queue new dummy atoms in atomsToAdd; they are attached by
// addAtoms() at the end, after which the RLABEL and RLABEL_TYPE properties are
// cleared from the originally labelled atoms.
// NOTE(review): embedded source lines 349 and 370 are absent from this
// listing, so the second half of the dummy test in the index-label pass and the
// macro that opens the check in the multiple-bond pass are not visible.
289  void relabelCore(RWMol &core, std::map<int, int> &mappings,
290  UsedLabels &used_labels, const std::set<int> &indexLabels,
291  const std::map<int, std::vector<int>> &extraAtomRLabels) {
292  // Now remap to proper rlabel ids
293  // if labels are positive, they come from User labels
294  // if they are negative, they come from indices and should be
295  // numbered *after* the user labels.
296  //
297  // Some indices are attached to multiple bonds,
298  // these rlabels should be incrementally added last
299  std::map<int, Atom *> atoms = getRlabels(core);
300  // a core only has one labelled index
301  // a secondary structure extraAtomRLabels contains the number
302  // of bonds between this atom and the side chain
303 
304  // a sidechain atom has a vector of the attachments back to the
305  // core that takes the place of numBondsToRlabel
306 
     // NOTE(review): bondsToCore is not referenced by any line visible here.
307  std::map<int, std::vector<int>> bondsToCore;
308  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
309 
310  // Deal with user supplied labels
     // Every label >= 0 maps to itself and is registered as used.
311  for (const auto &rlabels : atoms) {
312  int userLabel = rlabels.first;
313  if (userLabel < 0) {
314  continue; // not a user specified label
315  }
316  Atom *atom = rlabels.second;
317  mappings[userLabel] = userLabel;
318  used_labels.add(userLabel);
319 
320  if (atom->getAtomicNum() == 0 &&
321  atom->getDegree() == 1) { // add to existing dummy/rlabel
322  setRlabel(atom, userLabel);
323  } else { // adds new rlabel
     // the new dummy atom is only queued here; it is attached by addAtoms() below
324  auto *newAt = new Atom(0);
325  setRlabel(newAt, userLabel);
326  atomsToAdd.emplace_back(atom, newAt);
327  }
328  }
329 
330  // Deal with non-user supplied labels
331  for (auto newLabel : indexLabels) {
332  auto atm = atoms.find(newLabel);
333  if (atm == atoms.end()) {
     // this core has no atom carrying the label
334  continue;
335  }
336 
337  Atom *atom = atm->second;
338 
     // Reuse the final label if one was already assigned (mappings is shared
     // across calls by the caller); otherwise claim the next free number.
339  int rlabel;
340  auto mapping = mappings.find(newLabel);
341  if (mapping == mappings.end()) {
342  rlabel = used_labels.next();
343  mappings[newLabel] = rlabel;
344  } else {
345  rlabel = mapping->second;
346  }
347 
348  if (atom->getAtomicNum() == 0 &&
350  *atom)) { // add to dummy
351  setRlabel(atom, rlabel);
352  } else {
353  auto *newAt = new Atom(0);
354  setRlabel(newAt, rlabel);
355  atomsToAdd.emplace_back(atom, newAt);
356  }
357  }
358 
359  // Deal with multiple bonds to the same label
360  for (const auto &extraAtomRLabel : extraAtomRLabels) {
361  auto atm = atoms.find(extraAtomRLabel.first);
362  if (atm == atoms.end()) {
363  continue; // label not used in the rgroup
364  }
365  Atom *atom = atm->second;
366 
     // One fresh label and one new dummy atom per element of the vector; these
     // labels are not written into `mappings`.
367  for (size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
368  int rlabel = used_labels.next();
369  // Is this necessary?
371  atom->getAtomicNum() > 1,
372  "Multiple attachments to a dummy (or hydrogen) is weird.");
373  auto *newAt = new Atom(0);
374  setRlabel(newAt, rlabel);
375  atomsToAdd.emplace_back(atom, newAt);
376  }
377  }
378 
379  addAtoms(core, atomsToAdd);
     // remove the RLABEL / RLABEL_TYPE properties from the originally labelled atoms
380  for (const auto &rlabels : atoms) {
381  auto atom = rlabels.second;
382  atom->clearProp(RLABEL);
383  atom->clearProp(RLABEL_TYPE);
384  }
385  core.updatePropertyCache(false); // this was github #1550
386  }
387 
// Applies the final label numbering in `mappings` to one R-group's combinedMol.
//   rgroup   - must have a non-null combinedMol; edited in place and flagged
//              `labelled` on exit
//   mappings - original label -> final label; every label found in an atom's
//              SIDECHAIN_RLABELS property must be present (invariant)
// A molecule already carrying the `done` property is not processed again.
// NOTE(review): embedded source lines 440, 460 and 462 are absent from this
// listing; in particular the statement that opens the block around removeHs()
// is not visible.
388  void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings) {
389  PRECONDITION(rgroup.combinedMol.get(), "Unprocessed rgroup");
390 
391  RWMol &mol = *rgroup.combinedMol.get();
392 
     // already relabelled: only flag the rgroup and leave
393  if (rgroup.combinedMol->hasProp(done)) {
394  rgroup.labelled = true;
395  return;
396  }
397 
398  mol.setProp(done, true);
399  std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
     // Despite the "Wt" suffix, this stores atomic numbers (getAtomicNum()),
     // keyed by the atom's RLABEL_CORE_INDEX value.
400  std::map<int, int> rLabelCoreIndexToAtomicWt;
401 
402  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
403  ++atIt) {
404  Atom *atom = *atIt;
405  if (atom->hasProp(SIDECHAIN_RLABELS)) {
406  atom->setIsotope(0);
407  const std::vector<int> &rlabels =
408  atom->getProp<std::vector<int>>(SIDECHAIN_RLABELS);
409  // switch on atom mappings or rlabels....
410 
411  for (int rlabel : rlabels) {
412  auto label = mappings.find(rlabel);
413  CHECK_INVARIANT(label != mappings.end(), "Unprocessed mapping");
414 
     // Three cases: a dummy atom is labelled directly unless it carries
     // _rgroupInputDummy; an atom with RLABEL_CORE_INDEX is turned into a
     // labelled dummy; any other atom gets a new labelled dummy queued.
415  if (atom->getAtomicNum() == 0) {
416  if (!atom->hasProp(_rgroupInputDummy)) {
417  setRlabel(atom, label->second);
418  }
419  } else if (atom->hasProp(RLABEL_CORE_INDEX)) {
420  atom->setAtomicNum(0);
421  setRlabel(atom, label->second);
422  } else {
423  auto *newAt = new Atom(0);
424  setRlabel(newAt, label->second);
425  atomsToAdd.emplace_back(atom, newAt);
426  }
427  }
428  }
429  if (atom->hasProp(RLABEL_CORE_INDEX)) {
430  // convert to dummy as we don't want to collapse hydrogens onto the core
431  // match
     // the atomic number recorded here is written back in the restore loop below
432  auto rLabelCoreIndex = atom->getProp<int>(RLABEL_CORE_INDEX);
433  rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->getAtomicNum();
434  atom->setAtomicNum(0);
435  }
436  }
437 
438  addAtoms(mol, atomsToAdd);
439 
     // removeHs() is called with implicitOnly, updateExplicitCount and
     // sanitize all false, while `blocker` is in scope.
441  RDLog::LogStateSetter blocker;
442  bool implicitOnly = false;
443  bool updateExplicitCount = false;
444  bool sanitize = false;
445  MolOps::removeHs(mol, implicitOnly, updateExplicitCount, sanitize);
446  }
447 
448  mol.updatePropertyCache(false); // this was github #1550
449  rgroup.labelled = true;
450 
451  // Restore any core matches that we have set to dummy
452  for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
453  ++atIt) {
454  Atom *atom = *atIt;
455  if (atom->hasProp(RLABEL_CORE_INDEX)) {
456  // don't need to set IsAromatic on atom - that seems to have been saved
457  atom->setAtomicNum(
458  rLabelCoreIndexToAtomicWt[atom->getProp<int>(RLABEL_CORE_INDEX)]);
459  atom->setNoImplicit(true);
461  }
463  }
464 
465 #ifdef VERBOSE
466  std::cerr << "Relabel Rgroup smiles " << MolToSmiles(mol) << std::endl;
467 #endif
468  }
469 
470  // relabel the core and sidechains using the specified user labels
471  // if matches exist for non labelled atoms, these are added as well
// Computes the final R label numbering and applies it to all cores and to the
// R-groups of the current best permutation.
// Steps: collect the labels that occur in the best permutation; rebuild each
// core's labelledCore from its core; reset finalRlabelMapping; relabel the
// cores (which fills finalRlabelMapping); relabel the R-groups with that
// mapping; finally check the mapping for consistency.
// NOTE(review): embedded source line 543 (the start of the last check) is
// absent from this listing.
472  void relabel() {
473  std::vector<RGroupMatch> best = GetCurrentBestPermutation();
474 
475  // get the labels used
     // this local `userLabels` hides the data member of the same name
476  std::set<int> userLabels;
477  std::set<int> indexLabels;
478 
479  // Go through all the RGroups and find out which labels were
480  // actually used.
481 
482  // some atoms will have multiple attachment points, i.e. cycles
483  // split these up into new rlabels if necessary
484  // These are detected at match time
485  // This vector will hold the extra (new) labels required
486  std::map<int, std::vector<int>> extraAtomRLabels;
487 
488  for (auto &it : best) {
489  for (auto &rgroup : it.rgroups) {
     // positive keys are collected as user labels; negative keys are
     // collected as index labels only when onlyMatchAtRGroups is false
490  if (rgroup.first > 0) {
491  userLabels.insert(rgroup.first);
492  }
493  if (rgroup.first < 0 && !params.onlyMatchAtRGroups) {
494  indexLabels.insert(rgroup.first);
495  }
496 
497  std::map<int, int> rlabelsUsedInRGroup =
498  rgroup.second->getNumBondsToRlabels();
499  for (auto &numBondsUsed : rlabelsUsedInRGroup) {
500  // Make space for the extra labels
     // n bonds to one label need n - 1 extra labels; a later resize for the
     // same label replaces the size set by an earlier one
501  if (numBondsUsed.second > 1) { // multiple rgroup bonds to same atom
502  extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
503  1);
504  }
505  }
506  }
507  }
508 
509  // find user labels that are not present in the decomposition
     // each labelledCore is first replaced by a fresh copy of the core
510  for (auto &core : cores) {
511  core.second.labelledCore.reset(new RWMol(*core.second.core));
512  addCoreUserLabels(*core.second.labelledCore, userLabels);
513  }
514 
515  // Assign final RGroup labels to the cores and propagate these to
516  // the scaffold
517  finalRlabelMapping.clear();
518 
519  UsedLabels used_labels;
520  // Add all the user labels now to prevent an index label being assigned to a
521  // user label when multiple cores are present (e.g. the user label is
522  // present in the second core, but not the first).
523  for (auto userLabel : userLabels) {
524  used_labels.add(userLabel);
525  }
     // the same mapping and label registry are shared by all cores
526  for (auto &core : cores) {
527  relabelCore(*core.second.labelledCore, finalRlabelMapping, used_labels,
528  indexLabels, extraAtomRLabels);
529  }
530 
531  for (auto &it : best) {
532  for (auto &rgroup : it.rgroups) {
533  relabelRGroup(*rgroup.second, finalRlabelMapping);
534  }
535  }
536 
     // Consistency check: no two original labels may map to the same final label.
537  std::set<int> uniqueMappedValues;
538  std::transform(finalRlabelMapping.cbegin(), finalRlabelMapping.cend(),
539  std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
540  [](const std::pair<int, int> &p) { return p.second; });
541  CHECK_INVARIANT(finalRlabelMapping.size() == uniqueMappedValues.size(),
542  "Error in uniqueness of final RLabel mapping");
544  uniqueMappedValues.size() == userLabels.size() + indexLabels.size(),
545  "Error in final RMapping size");
546  }
547 
// Scores `permutation` with the method selected by params.scoreMethod.
//   permutation                  - the permutation to score
//   fingerprintVarianceScoreData - optional (defaults to nullptr); forwarded
//                                  in the FingerprintVariance case
// Returns NAN whenever control reaches the end of the switch, which includes
// any scoreMethod other than Match and FingerprintVariance.
// NOTE(review): embedded source lines 554 and 557 are absent from this
// listing; they presumably hold the return statements of the two cases -
// confirm against the original header.
548  double score(const std::vector<size_t> &permutation,
549  FingerprintVarianceScoreData *fingerprintVarianceScoreData =
550  nullptr) const {
551  RGroupScore scoreMethod = static_cast<RGroupScore>(params.scoreMethod);
552  switch (scoreMethod) {
553  case Match:
555  break;
556  case FingerprintVariance:
558  fingerprintVarianceScoreData);
559  break;
560  default:;
561  }
562  return NAN;
563  }
564 
566  bool finalize = false) {
567  if (matches.empty()) {
568  return RGroupDecompositionProcessResult(false, -1);
569  }
570  auto t0 = std::chrono::steady_clock::now();
571  std::unique_ptr<CartesianProduct> iterator;
573 
574  if (params.matchingStrategy == GA) {
575  RGroupGa ga(*this, params.timeout >= 0 ? &t0 : nullptr);
576  if (ga.numberPermutations() < 100 * ga.getPopsize()) {
578  } else {
579  if (params.gaNumberRuns > 1) {
580  auto results = ga.runBatch();
581  auto best = max_element(results.begin(), results.end(),
582  [](const GaResult &a, const GaResult &b) {
583  return a.rGroupScorer.getBestScore() <
584  b.rGroupScorer.getBestScore();
585  });
586  rGroupScorer = best->rGroupScorer;
587  } else {
588  auto result = ga.run();
589  rGroupScorer = result.rGroupScorer;
590  }
591  }
592  }
593  size_t offset = 0;
594  if (params.matchingStrategy != GA) {
595  // Exhaustive search, get the MxN matrix
596  // (M = matches.size(): number of molecules
597  // N = iterator.maxPermutations)
598  std::vector<size_t> permutations;
599 
600  if (pruneMatches && params.scoreMethod != FingerprintVariance) {
601  offset = previousMatchSize;
602  }
603  previousMatchSize = matches.size();
604  std::transform(
605  matches.begin() + offset, matches.end(),
606  std::back_inserter(permutations),
607  [](const std::vector<RGroupMatch> &m) { return m.size(); });
608  permutation = std::vector<size_t>(permutations.size(), 0);
609 
610  // run through all possible matches and score each
611  // set
612  size_t count = 0;
613 #ifdef DEBUG
614  std::cerr << "Processing" << std::endl;
615 #endif
616  std::unique_ptr<CartesianProduct> it(new CartesianProduct(permutations));
617  iterator = std::move(it);
618  // Iterates through the permutation idx, i.e.
619  // [m1_permutation_idx, m2_permutation_idx, m3_permutation_idx]
620 
621  while (iterator->next()) {
622  if (count > iterator->maxPermutations) {
623  throw ValueErrorException("next() did not finish");
624  }
625 #ifdef DEBUG
626  std::cerr << "**************************************************"
627  << std::endl;
628 #endif
629  double newscore = params.scoreMethod == FingerprintVariance
630  ? scoreFromPrunedData(iterator->permutation)
631  : score(iterator->permutation);
632 
633  if (fabs(newscore - rGroupScorer.getBestScore()) <
634  1e-6) { // heuristic to overcome floating point comparison issues
635  rGroupScorer.pushTieToStore(iterator->permutation);
636  } else if (newscore > rGroupScorer.getBestScore()) {
637 #ifdef DEBUG
638  std::cerr << " ===> current best:" << newscore << ">"
639  << rGroupScorer.getBestScore() << std::endl;
640 #endif
641  rGroupScorer.setBestPermutation(iterator->permutation, newscore);
643  rGroupScorer.pushTieToStore(iterator->permutation);
644  }
645  ++count;
646  }
647  }
648 
649  if (rGroupScorer.tieStoreSize() > 1) {
652  } else {
654  }
656  if (pruneMatches || finalize) {
657  prune();
658  }
659 
660  if (finalize) {
661  relabel();
662  }
663 
665  }
666 };
667 } // namespace RDKit
668 
669 #endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
The class for representing atoms.
Definition: Atom.h:68
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
Definition: Atom.h:220
void setAtomicNum(int newNum)
sets our atomic number
Definition: Atom.h:128
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
int getAtomMapNum() const
Definition: Atom.h:388
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
Definition: Atom.h:376
unsigned int getIsotope() const
returns our isotope number
Definition: Atom.h:240
unsigned int getDegree() const
@ SINGLE
Definition: Bond.h:58
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:121
void clearProp(const std::string &key) const
clears the value of a property
Definition: RDProps.h:137
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
Definition: RDProps.h:107
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: RDProps.h:126
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
Definition: RDProps.h:77
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
Definition: RGroupGa.h:127
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void breakTies(const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition: RGroupScore.h:83
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition: RGroupScore.h:99
double getBestScore() const
return the best score found so far
Definition: RGroupScore.h:101
unsigned int getNumConformers() const
Definition: ROMol.h:560
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
static std::string to_string(const Descriptor &desc)
Definition: Descriptor.h:54
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
Std stuff.
Definition: Abbreviations.h:19
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
@ Exhaustive
Definition: RGroupDecomp.h:43
const std::string _rgroupInputDummy
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:213
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
Definition: RGroupUtils.h:74
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:29
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
iterate through all possible permutations of the rgroups
Definition: RGroupScore.h:20
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
Definition: RGroupCore.h:24
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addCore(const ROMol &inputCore)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core)
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
Definition: RGroupDecomp.h:84
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
Definition: RGroupDecomp.h:86
double timeout
timeout in seconds. <=0 indicates no timeout
Definition: RGroupDecomp.h:97
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
Definition: RGroupDecomp.h:91
bool prepareCore(RWMol &, const RWMol *alignCore)