RDKit
Open-source cheminformatics and machine learning.
Seed.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #pragma once
12 #include <map>
13 #include "../RDKitBase.h"
14 #include "DebugTrace.h" // algorithm optimisation definitions
15 #include "Graph.h"
16 #include "DuplicatedSeedCache.h"
17 #include "SubstructMatchCustom.h"
18 
19 namespace RDKit {
20 namespace FMCS {
21 class MaximumCommonSubgraph;
22 struct TargetMatch;
23 
// Describes which atoms and bonds of the source molecule a seed refers to.
// Seed::getNumAtoms() and Seed::getNumBonds() report the sizes of AtomsIdx and
// BondsIdx respectively.
25  MolFragment { // Reference to a fragment of source molecule
26  std::vector<const Atom*> Atoms; // atoms of the source molecule in this fragment
27  std::vector<const Bond*> Bonds; // bonds of the source molecule in this fragment
// presumably AtomsIdx[i] / BondsIdx[i] are the source-molecule indices of
// Atoms[i] / Bonds[i] -- TODO confirm against Seed.cpp
28  std::vector<unsigned> AtomsIdx;
29  std::vector<unsigned> BondsIdx; // needed for results and size() only
30  std::map<unsigned, unsigned> SeedAtomIdxMap; // backward-conversion map:
31  // atom index in the full query molecule
32  // -> atom index in the seed
33 };
34 
// Data members of NewBond: one bond (and the atom at its far end) that is
// scheduled to be added to a seed. Every member has an in-class initializer.
36  unsigned SourceAtomIdx{0}; // index in the seed; this atom is already in the seed
37  unsigned BondIdx{0}; // index in qmol of the new bond scheduled to be added to
38  // the seed. This is an outgoing bond from SourceAtomIdx
39  unsigned NewAtomIdx{0}; // index in qmol of the new atom scheduled to be added
40  // to the seed. It is the other end of the new bond
41  const Atom* NewAtom{nullptr}; // pointer to qmol's new atom scheduled to be
42  // added to the seed. It is the other end of the new bond
43  unsigned EndAtomIdx{0}; // index in the seed. RING case: the "new" atom at the
44  // other end of the new bond already exists in the seed.
45 
47 
48  {}
49 
50  NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom,
51  unsigned to_atom, const Atom* a)
52  : SourceAtomIdx(from_atom),
53  BondIdx(bond_idx),
54  NewAtomIdx(new_atom),
55  NewAtom(a),
56  EndAtomIdx(to_atom) {}
57 };
58 
// Data members of Seed. In this listing NewBonds is the only member declared
// under "private:"; the remaining members are public.
60  private:
61  mutable std::vector<NewBond> NewBonds; // for multistage growing: all
62  // directly connected outgoing bonds
63  public:
// operator= assigns true to CopyComplete as its very last statement.
64  bool CopyComplete{false}; // this seed has been completely copied into list.
65  // postponed non-locked copy for MULTI_THREAD
// NOTE(review): GrowingStage is unsigned, so the "-1 finished" state mentioned
// below would be held as the largest unsigned value. createFromParent()
// resets the stage to 0.
66  mutable unsigned GrowingStage{0}; // 0 new seed; -1 finished; n>0 in
67  // progress, exact stage of growing for SDF
68  MolFragment MoleculeFragment; // Reference to a fragment of source molecule
69  Graph Topology; // seed topology with references to source molecule
70 
// Copied from the parent seed by createFromParent().
// presumably one flag per bond of the query molecule -- TODO confirm
71  std::vector<bool> ExcludedBonds;
// createFromParent() sets the two LastAdded*BeginIdx members to the atom and
// bond counts inherited from the parent ("previous size").
72  unsigned LastAddedAtomsBeginIdx{0}; // in this subgraph for improving
73  // performance of future growing
74  unsigned LastAddedBondsBeginIdx{0}; // in this subgraph for DEBUG ONLY
// canGrowBiggerThan() adds these to the current bond/atom counts to decide
// whether the seed can still exceed a given size.
// presumably filled in by computeRemainingSize() -- confirm in Seed.cpp
75  unsigned RemainingBonds{0};
76  unsigned RemainingAtoms{0};
77 #ifdef DUP_SUBSTRUCT_CACHE
// NOTE(review): operator= and createFromParent() copy a DupCacheKey member
// under this same macro; its declaration is not visible in this listing.
79 #endif
80  std::vector<TargetMatch> MatchResult; // for each target
81  public:
82  Seed()
83 
84  {}
85 
86  void setMoleculeFragment(const Seed& src) {
87  MoleculeFragment = src.MoleculeFragment;
88  }
89  Seed& operator=(const Seed& src) {
90  NewBonds = src.NewBonds;
91  GrowingStage = src.GrowingStage;
92  MoleculeFragment = src.MoleculeFragment;
93  Topology = src.Topology;
94  ExcludedBonds = src.ExcludedBonds;
95  LastAddedAtomsBeginIdx = src.LastAddedAtomsBeginIdx;
96  LastAddedBondsBeginIdx = src.LastAddedBondsBeginIdx;
97  RemainingBonds = src.RemainingBonds;
98  RemainingAtoms = src.RemainingAtoms;
99 #ifdef DUP_SUBSTRUCT_CACHE
100  DupCacheKey = src.DupCacheKey;
101 #endif
102  MatchResult = src.MatchResult;
103  CopyComplete = true; // LAST
104  return *this;
105  }
106  void createFromParent(const Seed* parent) {
107  MoleculeFragment = parent->MoleculeFragment;
108  Topology = parent->Topology;
109  ExcludedBonds = parent->ExcludedBonds;
110  RemainingBonds = parent->RemainingBonds;
111  RemainingAtoms = parent->RemainingAtoms;
112 #ifdef DUP_SUBSTRUCT_CACHE
113  DupCacheKey = parent->DupCacheKey;
114 #endif
115  LastAddedAtomsBeginIdx = getNumAtoms(); // previous size
116  LastAddedBondsBeginIdx = getNumBonds(); // previous size
117  GrowingStage = 0;
118  }
119 
120  unsigned getNumAtoms() const { return MoleculeFragment.AtomsIdx.size(); }
121  unsigned getNumBonds() const { return MoleculeFragment.BondsIdx.size(); }
122 
123  void grow(MaximumCommonSubgraph& mcs) const;
124  bool canGrowBiggerThan(unsigned maxBonds,
125  unsigned maxAtoms) const { // prune()
126  return RemainingBonds + getNumBonds() > maxBonds ||
127  (RemainingBonds + getNumBonds() == maxBonds &&
128  RemainingAtoms + getNumAtoms() > maxAtoms);
129  }
130  void computeRemainingSize(const ROMol& qmol);
131 
132  unsigned addAtom(const Atom* atom);
133  unsigned addBond(const Bond* bond);
134  void fillNewBonds(const ROMol& qmol);
135 };
136 } // namespace FMCS
137 } // namespace RDKit
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
unsigned getNumBonds() const
Definition: Seed.h:121
bool canGrowBiggerThan(unsigned maxBonds, unsigned maxAtoms) const
Definition: Seed.h:124
void createFromParent(const Seed *parent)
Definition: Seed.h:106
unsigned LastAddedBondsBeginIdx
Definition: Seed.h:74
void grow(MaximumCommonSubgraph &mcs) const
unsigned GrowingStage
Definition: Seed.h:66
unsigned getNumAtoms() const
Definition: Seed.h:120
unsigned RemainingBonds
Definition: Seed.h:75
MolFragment MoleculeFragment
Definition: Seed.h:68
DuplicatedSeedCache::TKey DupCacheKey
Definition: Seed.h:78
std::vector< TargetMatch > MatchResult
Definition: Seed.h:80
unsigned RemainingAtoms
Definition: Seed.h:76
unsigned addAtom(const Atom *atom)
void setMoleculeFragment(const Seed &src)
Definition: Seed.h:86
void fillNewBonds(const ROMol &qmol)
std::vector< bool > ExcludedBonds
Definition: Seed.h:71
unsigned LastAddedAtomsBeginIdx
Definition: Seed.h:72
void computeRemainingSize(const ROMol &qmol)
unsigned addBond(const Bond *bond)
Graph Topology
Definition: Seed.h:69
Seed & operator=(const Seed &src)
Definition: Seed.h:89
#define RDKIT_FMCS_EXPORT
Definition: export.h:153
Std stuff.
Definition: Abbreviations.h:19
std::vector< unsigned > AtomsIdx
Definition: Seed.h:28
std::vector< const Atom * > Atoms
Definition: Seed.h:26
std::vector< const Bond * > Bonds
Definition: Seed.h:27
std::vector< unsigned > BondsIdx
Definition: Seed.h:29
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:30
NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom, unsigned to_atom, const Atom *a)
Definition: Seed.h:50