RDKit
Open-source cheminformatics and machine learning.
StructChecker.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2016 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file StructChecker.h
12 
13 \brief Contains the public API of the StructChecker
14 
15 \b Note that this should be considered beta and that the API may change in
16 future
17 releases.
18 
19 */
20 #include <RDGeneral/export.h>
21 #pragma once
22 #ifndef RD_STRUCTCHECKER_H_Oct2016
23 #define RD_STRUCTCHECKER_H_Oct2016
24 
25 #include <string>
26 #include <vector>
27 #include "../RDKitBase.h"
28 
29 namespace RDKit {
30 namespace StructureCheck {
31 
32 // Flags for the return values of the StructureChecker
33 
34 // TypeDefs for translating augmented atom pairs
35 static const int ANY_CHARGE = 8;
37  RT_NONE = 0,
38  SINGLET = 1,
39  DOUBLET = 2,
40  TRIPLET = 3,
41  ANY_RADICAL = 0xFF
42 };
43 
// Bond-type codes used by the augmented-atom tables in this file.
// BT_NONE (0) doubles as the "remove this bond" marker, ANY_BOND (8) is the
// value the BondType member is initialized to elsewhere in this file, and
// ALL_BOND_TYPES (0xF) has all four low bits set.
// NOTE(review): the embedded listing numbers jump from 49 to 53 in this
// extract, so some enumerators (presumably the values 5-7) may have been
// elided -- confirm against the real header before relying on this list.
44 enum AABondType { // MDL CTFile bond types plus extensions
45  BT_NONE = 0, // means REMOVE Bond
46  SINGLE = 1,
47  DOUBLE = 2,
48  TRIPLE = 3,
49  AROMATIC = 4,
53  ANY_BOND = 8,
54  ALL_BOND_TYPES = 0xF
55 };
56 
// Topology constraint carried by an augmented atom. TP_NONE is the value
// used by the `Topology(TP_NONE)` initializer elsewhere in this file, i.e.
// the default is "no constraint".
57 enum AATopology {
58  TP_NONE = 0, // Don't care
59  RING = 1, // Ring
60  CHAIN = 2 // Chain
61 };
62 
64  std::string AtomSymbol;
65  int Charge;
67  unsigned SubstitutionCount; // substitution count 0 = don't care
70  : Charge(ANY_CHARGE),
71  Radical(ANY_RADICAL),
72  SubstitutionCount(0),
73  BondType(ANY_BOND) {}
74 };
75 
77  std::string AtomSymbol;
78  std::string ShortName;
79  int Charge;
82  std::vector<Ligand> Ligands;
83 
85  : Charge(ANY_CHARGE), Radical(ANY_RADICAL), Topology(TP_NONE) {}
86 
// Constructs an augmented atom from explicit values: each argument is copied
// straight into the member of the matching name (symbol -> AtomSymbol,
// name -> ShortName, charge -> Charge, radical -> Radical,
// topology -> Topology). Ligands is not mentioned in the initializer list,
// so it is default-constructed (an empty vector).
87  AugmentedAtom(const std::string &symbol, const std::string &name, int charge,
88  RadicalType radical, AATopology topology)
89  : AtomSymbol(symbol),
90  ShortName(name),
91  Charge(charge),
92  Radical(radical),
93  Topology(topology) {}
94 };
95 
97  std::string AtomSymbol;
98  double LocalInc;
99  double AlphaInc;
100  double BetaInc;
101  double MultInc;
102 
103  // Used for logging
108 };
109 
112  double Cond;
113  // Used for logging
115 };
116 //-------------
117 
118 //! Structure Check Options
119 /// Holds all the user options for the StructureChecking.
120 /// Can be initialized from factory functions, perhaps serialized
122  double AcidityLimit;
127  unsigned MaxMolSize;
134  bool Verbose;
135 
136  // Internal data for struchk
137  std::vector<std::pair<AugmentedAtom, AugmentedAtom>> AugmentedAtomPairs;
138  std::vector<AugmentedAtom> AcidicAtoms;
139  std::vector<AugmentedAtom> GoodAtoms;
140  std::vector<ROMOL_SPTR> Patterns;
141  std::vector<ROMOL_SPTR> RotatePatterns;
142  std::vector<ROMOL_SPTR> StereoPatterns;
143  std::vector<ROMOL_SPTR> FromTautomer;
144  std::vector<ROMOL_SPTR> ToTautomer;
145 
146  double Elneg0; // elneg_table[0].value;
147  std::map<unsigned, double> ElnegTable; // AtomicNumber -> eleng
148  std::vector<IncEntry> AtomAcidity; // atom_acidity_table[]
149  std::vector<IncEntry> ChargeIncTable;
150  // std::map AtomSymbol(or AtomicNumber) -> IncEntry
151  /* [ReadTransformation() ]
152  * The alpha, beta coefficients of the transformation function used
153  * to stretch the preliminary pKa values to the actual predictions.
154  * The function is pKa = 7 + (pKa'-7)*beta + ((pKa'-7)*alpha)^3.
155  */
156 
157  double Alpha, Beta;
158  std::vector<PathEntry> AlphaPathTable, BetaPathTable;
159 
160  public:
162 
// Resets every option and every loaded table by assigning a freshly
// default-constructed StructCheckerOptions over this object.
163  void clear() { *this = StructCheckerOptions(); }
164 
// Reads augmented-atom translation data from the file at `path`.
// NOTE(review): presumably fills AugmentedAtomPairs and returns true on
// success -- neither is visible in this header; confirm in the .cpp.
165  bool loadAugmentedAtomTranslations(const std::string &path);
167  const std::vector<std::pair<AugmentedAtom, AugmentedAtom>> &aaPairs);
168 
// Reads the acidic augmented-atom list from the file at `path`.
// NOTE(review): presumably fills AcidicAtoms and returns true on success --
// confirm in the implementation.
169  bool loadAcidicAugmentedAtoms(const std::string &path);
// Supplies the acidic augmented-atom list directly instead of loading it
// from a file. NOTE(review): presumably copied into AcidicAtoms -- confirm.
170  void setAcidicAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
171 
// Reads the "good" augmented-atom list from the file at `path`.
// NOTE(review): presumably fills GoodAtoms and returns true on success --
// confirm in the implementation.
172  bool loadGoodAugmentedAtoms(const std::string &path);
// Supplies the "good" augmented-atom list directly instead of loading it
// from a file.
// NOTE(review): the parameter is named `acidicAtoms` although this is the
// good-atom setter; it looks copy-pasted from setAcidicAugmentedAtoms.
// Harmless to callers, but worth renaming next time the API is touched.
173  void setGoodAugmentedAtoms(const std::vector<AugmentedAtom> &acidicAtoms);
174 
// Reads the clean patterns from the file at `path`.
// NOTE(review): presumably fills Patterns and returns true on success --
// confirm in the implementation.
175  bool loadPatterns(const std::string &path); // file with clean patterns
177  const std::vector<std::string> &smarts); // can throw RDKit exceptions
// Supplies already-built pattern molecules directly.
// NOTE(review): presumably copied into Patterns -- confirm.
178  void setPatterns(const std::vector<ROMOL_SPTR> &p);
179 
181  const std::string &path); // file with rotate patterns
183  const std::vector<std::string> &smarts); // can throw RDKit exceptions
// Supplies already-built rotate-pattern molecules directly.
// NOTE(review): presumably copied into RotatePatterns -- confirm.
184  void setRotatePatterns(const std::vector<ROMOL_SPTR> &p);
185 
187  const std::string &path); // file with stereo patterns
189  const std::vector<std::string> &smarts); // can throw RDKit exceptions
// Supplies already-built stereo-pattern molecules directly.
// NOTE(review): presumably copied into StereoPatterns -- confirm.
190  void setStereoPatterns(const std::vector<ROMOL_SPTR> &p);
191 
// Reads tautomer data from the file at `path`.
// NOTE(review): presumably fills FromTautomer/ToTautomer and returns true on
// success -- confirm in the implementation.
192  bool loadTautomerData(const std::string &path); // file path
// Takes two lists of SMARTS strings describing tautomer transformations.
// NOTE(review): presumably smartsFrom[i] pairs with smartsTo[i] and the
// parsed molecules land in FromTautomer/ToTautomer. The sibling parse*
// declarations are annotated "can throw RDKit exceptions"; confirm whether
// that applies here too.
193  void parseTautomerData(const std::vector<std::string> &smartsFrom,
194  const std::vector<std::string> &smartsTo);
// Supplies already-built tautomer "from" and "to" molecules directly.
// NOTE(review): presumably copied into FromTautomer/ToTautomer with
// from[i] paired to to[i] -- confirm, including what happens when the two
// vectors differ in length.
195  void setTautomerData(const std::vector<ROMOL_SPTR> &from,
196  const std::vector<ROMOL_SPTR> &to);
// Reads the charge data tables from the file at `path`.
// NOTE(review): which members are populated (ElnegTable, AtomAcidity,
// ChargeIncTable, Alpha/Beta, the path tables?) and what the bool means are
// not visible in this header -- confirm in the implementation.
197  bool loadChargeDataTables(const std::string &path); // file path
198 };
199 
200 RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json,
202 
205  const std::string &augmentedAtomTranslationsFile = "",
206  // ?? AcidicAtoms;
207  // ?? GoodAtoms;
208  const std::string &patternFile = "", // file with clean patterns
209  const std::string &rotatePatternFile = "", // file with rotate patterns
210  const std::string &stereoPatternFile = "", // file with stereo patterns
211  const std::string &tautomerFile = "");
212 
213 //! \brief Class for performing structure validation and cleanup
214 /*! \b NOTE: This class should be considered beta. The API may change in future
215 releases.
216 
217 Examples of Usage
218 
219 \code
220  StructChecker chk;
221  unsigned flags = chk.checkMolStructure( mol ); // use defaults
222 \endcode
223 
224 or
225 
226 \code
227  StructureCheck::StructCheckerOptions options; // use defaults
228  // To use external data
229  StructureCheck::loadOptionsFromFiles(options, file1, file2);
230  StructChecker chk(options);
231 
232  for( mol in mols ) {
233  unsigned flags = chk.checkMolStructure( mol );
234  if (0!=(flags & StructureCheck::StructChecker::BAD_SET)) {
235  // write to error file
236  } else if (0!=(flags & StructureCheck::StructChecker::TRANSFORMED_SET))
237 {
238  // input molecule was transformed
239  } else { // flags == NO_CHANGE
240  // no change
241  }
242  }
243 \endcode
244 */
246  public:
// Result flags for a structure check. Every named flag below occupies its
// own bit, so several can be OR-ed together in one unsigned value;
// NO_CHANGE (0) means no bit is set. Two ready-made masks are provided:
// BAD_SET covers the low-byte error flags (0x007F; bit 0x0080 is reserved)
// and TRANSFORMED_SET covers the high-byte "molecule was modified / warning"
// flags (0xFF00). Test a result with `flags & BAD_SET` etc.
247  typedef enum StructureFlags {
248  NO_CHANGE = 0,
249  BAD_MOLECULE = 0x0001,
250  ALIAS_CONVERSION_FAILED = 0x0002,
251  STEREO_ERROR = 0x0004,
252  STEREO_FORCED_BAD = 0x0008,
253  ATOM_CLASH = 0x0010,
254  ATOM_CHECK_FAILED = 0x0020,
255  SIZE_CHECK_FAILED = 0x0040,
256  // reserved error = 0x0080,
257  TRANSFORMED = 0x0100,
258  FRAGMENTS_FOUND = 0x0200,
259  EITHER_WARNING = 0x0400,
260  DUBIOUS_STEREO_REMOVED = 0x0800,
261  RECHARGED = 0x1000,
262  STEREO_TRANSFORMED = 0x2000,
263  TEMPLATE_TRANSFORMED = 0x4000,
264  TAUTOMER_TRANSFORMED = 0x8000,
265  // masks (unions of the single-bit flags above):
266  BAD_SET = (BAD_MOLECULE | ALIAS_CONVERSION_FAILED | STEREO_ERROR |
267  STEREO_FORCED_BAD | ATOM_CLASH | ATOM_CHECK_FAILED |
268  SIZE_CHECK_FAILED),
269 
270  TRANSFORMED_SET = (TRANSFORMED | FRAGMENTS_FOUND | EITHER_WARNING |
271  DUBIOUS_STEREO_REMOVED | STEREO_TRANSFORMED |
272  TEMPLATE_TRANSFORMED | TAUTOMER_TRANSFORMED | RECHARGED),
273  } StructureFlags;
274  // attributes:
275  private:
276  StructCheckerOptions Options;
277 
278  public:
// Default constructor: the Options member is left default-constructed.
279  inline StructChecker() {}
// Constructs a checker that holds its own copy of `options`; later changes
// to the caller's object do not affect this checker.
280  inline StructChecker(const StructCheckerOptions &options)
281  : Options(options) {}
282 
// Read-only access to the options held by this checker. The reference
// points at the member itself, so it is only valid while the checker lives.
283  const StructCheckerOptions &GetOptions() const { return Options; }
// Replaces the held options with a copy of `options`.
284  void SetOptions(const StructCheckerOptions &options) { Options = options; }
285 
286  // Checks the molecule structure and fixes it where needed (`mol` is
287  // taken by non-const reference); returns a bitwise combination of
288  // StructureFlags values describing what was done.
289  unsigned checkMolStructure(RWMol &mol) const;
290 
291  // Instance-independent (static) helper methods:
292  // Converts structure property flags to a comma-separated string
293  static std::string StructureFlagsToString(unsigned flags);
294  // Converts a comma-separated string to a StructureFlags value (unsigned integer)
295  static unsigned StringToStructureFlags(const std::string &str);
296  // internal implementation:
297  private:
298 };
299 } // namespace StructureCheck
300 } // namespace RDKit
301 #endif
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
Class for performing structure validation and cleanup.
StructChecker(const StructCheckerOptions &options)
static std::string StructureFlagsToString(unsigned flags)
const StructCheckerOptions & GetOptions() const
static unsigned StringToStructureFlags(const std::string &str)
void SetOptions(const StructCheckerOptions &options)
unsigned checkMolStructure(RWMol &mol) const
#define RDKIT_STRUCTCHECKER_EXPORT
Definition: export.h:465
static const char * symbol[119]
Definition: mf.h:257
RDKIT_STRUCTCHECKER_EXPORT bool loadOptionsFromFiles(StructCheckerOptions &op, const std::string &augmentedAtomTranslationsFile="", const std::string &patternFile="", const std::string &rotatePatternFile="", const std::string &stereoPatternFile="", const std::string &tautomerFile="")
RDKIT_STRUCTCHECKER_EXPORT bool parseOptionsJSON(const std::string &json, StructCheckerOptions &op)
static const int ANY_CHARGE
Definition: StructChecker.h:35
Std stuff.
Definition: Abbreviations.h:19
AugmentedAtom(const std::string &symbol, const std::string &name, int charge, RadicalType radical, AATopology topology)
Definition: StructChecker.h:87
std::vector< AugmentedAtom > GoodAtoms
void setRotatePatterns(const std::vector< ROMOL_SPTR > &p)
bool loadAcidicAugmentedAtoms(const std::string &path)
void setAugmentedAtomTranslations(const std::vector< std::pair< AugmentedAtom, AugmentedAtom >> &aaPairs)
bool loadRotatePatterns(const std::string &path)
bool loadChargeDataTables(const std::string &path)
bool loadGoodAugmentedAtoms(const std::string &path)
std::map< unsigned, double > ElnegTable
void setGoodAugmentedAtoms(const std::vector< AugmentedAtom > &acidicAtoms)
bool loadAugmentedAtomTranslations(const std::string &path)
std::vector< ROMOL_SPTR > StereoPatterns
void parseStereoPatterns(const std::vector< std::string > &smarts)
std::vector< AugmentedAtom > AcidicAtoms
void parseTautomerData(const std::vector< std::string > &smartsFrom, const std::vector< std::string > &smartsTo)
void parsePatterns(const std::vector< std::string > &smarts)
std::vector< ROMOL_SPTR > RotatePatterns
bool loadStereoPatterns(const std::string &path)
void setPatterns(const std::vector< ROMOL_SPTR > &p)
void setStereoPatterns(const std::vector< ROMOL_SPTR > &p)
void setTautomerData(const std::vector< ROMOL_SPTR > &from, const std::vector< ROMOL_SPTR > &to)
void setAcidicAugmentedAtoms(const std::vector< AugmentedAtom > &acidicAtoms)
void parseRotatePatterns(const std::vector< std::string > &smarts)
std::vector< std::pair< AugmentedAtom, AugmentedAtom > > AugmentedAtomPairs
bool loadPatterns(const std::string &path)
bool loadTautomerData(const std::string &path)