1
/**********************************************************************
2
fingerprint.h - Base class for fingerprints and fast searching
4
Copyright (C) 2005 by Chris Morley
6
This file is part of the Open Babel project.
7
For more information, see <http://openbabel.sourceforge.net/>
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation version 2 of the License.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
17
***********************************************************************/
19
#ifndef OB_FINGERPRINT_H
20
#define OB_FINGERPRINT_H
30
class OBBase; //Forward declaration; used only as pointer.
32
/// \brief The base class for fingerprints
///
/// Declares the interface that a fingerprint type implements (the pure
/// virtual GetFingerprint() and Description()) together with static helpers
/// for looking up registered fingerprint types (GetNextFPrt(),
/// FindFingerprint()) and for computing Tanimoto coefficients.
33
class OBAPI OBFingerprint
35
//see end of cpp file for detailed documentation
38
/// Sets bit number \p n in the fingerprint vector \p vec.
/// NOTE(review): only the declaration is visible here; how \p n maps onto the
/// words of \p vec is defined in the cpp file - confirm there.
void SetBit(std::vector<unsigned int>& vec, unsigned int n);
40
/// Folds \p vec: repeatedly ORs the top half with the bottom half until no smaller than \p nbits.
/// The result is left in \p vec, which is passed by non-const reference.
41
void Fold(std::vector<unsigned int>& vec, unsigned int nbits);
43
/// Returns fingerprint in vector, which may be resized, folded to nbits (if nbits!=0)
/// \param pOb   the object to be fingerprinted
/// \param fp    receives the fingerprint; may be resized
/// \param nbits if non-zero, the fingerprint is folded to this number of bits
/// \return NOTE(review): presumably true on success - the meaning of the bool
///         is set by the implementing classes; confirm in the cpp files.
/// Pure virtual: every fingerprint type must implement it.
44
virtual bool GetFingerprint(OBBase* pOb, std::vector<unsigned int>& fp, int nbits=0)=0;
46
/// Required short description of the fingerprint type.
/// Pure virtual: every fingerprint type must provide its own description.
47
virtual std::string Description()=0;
50
/// Flag values for fingerprint types.
/// NOTE(review): presumably these are the bits reported by Flags() - confirm in the cpp file.
enum FptFlag{FPT_UNIQUEBITS=1};
51
/// Returns the flags for this fingerprint type; this base implementation returns 0.
virtual unsigned int Flags() { return 0;};
53
/// Obtain info on available fingerprints
/// Both \p id and \p pFPrt are passed by non-const reference, so the call can
/// update them. NOTE(review): the iteration protocol (how to start, and what
/// the bool return signals) is defined in the cpp file - confirm there.
54
static bool GetNextFPrt(std::string& id, OBFingerprint*& pFPrt);
56
/// Returns a pointer to a fingerprint (the default if ID is empty), or NULL if not available
/// \param ID identifier of the wanted fingerprint type; an empty string selects the default
57
static OBFingerprint* FindFingerprint(std::string& ID);
59
/// Returns the Tanimoto coefficient between two fingerprint vectors, \p vec1 and \p vec2
60
static double Tanimoto(const std::vector<unsigned int>& vec1, const std::vector<unsigned int>& vec2);
62
/// Inline version of Tanimoto() taking a pointer for the second vector
///
/// The coefficient is (number of bits set in both) / (number of bits set in
/// either), accumulated over every word of \p vec1.
///
/// \param vec1 first fingerprint
/// \param p2   pointer to the words of the second fingerprint; it is indexed
///             from 0 to vec1.size()-1, so it must address at least that many
///             words (it is not dereferenced when \p vec1 is empty)
/// \return the Tanimoto coefficient, or 0.0 when no bit is set in either
///         fingerprint (which would otherwise be a division by zero)
static double Tanimoto(const std::vector<unsigned int>& vec1, const unsigned int* p2)
{
  ///If used for two vectors, vec1 and vec2, call as Tanimoto(vec1, &vec2[0]);
  unsigned int andbits=0, orbits=0;
  for (std::vector<unsigned int>::size_type i=0;i<vec1.size();++i)
  {
    unsigned int andfp = vec1[i] & p2[i];
    unsigned int orfp = vec1[i] | p2[i];
    //Count bits: x &= x-1 clears the lowest set bit, so each loop runs once
    //per set bit. Unsigned arithmetic avoids left-shifting negative ints.
    for(;andfp;andfp&=andfp-1)
      ++andbits;
    for(;orfp;orfp&=orfp-1)
      ++orbits;
  }
  if(orbits==0)
    return 0.0; //both fingerprints empty: avoid 0/0
  return((double)andbits/(double)orbits);
}
81
/// Returns the static member bitsperint (noted at its declaration as 8 * sizeof(unsigned int)).
static unsigned int Getbitsperint(){ return bitsperint; }
84
///Function object to set bits
///Takes two words, \p a and \p b, and returns one unsigned int.
///NOTE(review): the operator body is not visible here - presumably it ORs
///the two words together (cf. Fold()); confirm before relying on it.
87
unsigned int operator()(const unsigned int a, const unsigned int b)
93
///Map from a fingerprint ID string to the fingerprint object registered under it
typedef std::map<std::string, OBFingerprint*> FPMapType;
94
///Iterator over an FPMapType
typedef FPMapType::iterator Fptpos;
97
///This static function returns a reference to the FPtsMap
98
///which, because it is a static local variable, is constructed only once.
99
///This fiddle is to avoid the "static initialization order fiasco".
100
///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/.
101
static FPMapType& FPtsMap()
103
//Function-local static pointer, initially NULL
static FPMapType* fptm = NULL;
105
//The map is allocated with new; no matching delete is visible in this header
fptm = new FPMapType;
109
///Constructor: registers this object in FPtsMap() under \p ID.
///\param ID        key under which the fingerprint is stored in the map
///\param IsDefault requests that this fingerprint be treated as the default
///                 (the statement controlled by the if below is not visible here)
OBFingerprint(std::string ID, bool IsDefault=false)
111
FPtsMap()[ID] = this; //registers the derived fingerprint class
112
//NOTE(review): the map already holds the entry for ID at this point, so
//FPtsMap().empty() cannot be true here; only IsDefault can satisfy this test.
if(IsDefault || FPtsMap().empty())
117
///NOTE(review): presumably the default fingerprint handed out by
///FindFingerprint() for an empty ID - confirm in the cpp file.
static OBFingerprint* _pDefault;
118
///Number of bits in an unsigned int; defined in the cpp file (see the trailing note)
static const unsigned int bitsperint;// = 8 * sizeof(unsigned int);
125
//*************************************************************
126
//Fast search routines
127
///Header for fastsearch index file
///All fields have fixed sizes: three unsigned ints followed by two char arrays.
128
struct OBAPI FptIndexHeader
130
unsigned int headerlength;///<offset to data: sizeof(FptIndexHeader)
131
unsigned int nEntries; ///<number of fingerprints
132
unsigned int words; ///<number of 32bit words per fingerprint
133
char fpid[16]; ///<ID of the fingerprint type (16-char buffer)
134
char datafilename[256]; ///<the data that this is an index to (256-char buffer)
136
/// Structure of fastsearch index files
137
struct OBAPI FptIndex
139
///Header of the index (see FptIndexHeader)
FptIndexHeader header;
140
///Fingerprint words. NOTE(review): presumably header.words entries per fingerprint - confirm in the cpp file.
std::vector<unsigned int> fptdata;
141
///NOTE(review): presumably one seek position in the data file per fingerprint - confirm in the cpp file.
std::vector<unsigned int> seekdata;
142
///Reads the index from \p pIndexstream. NOTE(review): bool presumably signals success - confirm.
bool Read(std::istream* pIndexstream);
143
///\brief Returns pointer to FP used or NULL and an error message
144
OBFingerprint* CheckFP();
147
/// \brief Class to search fingerprint index files
148
class OBAPI FastSearch
150
//see end of cpp file for detailed documentation
152
/// \brief Reads an index from \p pIndexstream.
/// NOTE(review): the meaning of the returned string is defined in the cpp file - confirm there.
std::string ReadIndex(std::istream* pIndexstream);
153
/// Virtual destructor with an empty body
virtual ~FastSearch(){};
155
/// \brief Does substructure search and returns vector of the file positions of matches
156
bool Find(OBBase* pOb, std::vector<unsigned int>& SeekPositions, unsigned int MaxCandidates);
158
/// \brief Returns multimap containing objects whose Tanimoto coefficients with the target
159
/// are greater than the value specified.
160
bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
163
/// \brief Returns multimap containing the nCandidates objects with largest Tanimoto
164
/// coefficients with the target.
165
bool FindSimilar(OBBase* pOb, std::multimap<double, unsigned int>& SeekposMap,
168
/// \brief Returns a pointer to the fingerprint type used to construct the index
/// (simply returns the member _pFP)
169
OBFingerprint* GetFingerprint() const{ return _pFP;};
176
//**********************************************
177
/// \brief Class to prepare fingerprint index files. See FastSearch class for details.
178
class OBAPI FastSearchIndexer
180
//see end of cpp file for detailed documentation
182
///\brief Constructor with a new index
183
FastSearchIndexer(std::string& datafilename, std::ostream* os, std::string& fpid,
186
///\brief Constructor using existing index
187
FastSearchIndexer(FptIndex* pindex, std::ostream* os);
189
///Destructor; defined in the cpp file
~FastSearchIndexer();
191
///\brief Called for each object
///\param pOb     the object to add to the index
///\param seekpos stream position associated with \p pOb
///               (NOTE(review): presumably its offset in the data file - confirm)
192
bool Add(OBBase* pOb, std::streampos seekpos);
195
///Output stream pointer (NOTE(review): presumably where the index is written - confirm in the cpp file)
std::ostream* _indexstream;
201
} //namespace OpenBabel
204
//! \file fingerprint.h
205
//! \brief Declaration of OBFingerprint base class and fastsearch classes