1
// -*- mode: C++; tab-width: 2; -*-
4
// --------------------------------------------------------------------------
5
// OpenMS Mass Spectrometry Framework
6
// --------------------------------------------------------------------------
7
// Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
9
// This library is free software; you can redistribute it and/or
10
// modify it under the terms of the GNU Lesser General Public
11
// License as published by the Free Software Foundation; either
12
// version 2.1 of the License, or (at your option) any later version.
14
// This library is distributed in the hope that it will be useful,
15
// but WITHOUT ANY WARRANTY; without even the implied warranty of
16
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
// Lesser General Public License for more details.
19
// You should have received a copy of the GNU Lesser General Public
20
// License along with this library; if not, write to the Free Software
21
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
// --------------------------------------------------------------------------
24
// $Maintainer: Clemens Groepl,Andreas Bertsch$
25
// $Authors: Chris Bauer $
26
// --------------------------------------------------------------------------
28
#include <OpenMS/CONCEPT/ClassTest.h>
29
#include <OpenMS/DATASTRUCTURES/String.h>
30
#include <OpenMS/CHEMISTRY/ResidueDB.h>
31
#include <OpenMS/CHEMISTRY/Residue.h>
32
#include <OpenMS/CONCEPT/Exception.h>
35
///////////////////////////
36
#include <OpenMS/DATASTRUCTURES/SuffixArrayTrypticCompressed.h>
37
///////////////////////////
39
using namespace OpenMS;
42
START_TEST(SuffixArrayTrypticCompressed, "$Id: SuffixArrayTrypticCompressed_test.C 8215 2011-03-29 14:18:26Z aiche $")
44
/////////////////////////////////////////////////////////////
45
/////////////////////////////////////////////////////////////
47
// Shared fixtures used by the sections below.
// ptr and nullPointer both start as null pointers; ptr is assigned in later
// sections and compared against nullPointer.
SuffixArrayTrypticCompressed* ptr = 0;
48
SuffixArrayTrypticCompressed* nullPointer = 0;
49
// Input string from which most suffix arrays in this test are built.
const String text = "$AAARAA$ARARP$";
51
// Suffix array over `text`; the file name argument is left empty.
// NOTE(review): no matching delete is visible in this excerpt.
SuffixArrayTrypticCompressed* sa = new SuffixArrayTrypticCompressed(text, "");
53
// Constructor checks: rejected inputs, the minimal "$" input, the string dump
// of the shared fixture `sa`, and a file name that does not exist.
START_SECTION(SuffixArrayTrypticCompressed(const String &st, const String &filename, const WeightWrapper::WEIGHTMODE weight_mode=WeightWrapper::MONO))
54
// The inputs "A" and "$A" are expected to raise InvalidValue.
TEST_EXCEPTION (Exception::InvalidValue,new SuffixArrayTrypticCompressed("A",""));
55
TEST_EXCEPTION (Exception::InvalidValue,new SuffixArrayTrypticCompressed("$A",""));
56
// The single-character input "$" is accepted; its dump is pinned below.
ptr = new SuffixArrayTrypticCompressed("$","");
57
TEST_EQUAL(ptr->toString(),"lcp: 0\nskip: 0");
58
// Slice the dump of `sa` at fixed offsets and compare each slice with its
// expected text.
String s = sa->toString();
59
String sTree = s.substr(0,19);
60
String lcp = s.substr(25, 4);
61
String skip = s.substr(36,4);
62
TEST_EQUAL(sTree,"AA\nAAARAA\nARARP\nARP");
63
TEST_EQUAL(lcp,"2120");
64
TEST_EQUAL(skip,"1210");
65
TEST_NOT_EQUAL(ptr, nullPointer);
66
// A non-empty file name that cannot be found is expected to raise FileNotFound.
TEST_EXCEPTION (Exception::FileNotFound,ptr = new SuffixArrayTrypticCompressed(text,"FileThatNotExists"));
69
// Copy constructor check: a copy of `sa` must produce the same toString()
// output as the original.
START_SECTION(SuffixArrayTrypticCompressed(const SuffixArrayTrypticCompressed & sa))
70
SuffixArrayTrypticCompressed sa2 (*sa);
71
TEST_EQUAL (sa->toString(),sa2.toString());
74
// Destructor section.
// NOTE(review): no statements for this section are visible in this excerpt.
START_SECTION(~SuffixArrayTrypticCompressed())
78
// isDigestingEnd(aa1, aa2): pins the expected result for six character pairs.
START_SECTION(bool isDigestingEnd(const char aa1, const char aa2) const )
79
// 'R' or 'K' followed by 'R' or 'K' is expected to be a digesting end.
TEST_EQUAL (sa->isDigestingEnd('R','R'),true);
80
TEST_EQUAL (sa->isDigestingEnd('K','K'),true);
81
TEST_EQUAL (sa->isDigestingEnd('R','K'),true);
82
// 'R' or 'K' followed by 'P' is expected NOT to be a digesting end.
TEST_EQUAL (sa->isDigestingEnd('R','P'),false);
83
TEST_EQUAL (sa->isDigestingEnd('K','P'),false);
84
// 'A' followed by 'R' is expected NOT to be a digesting end.
TEST_EQUAL (sa->isDigestingEnd('A','R'),false);
87
// getTolerance(): expects 0.5 on the shared fixture, then reads back a value
// written with setTolerance().
START_SECTION(DoubleReal getTolerance () const)
88
TEST_REAL_SIMILAR (sa->getTolerance(),0.5);
89
sa->setTolerance(0.1);
90
TEST_REAL_SIMILAR (sa->getTolerance(),0.1);
91
// Put the tolerance back to 0.5 so later sections see the same value.
sa->setTolerance(0.5);
94
// setTolerance(t): round-trips a value through the getter and checks that a
// negative tolerance is rejected.
START_SECTION(void setTolerance(DoubleReal t))
95
TEST_REAL_SIMILAR (sa->getTolerance(),0.5);
96
sa->setTolerance(0.1);
97
TEST_REAL_SIMILAR (sa->getTolerance(),0.1);
98
// Put the tolerance back to 0.5 before the failure case.
sa->setTolerance(0.5);
99
// A negative tolerance is expected to raise InvalidValue.
TEST_EXCEPTION(Exception::InvalidValue,sa->setTolerance(-0.5));
102
// getNumberOfModifications(): expects 0 on the shared fixture, then reads back
// a value written with setNumberOfModifications().
START_SECTION(Size getNumberOfModifications())
103
TEST_EQUAL (sa->getNumberOfModifications(),0);
104
sa->setNumberOfModifications(1);
105
TEST_EQUAL (sa->getNumberOfModifications(),1);
106
// Put the count back to 0 so later sections see the same value.
sa->setNumberOfModifications(0);
109
// setNumberOfModifications(n): round-trips the value 1 through the getter and
// then sets it back to 0.
START_SECTION(void setNumberOfModifications(Size number_of_mods))
110
TEST_EQUAL (sa->getNumberOfModifications(),0);
111
sa->setNumberOfModifications(1);
112
TEST_EQUAL (sa->getNumberOfModifications(),1);
113
sa->setNumberOfModifications(0);
116
// setTags(tags): the tags read back through getTags() must equal the tags
// "AAA" and "ARA" in the same order.
// NOTE(review): neither the declaration of `tags` nor the setTags() call is
// visible in this excerpt; both are presumably on lines not shown - confirm.
START_SECTION(void setTags(const std::vector< String > &tags))
117
// Fresh suffix array for this section, independent of the shared `sa`.
SuffixArrayTrypticCompressed * satc = new SuffixArrayTrypticCompressed(text,"");
119
tags.push_back("AAA");
120
tags.push_back("ARA");
121
// Const copy of the tag list.
const vector<String> tags_c (tags);
123
vector<String> res = satc->getTags();
124
TEST_EQUAL(res.at(0),tags.at(0));
125
TEST_EQUAL(res.at(1),tags.at(1));
128
// getTags(): a freshly built suffix array has no tags and getUseTags() is
// false; later in the section getUseTags() is expected to be true and the
// tags are read back.
// NOTE(review): the declaration of `tags` and the call that installs the tags
// are not visible in this excerpt - confirm against the full file.
START_SECTION(const std::vector<String>& getTags())
129
SuffixArrayTrypticCompressed * satc = new SuffixArrayTrypticCompressed(text,"");
130
// Initial state: no tags, tag usage off.
TEST_EQUAL(satc->getTags().size(),0);
131
TEST_EQUAL(satc->getUseTags(),false);
133
tags.push_back("AAA");
134
tags.push_back("ARA");
135
const vector<String> tags_c (tags);
137
// At this point tag usage is expected to be on.
TEST_EQUAL(satc->getUseTags(),true);
138
vector<String> res = satc->getTags();
139
TEST_EQUAL(res.at(0),tags.at(0));
140
TEST_EQUAL(res.at(1),tags.at(1));
143
// setUseTags(use_tags): checks the value reported by getUseTags() at several
// points in the section.
// NOTE(review): the calls that change the state between the checks, and the
// declaration of `tags`, are not visible in this excerpt - confirm against
// the full file.
START_SECTION(void setUseTags(bool use_tags))
144
SuffixArrayTrypticCompressed * satc = new SuffixArrayTrypticCompressed(text,"");
145
// A freshly built suffix array reports tag usage as off.
TEST_EQUAL(satc->getUseTags(),false);
147
TEST_EQUAL(satc->getUseTags(),false);
149
tags.push_back("AAA");
150
tags.push_back("ARA");
151
const vector<String> tags_c (tags);
153
// Expected to be on here and off again at the final check.
TEST_EQUAL(satc->getUseTags(),true);
155
TEST_EQUAL(satc->getUseTags(),false);
158
// getUseTags(): same sequence of expectations as the setUseTags section
// (false, false, true, false).
// NOTE(review): the calls that change the state between the checks, and the
// declaration of `tags`, are not visible in this excerpt - confirm against
// the full file.
START_SECTION(bool getUseTags())
159
SuffixArrayTrypticCompressed * satc = new SuffixArrayTrypticCompressed(text,"");
160
// A freshly built suffix array reports tag usage as off.
TEST_EQUAL(satc->getUseTags(),false);
162
TEST_EQUAL(satc->getUseTags(),false);
164
tags.push_back("AAA");
165
tags.push_back("ARA");
166
const vector<String> tags_c (tags);
168
TEST_EQUAL(satc->getUseTags(),true);
170
TEST_EQUAL(satc->getUseTags(),false);
173
// open(file_name): a missing file must raise FileNotFound; a suffix array
// saved with save() and loaded with open() must have the same dump.
START_SECTION(bool open(const String &file_name))
174
TEST_EXCEPTION (Exception::FileNotFound,sa->open("FileThatNotExists"));
175
// Rebuild the shared fixture.
// NOTE(review): the previous object behind `sa` is not deleted in the visible
// lines.
sa = new SuffixArrayTrypticCompressed(text,"");
176
// Register temporary files with the .lcp2, .skip2 and .sa2 suffixes of the
// base name passed to save() below.
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.lcp2"))
177
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.skip2"))
178
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.sa2"))
179
sa->save("SuffixArrayTrypticCompressed_test_save");
180
// Second instance over the same text, then loaded from the saved base name.
SuffixArrayTrypticCompressed * sa2 = new SuffixArrayTrypticCompressed(text,"");
181
sa2->open("SuffixArrayTrypticCompressed_test_save");
182
TEST_EQUAL(sa->toString(),sa2->toString());
185
// save(file_name): a suffix array written with save() and then constructed
// from the same base name must have the same dump as the original.
START_SECTION(bool save(const String &file_name))
186
// Disabled check for saving to a location without write permission.
//TEST_EXCEPTION (Exception::UnableToCreateFile,sa->save("/usr/WhereIHaveNoRigths"));
187
sa = new SuffixArrayTrypticCompressed(text,"");
188
// Register temporary files with the .lcp2, .skip2 and .sa2 suffixes of the
// base name passed to save() below.
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.lcp2"))
189
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.skip2"))
190
NEW_TMP_FILE(String("SuffixArrayTrypticCompressed_test_save.sa2"))
191
sa->save("SuffixArrayTrypticCompressed_test_save");
192
// Here the saved base name is passed as the constructor's file name argument
// instead of calling open().
SuffixArrayTrypticCompressed * sa2 = new SuffixArrayTrypticCompressed(text,"SuffixArrayTrypticCompressed_test_save");
193
TEST_EQUAL(sa->toString(),sa2->toString());
196
// toString(): pins the dump for the minimal "$" input and three fixed slices
// of the dump of `sa` (same expectations as in the constructor section).
START_SECTION(String toString())
197
ptr = new SuffixArrayTrypticCompressed("$","");
198
TEST_EQUAL(ptr->toString(),"lcp: 0\nskip: 0");
199
// Slice the dump of `sa` at fixed offsets and compare each slice with its
// expected text.
String s = sa->toString();
200
String sTree = s.substr(0,19);
201
String lcp = s.substr(25, 4);
202
String skip = s.substr(36,4);
203
TEST_EQUAL(sTree,"AA\nAAARAA\nARARP\nARP");
204
TEST_EQUAL(lcp,"2120");
205
TEST_EQUAL(skip,"1210");
208
// printStatistic(): no checks are performed in the visible lines; the
// original comment below gives the reason.
START_SECTION(void printStatistic())
210
//only for internal use
213
// findSpec(candidates, spec): exercised first on the small fixture text and
// then on a sequence read from the test data file, with and without tags and
// with one allowed modification.
// Each candidate is a pair ((first.first, first.second), second); the checks
// below use first.first / first.second as the (position, length) arguments
// of substr and add `second` to the computed mass in the modification check.
// NOTE(review): several lines of this section (loop bodies, declarations of
// `tags`, `i_stream` and `txt`, and some setter calls) are not visible in
// this excerpt; comments below only describe what is shown.
START_SECTION((void findSpec(std::vector< std::vector< std::pair< std::pair< SignedSize, SignedSize >, DoubleReal > > > &candidates, const std::vector< DoubleReal > &spec)))
214
// Mass lookup table indexed by character code.
DoubleReal masse[255];
215
ResidueDB* rdb = ResidueDB::getInstance();
217
// One-letter codes whose masses are entered into the table.
char aa[] = "ARNDCEQGHILKMFPSTWYV";
219
// NOTE(review): the body of this loop is not visible in this excerpt.
for (Size i = 0; i<255;++i)
223
// Store the mono weight (Residue::Internal) of every letter in `aa`.
for (Size i = 0; i<strlen(aa);++i)
225
const Residue* r = rdb->getResidue(aa[i]);
226
masse[(int)aa[i]]=r->getMonoWeight(Residue::Internal);
229
// Part 1: query the fixture text with the full mono weights of "AR" and
// "AAAR".
sa = new SuffixArrayTrypticCompressed(text, "");
230
vector<DoubleReal> spec;
231
//spec.push_back(245.2816);
232
spec.push_back(AASequence("AR").getMonoWeight(Residue::Full));
233
spec.push_back(AASequence("AAAR").getMonoWeight(Residue::Full));
234
//spec.push_back(387.4392);
235
vector<DoubleReal> specc(spec);
236
vector<vector<pair<pair<SignedSize, SignedSize>, DoubleReal> > > res;
237
sa->findSpec(res, specc);
239
// One result list per query mass, each expected to hold exactly one candidate.
TEST_EQUAL(res.size(),specc.size());
240
for (Size i = 0; i < res.size(); ++i)
242
TEST_EQUAL(res.at(i).size(), 1);
245
// Expected hits: (8, 2) for the first mass and (1, 4) for the second.
TEST_EQUAL(res.at(0).at(0).first.first, 8)
246
TEST_EQUAL(res.at(0).at(0).first.second, 2)
247
TEST_EQUAL(res.at(1).at(0).first.first, 1)
248
TEST_EQUAL(res.at(1).at(0).first.second, 4)
252
// NOTE(review): the empty result expected below suggests `spec` was emptied
// on a line not visible here - confirm.
const vector<DoubleReal> specc2(spec);
254
sa->findSpec(res, specc2);
255
TEST_EQUAL(res.size(),0);
256
// The masses 441.4806 followed by 178.1864 are expected to raise
// InvalidValue (presumably because they are not in ascending order - confirm).
spec.push_back(441.4806);
257
spec.push_back(178.1864);
258
const vector<DoubleReal> specc3 (spec);
260
TEST_EXCEPTION(Exception::InvalidValue, sa->findSpec(res, specc3));
262
// Part 2: build a suffix array over the first line of the test data file.
i_stream.open(OPENMS_GET_TEST_DATA_PATH("SuffixArrayTrypticCompressed_test.txt"));
264
getline(i_stream,txt);
266
sa = new SuffixArrayTrypticCompressed(txt,"");
267
// No modifications and no tag filtering for the first query.
sa->setNumberOfModifications(0);
268
sa->setUseTags(false);
271
// Query masses 500, 697, ... in steps of 197, all below 5000.
vector<DoubleReal> spec_new;
272
for (int i = 500; i < 5000; i += 197)
274
spec_new.push_back((DoubleReal)i);
276
const vector<DoubleReal> specc_new (spec_new);
278
sa->findSpec(res, specc_new);
279
//checking for doubled results;
280
// No two candidates for the same mass may share both position and length.
for (Size i = 0; i < res.size();++i)
282
for (Size j = 0;j<res.at(i).size();++j)
284
for (Size k = j+1; k < res.at(i).size();++k)
286
TEST_EQUAL(res[i][j].first.first==res[i][k].first.first && res[i][j].first.second==res[i][k].first.second, false);
291
// Comparison tolerance for TEST_REAL_SIMILAR is set slightly above the
// search tolerance of 0.5.
TOLERANCE_ABSOLUTE(0.55)
292
sa->setTolerance(0.5);
294
// checking if the mass of the found candidates is correct
295
// checking if the next character is not a P
296
for (Size i = 0; i < res.size();++i)
298
for (Size j = 0;j<res.at(i).size();++j)
300
String seq = txt.substr(res.at(i).at(j).first.first,res.at(i).at(j).first.second);
301
// Candidate mass = mono weight of H2O plus the table mass of every character.
DoubleReal m = EmpiricalFormula("H2O").getMonoWeight();
302
for (Size k = 0; k < seq.length();++k)
304
m += masse[(int)seq[k]];
307
// Unless the character before the candidate is '$', the candidate must not
// start with 'P'.
if (txt[res.at(i).at(j).first.first-1]!='$') TEST_NOT_EQUAL(seq[0],'P');
308
// Unless the character after the candidate is '$', the candidate must end
// in 'R' or 'K'.
if (txt[res.at(i).at(j).first.first+res.at(i).at(j).first.second]!='$') TEST_EQUAL(seq[seq.length()-1]=='R'||seq[seq.length()-1]=='K',true)
310
TEST_REAL_SIMILAR(m,specc_new.at(i));
313
// getting all candidates with tags
314
// NOTE(review): `number_of_tags` is only referenced in a commented-out line
// within this excerpt.
Size number_of_tags=0;
315
// Sequences from the untagged result; the loop scans each for "AAA"/"ARA".
// NOTE(review): the lines that use the scan result are not visible here.
vector<String> res_with_tags_exp;
316
for (Size i = 0; i < res.size();++i)
318
for (Size j = 0;j<res.at(i).size();++j)
320
String seq = txt.substr(res.at(i).at(j).first.first,res.at(i).at(j).first.second);
321
bool has_tag = false;
322
// Slide a window of three characters over the candidate.
for (Size k = 2; k < seq.length();++k)
324
if (seq.substr(k-2,3)=="AAA"||seq.substr(k-2,3)=="ARA")
333
res_with_tags_exp.push_back(seq);
338
//std::cout<<"number_of_tags_:"<<number_of_tags<<std::endl;
340
// Part 3: repeat the query with the tags "AAA" and "ARA".
// NOTE(review): the declaration of `tags` and the call that installs them
// are not visible in this excerpt.
tags.push_back("AAA");
341
tags.push_back("ARA");
342
const vector<String> tags_c (tags);
345
sa->findSpec(res, specc_new);
346
vector<String> res_with_tags;
347
for (Size i = 0; i < res.size();i++)
349
for (Size j = 0;j<res.at(i).size();j++)
351
String seq = txt.substr(res.at(i).at(j).first.first,res.at(i).at(j).first.second);
352
bool has_tag = false;
353
for (Size k = 2; k < seq.length();k++)
355
if (seq.substr(k-2,3)=="AAA"||seq.substr(k-2,3)=="ARA")
361
//if (!has_tag) std::cout <<seq<<std::endl;
362
// Every candidate returned with tags enabled must contain a tag, and its
// `second` component must be 0.
TEST_EQUAL(has_tag, true);
363
TEST_EQUAL(res.at(i).at(j).second, 0);
365
res_with_tags.push_back(seq);
368
// Look up each expected tagged sequence in the tagged result.
// NOTE(review): the lines that set and test `was_found` are not visible in
// this excerpt.
for (Size i = 0; i < res_with_tags_exp.size();++i)
370
bool was_found = false;
371
for (Size j = 0; j < res_with_tags.size();++j)
373
if (res_with_tags_exp.at(i)==res_with_tags.at(j))
379
//if (!was_found) //std::cout<<res_with_tags_exp.at(i)<<std::endl;
381
//std::cout<<"mod: 1"<<std::endl;
382
// Part 4: allow one modification, with tag filtering switched off.
sa->setNumberOfModifications(1);
383
sa->setUseTags(false);
385
sa->findSpec(res, specc_new);
387
for (Size i = 0; i < res.size();i++)
389
for (Size j = 0;j<res.at(i).size();j++)
391
String seq = txt.substr(res.at(i).at(j).first.first,res.at(i).at(j).first.second);
392
DoubleReal m = EmpiricalFormula("H2O").getMonoWeight();
393
for (Size k = 0; k < seq.length();k++)
395
m += masse[(int)seq[k]];
397
//if (txt[res.at(i).at(j).first.first+res.at(i).at(j).first.second]=='P')
399
//std::cout<<"hasP:"<<seq<<std::endl;
401
// The character right after the candidate must not be 'P'; the computed mass
// plus the candidate's `second` component must match the query mass.
TEST_NOT_EQUAL(txt[res.at(i).at(j).first.first+res.at(i).at(j).first.second],'P');
402
TEST_REAL_SIMILAR(m+res.at(i).at(j).second,specc_new.at(i));
406
// testing if a candidate can belong to several input masses
408
// The same mass is appended twice; the results at indices 0 and 1 are then
// compared element by element.
// NOTE(review): this comparison presumably relies on `spec` having been
// emptied on a line not visible here - confirm.
spec.push_back(441.4806);
409
spec.push_back(441.4806);
410
const vector<DoubleReal> specc4 (spec);
411
sa->setNumberOfModifications(0);
412
sa->setUseTags(false);
414
sa->findSpec(res, specc4);
415
TEST_EQUAL(res.at(0).size(),res.at(1).size());
416
for (Size j = 0; j < res.at(0).size();++j)
418
TEST_EQUAL(res.at(0).at(j).first.first,res.at(1).at(j).first.first);
419
TEST_EQUAL(res.at(0).at(j).first.second,res.at(1).at(j).first.second);
420
TEST_EQUAL(res.at(0).at(j).second,res.at(1).at(j).second);
425
/////////////////////////////////////////////////////////////
426
/////////////////////////////////////////////////////////////