~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/FORMAT/InspectOutfile.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2013-12-20 11:30:16 UTC
  • mfrom: (5.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20131220113016-wre5g9bteeheq6he
Tags: 1.11.1-3
* remove version number from libboost development package names;
* ensure that AUTHORS is correctly shipped in all packages.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
// -*- mode: C++; tab-width: 2; -*-
2
 
// vi: set ts=2:
3
 
//
4
 
// --------------------------------------------------------------------------
5
 
//                   OpenMS Mass Spectrometry Framework
6
 
// --------------------------------------------------------------------------
7
 
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
8
 
//
9
 
//  This library is free software; you can redistribute it and/or
10
 
//  modify it under the terms of the GNU Lesser General Public
11
 
//  License as published by the Free Software Foundation; either
12
 
//  version 2.1 of the License, or (at your option) any later version.
13
 
//
14
 
//  This library is distributed in the hope that it will be useful,
15
 
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 
//  Lesser General Public License for more details.
18
 
//
19
 
//  You should have received a copy of the GNU Lesser General Public
20
 
//  License along with this library; if not, write to the Free Software
21
 
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
1
// --------------------------------------------------------------------------
 
2
//                   OpenMS -- Open-Source Mass Spectrometry
 
3
// --------------------------------------------------------------------------
 
4
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
 
5
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
 
6
//
 
7
// This software is released under a three-clause BSD license:
 
8
//  * Redistributions of source code must retain the above copyright
 
9
//    notice, this list of conditions and the following disclaimer.
 
10
//  * Redistributions in binary form must reproduce the above copyright
 
11
//    notice, this list of conditions and the following disclaimer in the
 
12
//    documentation and/or other materials provided with the distribution.
 
13
//  * Neither the name of any author or any participating institution
 
14
//    may be used to endorse or promote products derived from this software
 
15
//    without specific prior written permission.
 
16
// For a full list of authors, refer to the file AUTHORS.
 
17
// --------------------------------------------------------------------------
 
18
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 
19
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
20
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 
21
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
 
22
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 
23
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 
24
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 
25
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 
26
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 
27
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 
28
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22
29
//
23
30
// --------------------------------------------------------------------------
24
31
// $Maintainer: Andreas Bertsch $
37
44
 
38
45
using namespace std;
39
46
 
40
 
namespace OpenMS 
 
47
namespace OpenMS
41
48
{
42
 
        InspectOutfile::InspectOutfile() {}
43
 
 
44
 
        /// copy constructor
45
 
        InspectOutfile::InspectOutfile(const InspectOutfile&) {}
46
 
 
47
 
        /// destructor
48
 
        InspectOutfile::~InspectOutfile() {}
49
 
 
50
 
        /// assignment operator
51
 
        InspectOutfile& InspectOutfile::operator=(const InspectOutfile& inspect_outfile)
52
 
        {
53
 
                if (this == &inspect_outfile) return *this;
54
 
                return *this;
55
 
        }
56
 
 
57
 
        /// equality operator
58
 
        bool InspectOutfile::operator==(const InspectOutfile&) const
59
 
        {
60
 
                return true;
61
 
        }
62
 
 
63
 
        vector<Size> InspectOutfile::load(const String& result_filename, vector<PeptideIdentification>& peptide_identifications,
64
 
                ProteinIdentification& protein_identification, const DoubleReal p_value_threshold, const String& database_filename)
65
 
        {
66
 
                // check whether the p_value is correct
67
 
                if ( (p_value_threshold < 0) || (p_value_threshold > 1) )
68
 
                {
69
 
                        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "The parameters 'p_value_threshold' must be >= 0 and <=1 !");
70
 
                }
71
 
                
72
 
                ifstream result_file(result_filename.c_str());
73
 
                if (!result_file)
74
 
                {
75
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
76
 
                }
77
 
                
78
 
                String
79
 
                        line,
80
 
                        accession,
81
 
                        accession_type,
82
 
                        spectrum_file,
83
 
                        identifier;
84
 
                        
85
 
                Size
86
 
                        record_number(0),
87
 
                        scan_number(0),
88
 
                        line_number(0),
89
 
                        number_of_columns(0);
90
 
                
91
 
                vector< String > substrings;
92
 
                vector< Size > corrupted_lines;
93
 
                
94
 
                PeptideIdentification peptide_identification;
95
 
                
96
 
                if ( !getline(result_file, line) ) // the header is read in a special function, so it can be skipped
97
 
                {
98
 
                        result_file.close();
99
 
                        result_file.clear();
100
 
                        throw Exception::FileEmpty(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
101
 
                }
102
 
                if ( !line.empty() && (line[line.length()-1] < 33) ) line.resize(line.length()-1);
103
 
                line.trim();
104
 
                ++line_number;
105
 
 
106
 
                DateTime datetime = DateTime::now();
107
 
                if ( protein_identification.getSearchEngine().empty() ) identifier = "InsPecT_" + datetime.getDate();
108
 
                else protein_identification.getSearchEngine() + "_" + datetime.getDate();
109
 
                
110
 
                // to get the precursor retention time and mz values later, save the filename and the numbers of the scans
111
 
                vector< pair< String, vector< pair< Size, Size > > > > files_and_peptide_identification_with_scan_number;
112
 
                // the record number is mapped to the position in the protein hits, to retrieve their sequences
113
 
                map<Size, Size> rn_position_map;
114
 
                
115
 
                // get the header
116
 
                Int
117
 
                        spectrum_file_column(-1),
118
 
                        scan_column(-1),
119
 
                        peptide_column(-1),
120
 
                        protein_column(-1),
121
 
                        charge_column(-1),
122
 
                        MQ_score_column(-1),
123
 
                        p_value_column(-1),
124
 
                        record_number_column(-1),
125
 
                        DB_file_pos_column(-1),
126
 
                        spec_file_pos_column(-1);
127
 
                        
128
 
                String::size_type start(0), end(0);
129
 
                
130
 
                try
131
 
                {
132
 
                        readOutHeader(result_filename, line, spectrum_file_column, scan_column, peptide_column, protein_column, charge_column, MQ_score_column, p_value_column, record_number_column, DB_file_pos_column, spec_file_pos_column, number_of_columns);
133
 
                }
134
 
    catch( Exception::ParseError & p_e )
135
 
                {
136
 
                        result_file.close();
137
 
                        result_file.clear();
 
49
  InspectOutfile::InspectOutfile()
 
50
  {
 
51
  }
 
52
 
 
53
  /// copy constructor
 
54
  InspectOutfile::InspectOutfile(const InspectOutfile &)
 
55
  {
 
56
  }
 
57
 
 
58
  /// destructor
 
59
  InspectOutfile::~InspectOutfile()
 
60
  {
 
61
  }
 
62
 
 
63
  /// assignment operator
 
64
  InspectOutfile & InspectOutfile::operator=(const InspectOutfile & inspect_outfile)
 
65
  {
 
66
    if (this == &inspect_outfile)
 
67
      return *this;
 
68
 
 
69
    return *this;
 
70
  }
 
71
 
 
72
  /// equality operator
 
73
  bool InspectOutfile::operator==(const InspectOutfile &) const
 
74
  {
 
75
    return true;
 
76
  }
 
77
 
 
78
  vector<Size> InspectOutfile::load(const String & result_filename, vector<PeptideIdentification> & peptide_identifications,
 
79
                                    ProteinIdentification & protein_identification, const DoubleReal p_value_threshold, const String & database_filename)
 
80
  {
 
81
    // check whether the p_value is correct
 
82
    if ((p_value_threshold < 0) || (p_value_threshold > 1))
 
83
    {
 
84
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "The parameters 'p_value_threshold' must be >= 0 and <=1 !");
 
85
    }
 
86
 
 
87
    ifstream result_file(result_filename.c_str());
 
88
    if (!result_file)
 
89
    {
 
90
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
 
91
    }
 
92
 
 
93
    String
 
94
      line,
 
95
      accession,
 
96
      accession_type,
 
97
      spectrum_file,
 
98
      identifier;
 
99
 
 
100
    Size
 
101
    record_number(0),
 
102
    scan_number(0),
 
103
    line_number(0),
 
104
    number_of_columns(0);
 
105
 
 
106
    vector<String> substrings;
 
107
    vector<Size> corrupted_lines;
 
108
 
 
109
    PeptideIdentification peptide_identification;
 
110
 
 
111
    if (!getline(result_file, line))       // the header is read in a special function, so it can be skipped
 
112
    {
 
113
      result_file.close();
 
114
      result_file.clear();
 
115
      throw Exception::FileEmpty(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
 
116
    }
 
117
    if (!line.empty() && (line[line.length() - 1] < 33))
 
118
      line.resize(line.length() - 1);
 
119
    line.trim();
 
120
    ++line_number;
 
121
 
 
122
    DateTime datetime = DateTime::now();
 
123
    if (protein_identification.getSearchEngine().empty())
 
124
      identifier = "InsPecT_" + datetime.getDate();
 
125
    else
 
126
      protein_identification.getSearchEngine() + "_" + datetime.getDate();
 
127
 
 
128
    // to get the precursor retention time and mz values later, save the filename and the numbers of the scans
 
129
    vector<pair<String, vector<pair<Size, Size> > > > files_and_peptide_identification_with_scan_number;
 
130
    // the record number is mapped to the position in the protein hits, to retrieve their sequences
 
131
    map<Size, Size> rn_position_map;
 
132
 
 
133
    // get the header
 
134
    Int
 
135
    spectrum_file_column(-1),
 
136
    scan_column(-1),
 
137
    peptide_column(-1),
 
138
    protein_column(-1),
 
139
    charge_column(-1),
 
140
    MQ_score_column(-1),
 
141
    p_value_column(-1),
 
142
    record_number_column(-1),
 
143
    DB_file_pos_column(-1),
 
144
    spec_file_pos_column(-1);
 
145
 
 
146
    String::size_type start(0), end(0);
 
147
 
 
148
    try
 
149
    {
 
150
      readOutHeader(result_filename, line, spectrum_file_column, scan_column, peptide_column, protein_column, charge_column, MQ_score_column, p_value_column, record_number_column, DB_file_pos_column, spec_file_pos_column, number_of_columns);
 
151
    }
 
152
    catch (Exception::ParseError & p_e)
 
153
    {
 
154
      result_file.close();
 
155
      result_file.clear();
138
156
      LOG_WARN << "ParseError (" << p_e.getMessage() << ") caught in " << __FILE__ << "\n";
139
157
      throw;
140
 
                }
141
 
                
142
 
                while ( getline(result_file, line) )
143
 
                {
144
 
                        ++line_number;
145
 
                        if ( !line.empty() && (line[line.length()-1] < 33) ) line.resize(line.length()-1);
146
 
                        line.trim();
147
 
                        if ( line.empty() ) continue;
148
 
                        
149
 
                        // check whether the line has enough columns
150
 
                        line.split('\t', substrings);
151
 
                        if ( substrings.size() != number_of_columns )
152
 
                        {
153
 
                                corrupted_lines.push_back(line_number);
154
 
                                continue;
155
 
                        }
156
 
                        
157
 
                        // if the p-value exceeds the threshold, skip the line
158
 
                        if ( substrings[p_value_column].toFloat() > p_value_threshold ) continue;
159
 
                        
160
 
                        // the protein
161
 
                        ProteinHit protein_hit;
162
 
                        // get accession number and type
163
 
                        getACAndACType(substrings[protein_column], accession, accession_type);
164
 
                        protein_hit.setAccession(accession);
165
 
//                      protein_hit.setScore(0.0);
166
 
                        
167
 
                        // the database position of the protein (the i-th protein)
168
 
                        record_number = substrings[record_number_column].toInt();
169
 
                        
170
 
                        // map the database position of the protein to its position in the protein hits and insert it, if it's a new protein
171
 
                        if ( rn_position_map.find(record_number) == rn_position_map.end() )
172
 
                        {
173
 
                                rn_position_map[record_number] = protein_identification.getHits().size();
174
 
                                protein_identification.insertHit(protein_hit);
175
 
                        }
176
 
                        
177
 
                        // if a new scan is found (new file or new scan), insert it into the vector (the condition is always fulfilled the first time because spectrum_file is "")
178
 
                        if ( (substrings[spectrum_file_column] != spectrum_file) || ((Size) substrings[scan_column].toInt() != scan_number) )
179
 
                        {
180
 
                                if ( substrings[spectrum_file_column] != spectrum_file ) // if it's a new file, insert it into the vector (used to retrieve RT and MT later)
181
 
                                {
182
 
                                        // if it's the first file or if hits have been found in the file before, insert a new file
183
 
                                        if ( files_and_peptide_identification_with_scan_number.empty() || !files_and_peptide_identification_with_scan_number.back().second.empty() )
184
 
                                        {
185
 
                                                files_and_peptide_identification_with_scan_number.push_back(make_pair(substrings[spectrum_file_column], vector< pair<Size , Size> >()));
186
 
                                        }
187
 
                                        // otherwise change the name of the last file entry (the one without hits)
188
 
                                        else files_and_peptide_identification_with_scan_number.back().first = substrings[spectrum_file_column];
189
 
                                }
190
 
                                
191
 
                                spectrum_file = substrings[spectrum_file_column];
192
 
                                scan_number = substrings[scan_column].toInt();
193
 
                                
194
 
                                // if it's not the first scan and if hits have been found, insert the peptide identification
195
 
                                if ( !peptide_identification.empty() && !peptide_identification.getHits().empty() )
196
 
                                {
197
 
                                        files_and_peptide_identification_with_scan_number.back().second.push_back(make_pair(peptide_identifications.size(), scan_number));
198
 
                                        peptide_identifications.push_back(peptide_identification);
199
 
                                }
200
 
                                peptide_identification = PeptideIdentification();
201
 
                                
202
 
                                peptide_identification.setIdentifier(identifier);
203
 
                                peptide_identification.setSignificanceThreshold(p_value_threshold);
204
 
                                peptide_identification.setScoreType(score_type_);
205
 
                        }
206
 
                        
207
 
                        // get the peptide infos from the new peptide and insert it
208
 
                        PeptideHit peptide_hit;
209
 
                        peptide_hit.setCharge(substrings[charge_column].toInt());
210
 
                        peptide_hit.setScore(substrings[MQ_score_column].toFloat());
211
 
                        peptide_hit.setRank(0); // all ranks are set to zero and assigned later
212
 
                        
213
 
                        // get the sequence and the amino acid before and after
214
 
                        String sequence, sequence_with_mods;
215
 
                        sequence_with_mods = substrings[peptide_column];
216
 
                        start = sequence_with_mods.find('.') + 1;
217
 
                        end = sequence_with_mods.find_last_of('.');
218
 
                        if ( start >= 2 ) peptide_hit.setAABefore(sequence_with_mods[start - 2]);
219
 
                        if ( end< sequence_with_mods.length() + 1 ) peptide_hit.setAAAfter(sequence_with_mods[end + 1]);
220
 
                        
221
 
                        //remove modifications (small characters and anything that's not in the alphabet)
222
 
                        sequence_with_mods = substrings[peptide_column].substr(start, end-start);
223
 
                        for ( String::ConstIterator c_i = sequence_with_mods.begin(); c_i != sequence_with_mods.end(); ++c_i )
224
 
                        {
225
 
                                if ( (bool) isalpha(*c_i) && (bool) isupper(*c_i) ) sequence.append(1, *c_i);
226
 
                        }
227
 
                        
228
 
                        peptide_hit.setSequence(sequence);
229
 
                        peptide_hit.addProteinAccession(accession);
230
 
                        
231
 
                        peptide_identification.insertHit(peptide_hit);
232
 
                }
233
 
                
234
 
                // result file read
235
 
                result_file.close();
236
 
                result_file.clear();
237
 
                
238
 
                // if it's not the first scan and if hits have been found, insert the peptide identification
239
 
                if ( !peptide_identification.empty() && !peptide_identification.getHits().empty() )
240
 
                {
241
 
                        files_and_peptide_identification_with_scan_number.back().second.push_back(make_pair(peptide_identifications.size(), scan_number));
242
 
                        peptide_identifications.push_back(peptide_identification);
243
 
                }
244
 
                
245
 
                // if the last file had no hits, delete it
246
 
                if ( !files_and_peptide_identification_with_scan_number.empty() && files_and_peptide_identification_with_scan_number.back().second.empty() )
247
 
                {
248
 
                        files_and_peptide_identification_with_scan_number.pop_back();
249
 
                }
250
 
                
251
 
                if ( !peptide_identifications.empty() ) peptide_identifications.back().assignRanks();
252
 
                
253
 
                // search the sequence of the proteins
254
 
                if ( !protein_identification.getHits().empty() && !database_filename.empty() )
255
 
                {
256
 
                        vector< ProteinHit > protein_hits = protein_identification.getHits();
257
 
                        vector< String > sequences;
258
 
                        getSequences(database_filename, rn_position_map, sequences);
259
 
                        
260
 
                        // set the retrieved sequences
261
 
                        vector< String >::const_iterator s_i = sequences.begin();
262
 
                        for ( map< Size, Size >::const_iterator rn_i = rn_position_map.begin(); rn_i != rn_position_map.end(); ++rn_i, ++s_i ) protein_hits[rn_i->second].setSequence(*s_i);
263
 
                        
264
 
                        sequences.clear();
265
 
                        rn_position_map.clear();
266
 
                        protein_identification.setHits(protein_hits);
267
 
                        protein_hits.clear();
268
 
                }
269
 
                
270
 
                // get the precursor retention times and mz values
271
 
                getPrecursorRTandMZ(files_and_peptide_identification_with_scan_number, peptide_identifications);
272
 
                protein_identification.setDateTime(datetime);
273
 
                protein_identification.setIdentifier(identifier);
274
 
                
275
 
                return corrupted_lines;
276
 
  }
277
 
        
278
 
        // < record number, number of protein in a vector >
279
 
        vector< Size >
280
 
        InspectOutfile::getSequences(
281
 
                const String& database_filename,
282
 
                const map< Size, Size >& wanted_records,
283
 
                vector< String >& sequences)
284
 
        {
285
 
                ifstream database(database_filename.c_str());
286
 
                if (!database)
287
 
                {
288
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
289
 
                }
290
 
                
291
 
                vector< Size > not_found;
292
 
                Size seen_records(0);
293
 
                stringbuf sequence;
294
 
                database.seekg(0, ios::end);
295
 
                streampos sp = database.tellg();
296
 
                database.seekg(0, ios::beg);
297
 
                
298
 
                for ( map< Size, Size >::const_iterator wr_i = wanted_records.begin(); wr_i !=  wanted_records.end(); ++wr_i )
299
 
                {
300
 
                        for ( ; seen_records < wr_i->first; ++seen_records )
301
 
                        {
302
 
                                database.ignore(sp, trie_delimiter_);
303
 
                        }
304
 
                        database.get(sequence, trie_delimiter_);
305
 
                        sequences.push_back(sequence.str());
306
 
                        if ( sequences.back().empty() ) not_found.push_back(wr_i->first);
307
 
                        sequence.str("");
308
 
                }
309
 
                
310
 
                // close the filestreams
311
 
                database.close();
312
 
                database.clear();
313
 
                
314
 
                return not_found;
315
 
        }
316
 
 
317
 
        void
318
 
        InspectOutfile::getACAndACType(
319
 
                String line,
320
 
                String& accession,
321
 
                String& accession_type)
322
 
        {
323
 
                String swissprot_prefixes = "JLOPQUX";
324
 
                /// @todo replace this by general FastA implementation? (Martin)
325
 
                accession.clear();
326
 
                accession_type.clear();
327
 
                pair< String, String > p;
328
 
                // if it's a FASTA line
329
 
                if ( line.hasPrefix(">") ) line.erase(0,1);
330
 
                if ( !line.empty() && (line[line.length()-1] < 33) ) line.resize(line.length()-1);
331
 
                line.trim();
332
 
                
333
 
                // if it's a swissprot accession
334
 
                if ( line.hasPrefix("tr") || line.hasPrefix("sp") )
335
 
                {
336
 
                        accession = line.substr(3, line.find('|', 3)-3);
337
 
                        accession_type = "SwissProt";
338
 
                }
339
 
                else if ( line.hasPrefix("gi") )
340
 
                {
341
 
                        String::size_type snd(line.find('|', 3));
342
 
                        String::size_type third(0);
343
 
                        if ( snd != String::npos )
344
 
                        {
345
 
                                third = line.find('|', ++snd) + 1;
346
 
                                
347
 
                                accession = line.substr(third, line.find('|', third)-third);
348
 
                                accession_type = line.substr(snd, third-1-snd);
349
 
                        }
350
 
                        if ( accession_type == "gb" ) accession_type = "GenBank";
351
 
                        else if ( accession_type == "emb" ) accession_type = "EMBL";
352
 
                        else if ( accession_type == "dbj" ) accession_type = "DDBJ";
353
 
                        else if ( accession_type == "ref" ) accession_type = "NCBI";
354
 
                        else if ( (accession_type == "sp") || (accession_type == "tr") ) accession_type = "SwissProt";
355
 
                        else if ( accession_type == "gnl" )
356
 
                        {
357
 
                                accession_type = accession;
358
 
                                snd = line.find('|', third);
359
 
                                third = line.find('|', ++snd);
360
 
                                if ( third != String::npos ) accession = line.substr(snd, third-snd);
361
 
                                else
362
 
                                {
363
 
                                        third = line.find(' ', snd);
364
 
                                        if ( third != String::npos ) accession = line.substr(snd, third-snd);
365
 
                                        else accession = line.substr(snd);
366
 
                                }
367
 
                        }
368
 
                        else
369
 
                        {
370
 
                                String::size_type pos1(line.find('(', 0));
371
 
                                String::size_type pos2(0);
372
 
                                if ( pos1 != String::npos )
373
 
                                {
374
 
                                        pos2 = line.find(')', ++pos1);
375
 
                                        if ( pos2 != String::npos )
376
 
                                        {
377
 
                                                accession = line.substr(pos1, pos2 - pos1);
378
 
                                                if ( (accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos) ) accession_type = "SwissProt";
379
 
                                                else accession.clear();
380
 
                                        }
381
 
                                }
382
 
                                if ( accession.empty() )
383
 
                                {
384
 
                                        accession_type = "gi";
385
 
                                        if ( snd != String::npos ) accession = line.substr(3, snd-4);
386
 
                                        else
387
 
                                        {
388
 
                                                if ( snd == String::npos ) snd = line.find(' ', 3);
389
 
                                                if ( snd != String::npos ) accession = line.substr(3, snd-3);
390
 
                                                else accession = line.substr(3);
391
 
                                        }
392
 
                                }
393
 
                        }
394
 
                }
395
 
                else if ( line.hasPrefix("ref") )
396
 
                {
397
 
                        accession = line.substr(4, line.find('|', 4) - 4);
398
 
                        accession_type = "NCBI";
399
 
                }
400
 
                else if ( line.hasPrefix("gnl") )
401
 
                {
402
 
                        line.erase(0,3);
403
 
                        accession_type = line.substr(0, line.find('|', 0));
404
 
                        accession = line.substr(accession_type.length()+1);
405
 
                }
406
 
                else if ( line.hasPrefix("lcl") )
407
 
                {
408
 
                        line.erase(0,4);
409
 
                        accession_type = "lcl";
410
 
                        accession = line;
411
 
                }
412
 
                else
413
 
                {
414
 
                        String::size_type pos1(line.find('(', 0));
415
 
                        String::size_type pos2(0);
416
 
                        if ( pos1 != String::npos )
417
 
                        {
418
 
                                pos2 = line.find(')', ++pos1);
419
 
                                if ( pos2 != String::npos )
420
 
                                {
421
 
                                        accession = line.substr(pos1, pos2 - pos1);
422
 
                                        if ( (accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos) ) accession_type = "SwissProt";
423
 
                                        else accession.clear();
424
 
                                }
425
 
                        }
426
 
                        if ( accession.empty() )
427
 
                        {
428
 
                                pos1 = line.find('|');
429
 
                                accession = line.substr(0, pos1);
430
 
                                if ( (accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos) ) accession_type = "SwissProt";
431
 
                                else
432
 
                                {
433
 
                                        pos1 = line.find(' ');
434
 
                                        accession = line.substr(0, pos1);
435
 
                                        if ( (accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos) ) accession_type = "SwissProt";
436
 
                                        else
437
 
                                        {
438
 
                                                accession = line.substr(0, 6);
439
 
                                                if ( String(swissprot_prefixes).find(accession[0], 0) != String::npos ) accession_type = "SwissProt";
440
 
                                                else accession.clear();
441
 
                                        }
442
 
                                }
443
 
                        }
444
 
                }
445
 
                if ( accession.empty() )
446
 
                {
447
 
                        accession = line.trim();
448
 
                        accession_type = "unknown";
449
 
                }
450
 
        }
451
 
 
452
 
        void
453
 
        InspectOutfile::getPrecursorRTandMZ(
454
 
                const vector< pair< String, vector< pair < Size, Size > > > >& files_and_peptide_identification_with_scan_number,
455
 
                vector< PeptideIdentification >& ids)
456
 
        {
457
 
                MSExperiment<> experiment;
458
 
                String type;
459
 
                
460
 
                for ( vector< pair< String, vector< pair< Size, Size > > > >::const_iterator fs_i = files_and_peptide_identification_with_scan_number.begin(); fs_i != files_and_peptide_identification_with_scan_number.end(); ++fs_i )
461
 
                {
462
 
                        getExperiment(experiment, type, fs_i->first); // may throw an exception if the filetype could not be determined
463
 
                        
464
 
                        if ( experiment.size() < fs_i->second.back().second )
465
 
                        {
466
 
                                throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Not enought scans in file! (" + String(experiment.size()) + " available, should be at least " + String(fs_i->second.back().second) + ")", fs_i->first);
467
 
                        }
468
 
                        
469
 
                        for ( vector< pair< Size, Size > >::const_iterator pi_scan_i = fs_i->second.begin(); pi_scan_i != fs_i->second.end(); ++pi_scan_i )
470
 
                        {
471
 
                                ids[pi_scan_i->first].setMetaValue("MZ", experiment[pi_scan_i->second - 1].getPrecursors()[0].getMZ());
472
 
                                ids[pi_scan_i->first].setMetaValue("RT", experiment[pi_scan_i->second - 1].getRT());
473
 
                        }
474
 
                }
475
 
        }
476
 
 
477
 
        void
478
 
        InspectOutfile::compressTrieDB(
479
 
                const String& database_filename,
480
 
                const String& index_filename,
481
 
                vector< Size >& wanted_records,
482
 
                const String& snd_database_filename,
483
 
                const String& snd_index_filename,
484
 
                bool append)
485
 
        {
486
 
                if ( database_filename == snd_database_filename )
487
 
                {
488
 
                        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Same filename can not be used for original and second database!", database_filename);
489
 
                }
490
 
                if ( index_filename == snd_index_filename )
491
 
                {
492
 
                        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Same filename can not be used for original and second database!", index_filename);
493
 
                }
494
 
                ifstream database( database_filename.c_str());
495
 
                if ( !database )
496
 
                {
497
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
498
 
                }
499
 
                
500
 
                ifstream index(index_filename.c_str(), ios::in | ios::binary);
501
 
                if ( !index )
502
 
                {
503
 
                        database.close();
504
 
                        database.clear();
505
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, index_filename);
506
 
                }
507
 
                
508
 
                // determine the length of the index file
509
 
                index.seekg(0, ios::end);
510
 
                streampos index_length = index.tellg();
511
 
                index.seekg(0, ios::beg);
512
 
                bool empty_records = wanted_records.empty();
513
 
                if ( wanted_records.empty() )
514
 
                {
515
 
                        for ( Size i = 0; i < index_length / record_length_; ++i ) wanted_records.push_back(i);
516
 
                }
517
 
                
518
 
                // take the wanted records, copy their sequences to the new db and write the index file accordingly
519
 
                ofstream snd_database;
520
 
                if ( append ) snd_database.open(snd_database_filename.c_str(), std::ios::out | std::ios::app);
521
 
                else snd_database.open(snd_database_filename.c_str(), std::ios::out | std::ios::trunc);
522
 
                if ( !snd_database )
523
 
                {
524
 
                        database.close();
525
 
                        database.clear();
526
 
                        index.close();
527
 
                        index.clear();
528
 
                        throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, snd_database_filename);
529
 
                }
530
 
                
531
 
                ofstream snd_index;
532
 
                if ( append ) snd_index.open(snd_index_filename.c_str(), std::ios::out | std::ios::binary | std::ios::app);
533
 
                else snd_index.open(snd_index_filename.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
534
 
                if ( !snd_index )
535
 
                {
536
 
                        database.close();
537
 
                        database.clear();
538
 
                        index.close();
539
 
                        index.clear();
540
 
                        snd_database.close();
541
 
                        snd_database.clear();
542
 
                        throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, snd_index_filename);
543
 
                }
544
 
                
545
 
                char* index_record = new char[record_length_]; // to copy one record from the index file
546
 
                Size database_pos(0), snd_database_pos(0); // their sizes HAVE TO BE 4 bytes
547
 
                stringbuf sequence;
548
 
                streampos index_pos(0);
549
 
                
550
 
                for ( vector< Size >::const_iterator wr_i = wanted_records.begin(); wr_i != wanted_records.end(); ++wr_i )
551
 
                {
552
 
                        // get the according record in the index file
553
 
                        if ( index_length < Int((*wr_i + 1) * record_length_) ) // if the file is too short
554
 
                        {
555
 
        delete [] index_record;
556
 
                                database.close();
557
 
                                database.clear();
558
 
                                index.close();
559
 
                                index.clear();
560
 
                                snd_database.close();
561
 
                                snd_database.clear();
562
 
                                snd_index.close();
563
 
                                snd_index.clear();
564
 
                                throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "index file is too short!", index_filename);
565
 
                        }
566
 
                        index.seekg((*wr_i) * record_length_);
567
 
                        index.read(index_record, record_length_);
568
 
 
569
 
                        // all but the first sequence are preceded by an asterisk
570
 
                        if ( append ) snd_database.put(trie_delimiter_);
571
 
                        append = true;
572
 
                        
573
 
                        // check if we have to reverse the database_pos part (which is saved in little endian)
574
 
                        if (OPENMS_IS_BIG_ENDIAN)
575
 
                        {
576
 
                                char tmp;
577
 
                                for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
578
 
                                {
579
 
                                        tmp = index_record[db_pos_length_ + i];
580
 
                                        index_record[db_pos_length_ + i] = index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
581
 
                                        index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
582
 
                                }
583
 
                        }
584
 
 
585
 
                        // go to the beginning of the sequence
586
 
 
587
 
                        // whoever wrote this code - please don't ever do this again.
588
 
                        // x86 does *not* have a monopoly, nor does little endian.
589
 
                        memcpy(&database_pos, index_record + db_pos_length_, trie_db_pos_length_);
590
 
                        database.seekg(database_pos);
591
 
                        
592
 
                        // store the corresponding index for the second database
593
 
                        snd_database_pos = snd_database.tellp(); // get the position in the second database
594
 
 
595
 
                        memcpy(index_record + db_pos_length_, &snd_database_pos, trie_db_pos_length_); // and copy to its place in the index record
596
 
 
597
 
                        // fixing the above "suboptimal" code
598
 
                        if (OPENMS_IS_BIG_ENDIAN)
599
 
                        {
600
 
                                char tmp;
601
 
                                for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
602
 
                                {
603
 
                                        tmp = index_record[db_pos_length_ + i];
604
 
                                        index_record[db_pos_length_ + i] = index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
605
 
                                        index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
606
 
                                }
607
 
                        }
608
 
 
609
 
                        snd_index.write((char*) index_record, record_length_); // because only the trie-db position changed, not the position in the original database, nor the protein name
610
 
                        
611
 
                        // store the sequence
612
 
                        database.get(sequence, trie_delimiter_);
613
 
                        snd_database << sequence.str();
614
 
                        sequence.str("");
615
 
                }
616
 
                
617
 
                
618
 
                if ( empty_records ) wanted_records.clear();
619
 
    delete [] index_record;
620
 
                database.close();
621
 
                database.clear();
622
 
                index.close();
623
 
                index.clear();
624
 
                snd_database.close();
625
 
                snd_database.clear();
626
 
                snd_index.close();
627
 
                snd_index.clear();
628
 
        }
629
 
 
630
 
        void
631
 
        InspectOutfile::generateTrieDB(
632
 
                const String& source_database_filename,
633
 
                const String& database_filename,
634
 
                const String& index_filename,
635
 
                bool append,
636
 
                const String species)
637
 
        {
638
 
                ifstream source_database(source_database_filename.c_str());
639
 
                if ( !source_database )
640
 
                {
641
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, source_database_filename);
642
 
                }
643
 
                
644
 
                // get the labels
645
 
                String ac_label, sequence_start_label, sequence_end_label, comment_label, species_label;
646
 
                getLabels(source_database_filename, ac_label, sequence_start_label, sequence_end_label, comment_label, species_label);
647
 
 
648
 
                ofstream database;
649
 
                if ( append ) database.open(database_filename.c_str(), ios::app | ios::out );
650
 
                else database.open(database_filename.c_str());
651
 
                if ( !database )
652
 
                {
653
 
                        source_database.close();
654
 
                        source_database.clear();
655
 
                        throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
656
 
                }
657
 
                ofstream index;
658
 
                if ( append ) index.open(index_filename.c_str(), ios::app | ios::out | ios::binary );
659
 
                else index.open(index_filename.c_str(), ios::out | ios::binary );
660
 
                if ( !index )
661
 
                {
662
 
                        source_database.close();
663
 
                        source_database.clear();
664
 
                        database.close();
665
 
                        database.clear();
666
 
                        throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, index_filename);
667
 
                }
668
 
                
669
 
                // using flags to mark what has already been read
670
 
                // the flags
671
 
                unsigned char ac_flag = 1;
672
 
                unsigned char species_flag = !species.empty()*2; // if no species is given, take all proteins
673
 
                unsigned char sequence_flag = 4;
674
 
                // the value
675
 
                unsigned char record_flags = 0;
676
 
                
677
 
                String::size_type pos(0); // the position in a line
678
 
                unsigned long long source_database_pos = source_database.tellg(); // the start of a protein in the source database
679
 
                unsigned long long source_database_pos_buffer = 0; // because you don't know whether a new protein starts unless the line is read, the actual position is buffered before any new getline
680
 
                Size database_pos(0);
681
 
                String line, sequence, protein_name;
682
 
                char* record = new char[record_length_]; // a record in the index file
683
 
                char* protein_name_pos = record + db_pos_length_ + trie_db_pos_length_;
684
 
                
685
 
                while ( getline(source_database, line) )
686
 
                {
687
 
                        if ( !line.empty() && (line[line.length()-1] < 33) ) line.resize(line.length()-1);
688
 
                        line.trim();
689
 
                        
690
 
                        // empty and comment lines are skipped
691
 
                        if ( line.empty() || line.hasPrefix(comment_label) )
692
 
                        {
693
 
                                source_database_pos_buffer = source_database.tellg();
694
 
                                continue;
695
 
                        }
696
 
                        
697
 
                        // read the sequence if the accession and the species have been read already
698
 
                        if ( record_flags == (ac_flag | species_flag | sequence_flag) )
699
 
                        {
700
 
                                if ( !line.hasPrefix(sequence_end_label) ) // if it is still the same protein, append the sequence
701
 
                                {
702
 
                                        line.trim(); // erase all whitespaces from the sequence
703
 
                                        line.remove(trie_delimiter_);
704
 
                                        // save this part of the sequence
705
 
                                        sequence.append(line);
706
 
                                }
707
 
                                else // if a new protein is found, write down the old one
708
 
                                {
709
 
                                        // if the sequence is not empty, the record has the correct form
710
 
                                        if ( !sequence.empty() )
711
 
                                        {
712
 
                                                // all but the first record in the database are preceded by an asterisk (if in append mode an asterisk has to be put at any time)
713
 
                                                if ( append ) database.put('*');
714
 
                                                database_pos = database.tellp();
715
 
                                                
716
 
                                                // write the record
717
 
                                                memcpy(record, &source_database_pos, db_pos_length_); // source database position
718
 
                                          if (OPENMS_IS_BIG_ENDIAN)
719
 
                                                {
720
 
                                                        char tmp;
721
 
                                                        for (Size i = 0; i < db_pos_length_ / 2; i++)
722
 
                                                        {
723
 
                                                                tmp = record[i];
724
 
                                                                record[i] = record[db_pos_length_ - 1 - i];
725
 
                                                                record[db_pos_length_ - 1 - i] = tmp;
726
 
                                                        }
727
 
                                                }
728
 
                                                
729
 
                                                // whoever wrote this code - please don't ever do this again.
730
 
                                                // x86 does *not* have a monopoly, nor does little endian.
731
 
                                                memcpy(record + db_pos_length_, &database_pos, trie_db_pos_length_); // database position
732
 
                                                
733
 
                                                // fix the above "suboptimal" code
734
 
                                                if (OPENMS_IS_BIG_ENDIAN)
735
 
                                                {
736
 
                                                        char tmp;
737
 
                                                        for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
738
 
                                                        {
739
 
                                                                tmp = record[db_pos_length_ + i];
740
 
                                                                record[db_pos_length_ + i] = record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
741
 
                                                                record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
742
 
                                                        }
743
 
                                                }
744
 
 
745
 
                                                index.write(record, record_length_);
746
 
                                                // protein name / accession has already been written
747
 
                                                database << sequence;
748
 
                                                source_database_pos = source_database_pos_buffer; // the position of the start of the new protein
749
 
                                                append = true;
750
 
                                        }
751
 
                                        sequence.clear();
752
 
                                        
753
 
                                        // set back the record flags for a new record
754
 
                                        record_flags = 0;
755
 
                                }
756
 
                        }
757
 
                        
758
 
                        // if not reading the sequence
759
 
                        if ( !(record_flags & sequence_flag) )
760
 
                        {
761
 
                                if ( line.hasPrefix(ac_label) )
762
 
                                {
763
 
                                        pos = ac_label.length(); // find the beginning of the accession
764
 
                                        
765
 
                                        while ( (line.length() > pos) && (line[pos] < 33) ) ++pos; // discard the whitespaces after the label
766
 
                                        if ( pos != line.length() ) // if no accession is found, skip this protein
767
 
                                        {
768
 
                                                memset(protein_name_pos, 0, protein_name_length_); // clear the protein name
769
 
                                                // read at most protein_name_length_ characters from the record name and write them to the record
770
 
                                                protein_name = line.substr(pos, protein_name_length_);
771
 
                                                protein_name.substitute('>', '}');
772
 
                                                memcpy(protein_name_pos, protein_name.c_str(), protein_name.length());
773
 
                                                
774
 
                                                record_flags |= ac_flag; // set the ac flag
775
 
                                        }
776
 
                                        else record_flags = 0;
777
 
                                }
778
 
                                // if a species line is found and an accession has already been found, check whether this record is from the wanted species, if not, skip it
779
 
                                if ( species_flag && line.hasPrefix(species_label) && (record_flags == ac_flag) )
780
 
                                {
781
 
                                        pos = species_label.length();
782
 
                                        if ( line.find(species, pos) != String::npos ) record_flags |= species_flag;
783
 
                                        else record_flags = 0;
784
 
                                }
785
 
                                // if the beginning of the sequence is found and accession and correct species have been found
786
 
                                if ( line.hasPrefix(sequence_start_label) && ((record_flags & (ac_flag | species_flag)) == (ac_flag | species_flag)) ) record_flags |= sequence_flag;
787
 
                        }
788
 
                        source_database_pos_buffer = source_database.tellg();
789
 
                }
790
 
                // source file read
791
 
                source_database.close();
792
 
                source_database.clear();
793
 
                
794
 
                // if the last record has no sequence end label, the sequence has to be appended nevertheless (e.g. FASTA)
795
 
                if ( record_flags == (ac_flag | species_flag | sequence_flag) && !sequence.empty() )
796
 
                {
797
 
                        // all but the first record in the database are preceded by an asterisk (if in append mode an asterisk has to be put at any time)
798
 
                        if ( append ) database.put('*');
799
 
                        database_pos = database.tellp();
800
 
                        
801
 
                        // write the record
802
 
                        // whoever wrote this code - please don't ever do this again.
803
 
                        // x86 does *not* have a monopoly, nor does little endian.
804
 
                        memcpy(record, &source_database_pos, db_pos_length_); // source database position
805
 
      if (OPENMS_IS_BIG_ENDIAN)
806
 
                        {
807
 
                                char tmp;
808
 
                                for (Size i = 0; i < db_pos_length_ / 2; i++)
809
 
                                {
810
 
                                        tmp = record[i];
811
 
                                        record[i] = record[db_pos_length_ - 1 - i];
812
 
                                        record[db_pos_length_ - 1 - i] = tmp;
813
 
                                }
814
 
                        }
815
 
 
816
 
                        memcpy(record + db_pos_length_, &database_pos, trie_db_pos_length_); // database position
817
 
 
818
 
                        // fix the above "suboptimal" code
819
 
                        if (OPENMS_IS_BIG_ENDIAN)
820
 
                        {
821
 
                                char tmp;
822
 
                                for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
823
 
                                {
824
 
                                        tmp = record[db_pos_length_ + i];
825
 
                                        record[db_pos_length_ + i] = record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
826
 
                                        record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
827
 
                                }
828
 
                        }
829
 
 
830
 
                        index.write(record, record_length_);
831
 
                        // protein name / accession has already been written
832
 
                        database << sequence;
833
 
                        append = true;
834
 
                }
835
 
                
836
 
    delete [] record;
837
 
                
838
 
                // close the filestreams
839
 
                database.close();
840
 
                database.clear();
841
 
                index.close();
842
 
                index.clear();
843
 
        }
844
 
 
845
 
        void InspectOutfile::getLabels(
846
 
                const String& source_database_filename,
847
 
                String& ac_label,
848
 
                String& sequence_start_label,
849
 
                String& sequence_end_label,
850
 
                String& comment_label,
851
 
                String& species_label)
852
 
        {
853
 
                ac_label = sequence_start_label = sequence_end_label = comment_label = species_label = "";
854
 
                ifstream source_database(source_database_filename.c_str());
855
 
                if ( !source_database )
856
 
                {
857
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, source_database_filename);
858
 
                }
859
 
                
860
 
                String line;
861
 
                while ( getline(source_database, line) && (sequence_start_label.empty()) )
862
 
                {
863
 
                        if ( !line.empty() && (line[line.length()-1] < 33) ) line.resize(line.length()-1);
864
 
                        if ( line.trim().empty() ) continue;
865
 
                        
866
 
                        else if ( line.hasPrefix(">") )
867
 
                        {
868
 
                                ac_label = ">";
869
 
                                sequence_start_label = ">";
870
 
                                sequence_end_label = ">";
871
 
                                comment_label = ";";
872
 
                                species_label = ">";
873
 
                        }
874
 
                        else if ( line.hasPrefix("SQ") )
875
 
                        {
876
 
                                ac_label = "AC";
877
 
                                sequence_start_label = "SQ";
878
 
                                sequence_end_label = "//";
879
 
                                comment_label = "CC";
880
 
                                species_label = "OS";
881
 
                        }
882
 
                }
883
 
                source_database.close();
884
 
                source_database.clear();
885
 
                
886
 
                // if no known start separator is found
887
 
                if (sequence_start_label.empty())
888
 
                {
889
 
                        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "database has unknown file format (neither trie nor FASTA nor swissprot)" , source_database_filename);
890
 
                }
891
 
        }
892
 
 
893
 
        vector<Size> InspectOutfile::getWantedRecords(const String& result_filename, DoubleReal p_value_threshold)
894
 
        {
895
 
                // check whether the p_value is correct
896
 
                if ( (p_value_threshold < 0) || (p_value_threshold > 1) )
897
 
                {
898
 
                        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the parameters 'p_value_threshold' must be >= 0 and <=1 !");
899
 
                }
900
 
                
901
 
                ifstream result_file(result_filename.c_str());
902
 
                if (!result_file)
903
 
                {
904
 
                        throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
905
 
                }
906
 
                
907
 
                String line;
908
 
                vector< String > substrings;
909
 
                
910
 
                set< Size > wanted_records_set;
911
 
                
912
 
                vector< Size >
913
 
                        wanted_records,
914
 
                        corrupted_lines;
915
 
                
916
 
                Size line_number(0);
917
 
                
918
 
                // get the header
919
 
                Int
920
 
                        spectrum_file_column(-1),
921
 
                        scan_column(-1),
922
 
                        peptide_column(-1),
923
 
                        protein_column(-1),
924
 
                        charge_column(-1),
925
 
                        MQ_score_column(-1),
926
 
                        p_value_column(-1),
927
 
                        record_number_column(-1),
928
 
                        DB_file_pos_column(-1),
929
 
                        spec_file_pos_column(-1);
930
 
                        
931
 
                Size number_of_columns(0);
932
 
                
933
 
                if ( !getline(result_file, line) )
934
 
                {
935
 
                        result_file.close();
936
 
                        result_file.clear();
937
 
                        throw Exception::FileEmpty(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
938
 
                }
939
 
                ++line_number;
940
 
                readOutHeader(result_filename, line, spectrum_file_column, scan_column, peptide_column, protein_column, charge_column, MQ_score_column, p_value_column, record_number_column, DB_file_pos_column, spec_file_pos_column, number_of_columns);
941
 
                
942
 
                while (getline(result_file, line))
943
 
                {
944
 
                        ++line_number;
945
 
                        if (!line.empty() && (line[line.length()-1] < 33)) line.resize(line.length() - 1);
946
 
                        line.trim();
947
 
                        if ( line.empty() ) continue;
948
 
                        line.split('\t', substrings);
949
 
                        
950
 
                        // check whether the line has enough columns
951
 
                        if ( substrings.size() != number_of_columns )
952
 
                        {
953
 
                                corrupted_lines.push_back(line_number);
954
 
                                continue;
955
 
                        }
956
 
                        
957
 
                        // check whether the line has enough columns
958
 
                        if (substrings.size() != number_of_columns) continue;
959
 
                        
960
 
                        // take only those peptides whose p-value is less or equal the given threshold
961
 
                        if (substrings[p_value_column].toFloat() > p_value_threshold) continue;
962
 
                        
963
 
                        wanted_records_set.insert(substrings[record_number_column].toInt());
964
 
                }
965
 
                
966
 
                result_file.close();
967
 
                result_file.clear();
968
 
                
969
 
                for ( set< Size >::const_iterator rn_i = wanted_records_set.begin(); rn_i != wanted_records_set.end(); ++rn_i )
970
 
                {
971
 
                        wanted_records.push_back(*rn_i);
972
 
                }
973
 
                
974
 
                return wanted_records;
975
 
        }
976
 
 
977
 
        bool
978
 
        InspectOutfile::getSearchEngineAndVersion(
979
 
                const String& cmd_output,
980
 
                ProteinIdentification& protein_identification)
981
 
        {
982
 
          protein_identification.setSearchEngine("InsPecT");
983
 
          protein_identification.setSearchEngineVersion("unknown");
984
 
                // searching for something like this: InsPecT version 20060907, InsPecT version 20100331
985
 
    QString response(cmd_output.toQString()); 
 
158
    }
 
159
 
 
160
    while (getline(result_file, line))
 
161
    {
 
162
      ++line_number;
 
163
      if (!line.empty() && (line[line.length() - 1] < 33))
 
164
        line.resize(line.length() - 1);
 
165
      line.trim();
 
166
      if (line.empty())
 
167
        continue;
 
168
 
 
169
      // check whether the line has enough columns
 
170
      line.split('\t', substrings);
 
171
      if (substrings.size() != number_of_columns)
 
172
      {
 
173
        corrupted_lines.push_back(line_number);
 
174
        continue;
 
175
      }
 
176
 
 
177
      // if the pvalue is too small, skip the line
 
178
      if (substrings[p_value_column].toFloat() > p_value_threshold)
 
179
        continue;
 
180
 
 
181
      // the protein
 
182
      ProteinHit protein_hit;
 
183
      // get accession number and type
 
184
      getACAndACType(substrings[protein_column], accession, accession_type);
 
185
      protein_hit.setAccession(accession);
 
186
//          protein_hit.setScore(0.0);
 
187
 
 
188
      // the database position of the protein (the i-th protein)
 
189
      record_number = substrings[record_number_column].toInt();
 
190
 
 
191
      // map the database position of the protein to its position in the protein hits and insert it, if it's a new protein
 
192
      if (rn_position_map.find(record_number) == rn_position_map.end())
 
193
      {
 
194
        rn_position_map[record_number] = protein_identification.getHits().size();
 
195
        protein_identification.insertHit(protein_hit);
 
196
      }
 
197
 
 
198
      // if a new scan is found (new file or new scan), insert it into the vector (the first time the condition is fullfilled because spectrum_file is "")
 
199
      if ((substrings[spectrum_file_column] != spectrum_file) || ((Size) substrings[scan_column].toInt() != scan_number))
 
200
      {
 
201
        if (substrings[spectrum_file_column] != spectrum_file)           // if it's a new file, insert it into the vector (used to retrieve RT and MT later)
 
202
        {
 
203
          // if it's the first file or if hits have been found in the file before, insert a new file
 
204
          if (files_and_peptide_identification_with_scan_number.empty() || !files_and_peptide_identification_with_scan_number.back().second.empty())
 
205
          {
 
206
            files_and_peptide_identification_with_scan_number.push_back(make_pair(substrings[spectrum_file_column], vector<pair<Size, Size> >()));
 
207
          }
 
208
          // otherwise change the name of the last file entry (the one without hits)
 
209
          else
 
210
            files_and_peptide_identification_with_scan_number.back().first = substrings[spectrum_file_column];
 
211
        }
 
212
 
 
213
        spectrum_file = substrings[spectrum_file_column];
 
214
        scan_number = substrings[scan_column].toInt();
 
215
 
 
216
        // if it's not the first scan and if hits have been found, insert the peptide identification
 
217
        if (!peptide_identification.empty() && !peptide_identification.getHits().empty())
 
218
        {
 
219
          files_and_peptide_identification_with_scan_number.back().second.push_back(make_pair(peptide_identifications.size(), scan_number));
 
220
          peptide_identifications.push_back(peptide_identification);
 
221
        }
 
222
        peptide_identification = PeptideIdentification();
 
223
 
 
224
        peptide_identification.setIdentifier(identifier);
 
225
        peptide_identification.setSignificanceThreshold(p_value_threshold);
 
226
        peptide_identification.setScoreType(score_type_);
 
227
      }
 
228
 
 
229
      // get the peptide infos from the new peptide and insert it
 
230
      PeptideHit peptide_hit;
 
231
      peptide_hit.setCharge(substrings[charge_column].toInt());
 
232
      peptide_hit.setScore(substrings[MQ_score_column].toFloat());
 
233
      peptide_hit.setRank(0);       // all ranks are set to zero and assigned later
 
234
 
 
235
      // get the sequence and the amino acid before and after
 
236
      String sequence, sequence_with_mods;
 
237
      sequence_with_mods = substrings[peptide_column];
 
238
      start = sequence_with_mods.find('.') + 1;
 
239
      end = sequence_with_mods.find_last_of('.');
 
240
      if (start >= 2)
 
241
        peptide_hit.setAABefore(sequence_with_mods[start - 2]);
 
242
      if (end < sequence_with_mods.length() + 1)
 
243
        peptide_hit.setAAAfter(sequence_with_mods[end + 1]);
 
244
 
 
245
      //remove modifications (small characters and anything that's not in the alphabet)
 
246
      sequence_with_mods = substrings[peptide_column].substr(start, end - start);
 
247
      for (String::ConstIterator c_i = sequence_with_mods.begin(); c_i != sequence_with_mods.end(); ++c_i)
 
248
      {
 
249
        if ((bool) isalpha(*c_i) && (bool) isupper(*c_i))
 
250
          sequence.append(1, *c_i);
 
251
      }
 
252
 
 
253
      peptide_hit.setSequence(sequence);
 
254
      peptide_hit.addProteinAccession(accession);
 
255
 
 
256
      peptide_identification.insertHit(peptide_hit);
 
257
    }
 
258
 
 
259
    // result file read
 
260
    result_file.close();
 
261
    result_file.clear();
 
262
 
 
263
    // if it's not the first scan and if hits have been found, insert the peptide identification
 
264
    if (!peptide_identification.empty() && !peptide_identification.getHits().empty())
 
265
    {
 
266
      files_and_peptide_identification_with_scan_number.back().second.push_back(make_pair(peptide_identifications.size(), scan_number));
 
267
      peptide_identifications.push_back(peptide_identification);
 
268
    }
 
269
 
 
270
    // if the last file had no hits, delete it
 
271
    if (!files_and_peptide_identification_with_scan_number.empty() && files_and_peptide_identification_with_scan_number.back().second.empty())
 
272
    {
 
273
      files_and_peptide_identification_with_scan_number.pop_back();
 
274
    }
 
275
 
 
276
    if (!peptide_identifications.empty())
 
277
      peptide_identifications.back().assignRanks();
 
278
 
 
279
    // search the sequence of the proteins
 
280
    if (!protein_identification.getHits().empty() && !database_filename.empty())
 
281
    {
 
282
      vector<ProteinHit> protein_hits = protein_identification.getHits();
 
283
      vector<String> sequences;
 
284
      getSequences(database_filename, rn_position_map, sequences);
 
285
 
 
286
      // set the retrieved sequences
 
287
      vector<String>::const_iterator s_i = sequences.begin();
 
288
      for (map<Size, Size>::const_iterator rn_i = rn_position_map.begin(); rn_i != rn_position_map.end(); ++rn_i, ++s_i)
 
289
        protein_hits[rn_i->second].setSequence(*s_i);
 
290
 
 
291
      sequences.clear();
 
292
      rn_position_map.clear();
 
293
      protein_identification.setHits(protein_hits);
 
294
      protein_hits.clear();
 
295
    }
 
296
 
 
297
    // get the precursor retention times and mz values
 
298
    getPrecursorRTandMZ(files_and_peptide_identification_with_scan_number, peptide_identifications);
 
299
    protein_identification.setDateTime(datetime);
 
300
    protein_identification.setIdentifier(identifier);
 
301
 
 
302
    return corrupted_lines;
 
303
  }
 
304
 
 
305
  // < record number, number of protein in a vector >
 
306
  vector<Size>
 
307
  InspectOutfile::getSequences(
 
308
    const String & database_filename,
 
309
    const map<Size, Size> & wanted_records,
 
310
    vector<String> & sequences)
 
311
  {
 
312
    ifstream database(database_filename.c_str());
 
313
    if (!database)
 
314
    {
 
315
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
 
316
    }
 
317
 
 
318
    vector<Size> not_found;
 
319
    Size seen_records(0);
 
320
    stringbuf sequence;
 
321
    database.seekg(0, ios::end);
 
322
    streampos sp = database.tellg();
 
323
    database.seekg(0, ios::beg);
 
324
 
 
325
    for (map<Size, Size>::const_iterator wr_i = wanted_records.begin(); wr_i !=  wanted_records.end(); ++wr_i)
 
326
    {
 
327
      for (; seen_records < wr_i->first; ++seen_records)
 
328
      {
 
329
        database.ignore(sp, trie_delimiter_);
 
330
      }
 
331
      database.get(sequence, trie_delimiter_);
 
332
      sequences.push_back(sequence.str());
 
333
      if (sequences.back().empty())
 
334
        not_found.push_back(wr_i->first);
 
335
      sequence.str("");
 
336
    }
 
337
 
 
338
    // close the filestreams
 
339
    database.close();
 
340
    database.clear();
 
341
 
 
342
    return not_found;
 
343
  }
 
344
 
 
345
  void
 
346
  InspectOutfile::getACAndACType(
 
347
    String line,
 
348
    String & accession,
 
349
    String & accession_type)
 
350
  {
 
351
    String swissprot_prefixes = "JLOPQUX";
 
352
    /// @todo replace this by general FastA implementation? (Martin)
 
353
    accession.clear();
 
354
    accession_type.clear();
 
355
    pair<String, String> p;
 
356
    // if it's a FASTA line
 
357
    if (line.hasPrefix(">"))
 
358
      line.erase(0, 1);
 
359
    if (!line.empty() && (line[line.length() - 1] < 33))
 
360
      line.resize(line.length() - 1);
 
361
    line.trim();
 
362
 
 
363
    // if it's a swissprot accession
 
364
    if (line.hasPrefix("tr") || line.hasPrefix("sp"))
 
365
    {
 
366
      accession = line.substr(3, line.find('|', 3) - 3);
 
367
      accession_type = "SwissProt";
 
368
    }
 
369
    else if (line.hasPrefix("gi"))
 
370
    {
 
371
      String::size_type snd(line.find('|', 3));
 
372
      String::size_type third(0);
 
373
      if (snd != String::npos)
 
374
      {
 
375
        third = line.find('|', ++snd) + 1;
 
376
 
 
377
        accession = line.substr(third, line.find('|', third) - third);
 
378
        accession_type = line.substr(snd, third - 1 - snd);
 
379
      }
 
380
      if (accession_type == "gb")
 
381
        accession_type = "GenBank";
 
382
      else if (accession_type == "emb")
 
383
        accession_type = "EMBL";
 
384
      else if (accession_type == "dbj")
 
385
        accession_type = "DDBJ";
 
386
      else if (accession_type == "ref")
 
387
        accession_type = "NCBI";
 
388
      else if ((accession_type == "sp") || (accession_type == "tr"))
 
389
        accession_type = "SwissProt";
 
390
      else if (accession_type == "gnl")
 
391
      {
 
392
        accession_type = accession;
 
393
        snd = line.find('|', third);
 
394
        third = line.find('|', ++snd);
 
395
        if (third != String::npos)
 
396
          accession = line.substr(snd, third - snd);
 
397
        else
 
398
        {
 
399
          third = line.find(' ', snd);
 
400
          if (third != String::npos)
 
401
            accession = line.substr(snd, third - snd);
 
402
          else
 
403
            accession = line.substr(snd);
 
404
        }
 
405
      }
 
406
      else
 
407
      {
 
408
        String::size_type pos1(line.find('(', 0));
 
409
        String::size_type pos2(0);
 
410
        if (pos1 != String::npos)
 
411
        {
 
412
          pos2 = line.find(')', ++pos1);
 
413
          if (pos2 != String::npos)
 
414
          {
 
415
            accession = line.substr(pos1, pos2 - pos1);
 
416
            if ((accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos))
 
417
              accession_type = "SwissProt";
 
418
            else
 
419
              accession.clear();
 
420
          }
 
421
        }
 
422
        if (accession.empty())
 
423
        {
 
424
          accession_type = "gi";
 
425
          if (snd != String::npos)
 
426
            accession = line.substr(3, snd - 4);
 
427
          else
 
428
          {
 
429
            if (snd == String::npos)
 
430
              snd = line.find(' ', 3);
 
431
            if (snd != String::npos)
 
432
              accession = line.substr(3, snd - 3);
 
433
            else
 
434
              accession = line.substr(3);
 
435
          }
 
436
        }
 
437
      }
 
438
    }
 
439
    else if (line.hasPrefix("ref"))
 
440
    {
 
441
      accession = line.substr(4, line.find('|', 4) - 4);
 
442
      accession_type = "NCBI";
 
443
    }
 
444
    else if (line.hasPrefix("gnl"))
 
445
    {
 
446
      line.erase(0, 3);
 
447
      accession_type = line.substr(0, line.find('|', 0));
 
448
      accession = line.substr(accession_type.length() + 1);
 
449
    }
 
450
    else if (line.hasPrefix("lcl"))
 
451
    {
 
452
      line.erase(0, 4);
 
453
      accession_type = "lcl";
 
454
      accession = line;
 
455
    }
 
456
    else
 
457
    {
 
458
      String::size_type pos1(line.find('(', 0));
 
459
      String::size_type pos2(0);
 
460
      if (pos1 != String::npos)
 
461
      {
 
462
        pos2 = line.find(')', ++pos1);
 
463
        if (pos2 != String::npos)
 
464
        {
 
465
          accession = line.substr(pos1, pos2 - pos1);
 
466
          if ((accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos))
 
467
            accession_type = "SwissProt";
 
468
          else
 
469
            accession.clear();
 
470
        }
 
471
      }
 
472
      if (accession.empty())
 
473
      {
 
474
        pos1 = line.find('|');
 
475
        accession = line.substr(0, pos1);
 
476
        if ((accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos))
 
477
          accession_type = "SwissProt";
 
478
        else
 
479
        {
 
480
          pos1 = line.find(' ');
 
481
          accession = line.substr(0, pos1);
 
482
          if ((accession.size() == 6) && (String(swissprot_prefixes).find(accession[0], 0) != String::npos))
 
483
            accession_type = "SwissProt";
 
484
          else
 
485
          {
 
486
            accession = line.substr(0, 6);
 
487
            if (String(swissprot_prefixes).find(accession[0], 0) != String::npos)
 
488
              accession_type = "SwissProt";
 
489
            else
 
490
              accession.clear();
 
491
          }
 
492
        }
 
493
      }
 
494
    }
 
495
    if (accession.empty())
 
496
    {
 
497
      accession = line.trim();
 
498
      accession_type = "unknown";
 
499
    }
 
500
  }
 
501
 
 
502
  void
 
503
  InspectOutfile::getPrecursorRTandMZ(
 
504
    const vector<pair<String, vector<pair<Size, Size> > > > & files_and_peptide_identification_with_scan_number,
 
505
    vector<PeptideIdentification> & ids)
 
506
  {
 
507
    MSExperiment<> experiment;
 
508
    String type;
 
509
 
 
510
    for (vector<pair<String, vector<pair<Size, Size> > > >::const_iterator fs_i = files_and_peptide_identification_with_scan_number.begin(); fs_i != files_and_peptide_identification_with_scan_number.end(); ++fs_i)
 
511
    {
 
512
      getExperiment(experiment, type, fs_i->first);       // may throw an exception if the filetype could not be determined
 
513
 
 
514
      if (experiment.size() < fs_i->second.back().second)
 
515
      {
 
516
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Not enought scans in file! (" + String(experiment.size()) + " available, should be at least " + String(fs_i->second.back().second) + ")", fs_i->first);
 
517
      }
 
518
 
 
519
      for (vector<pair<Size, Size> >::const_iterator pi_scan_i = fs_i->second.begin(); pi_scan_i != fs_i->second.end(); ++pi_scan_i)
 
520
      {
 
521
        ids[pi_scan_i->first].setMetaValue("MZ", experiment[pi_scan_i->second - 1].getPrecursors()[0].getMZ());
 
522
        ids[pi_scan_i->first].setMetaValue("RT", experiment[pi_scan_i->second - 1].getRT());
 
523
      }
 
524
    }
 
525
  }
 
526
 
 
527
  void
 
528
  InspectOutfile::compressTrieDB(
 
529
    const String & database_filename,
 
530
    const String & index_filename,
 
531
    vector<Size> & wanted_records,
 
532
    const String & snd_database_filename,
 
533
    const String & snd_index_filename,
 
534
    bool append)
 
535
  {
 
536
    if (database_filename == snd_database_filename)
 
537
    {
 
538
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Same filename can not be used for original and second database!", database_filename);
 
539
    }
 
540
    if (index_filename == snd_index_filename)
 
541
    {
 
542
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Same filename can not be used for original and second database!", index_filename);
 
543
    }
 
544
    ifstream database(database_filename.c_str());
 
545
    if (!database)
 
546
    {
 
547
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
 
548
    }
 
549
 
 
550
    ifstream index(index_filename.c_str(), ios::in | ios::binary);
 
551
    if (!index)
 
552
    {
 
553
      database.close();
 
554
      database.clear();
 
555
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, index_filename);
 
556
    }
 
557
 
 
558
    // determine the length of the index file
 
559
    index.seekg(0, ios::end);
 
560
    streampos index_length = index.tellg();
 
561
    index.seekg(0, ios::beg);
 
562
    bool empty_records = wanted_records.empty();
 
563
    if (wanted_records.empty())
 
564
    {
 
565
      for (Size i = 0; i < index_length / record_length_; ++i)
 
566
        wanted_records.push_back(i);
 
567
    }
 
568
 
 
569
    // take the wanted records, copy their sequences to the new db and write the index file accordingly
 
570
    ofstream snd_database;
 
571
    if (append)
 
572
      snd_database.open(snd_database_filename.c_str(), std::ios::out | std::ios::app);
 
573
    else
 
574
      snd_database.open(snd_database_filename.c_str(), std::ios::out | std::ios::trunc);
 
575
    if (!snd_database)
 
576
    {
 
577
      database.close();
 
578
      database.clear();
 
579
      index.close();
 
580
      index.clear();
 
581
      throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, snd_database_filename);
 
582
    }
 
583
 
 
584
    ofstream snd_index;
 
585
    if (append)
 
586
      snd_index.open(snd_index_filename.c_str(), std::ios::out | std::ios::binary | std::ios::app);
 
587
    else
 
588
      snd_index.open(snd_index_filename.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
 
589
    if (!snd_index)
 
590
    {
 
591
      database.close();
 
592
      database.clear();
 
593
      index.close();
 
594
      index.clear();
 
595
      snd_database.close();
 
596
      snd_database.clear();
 
597
      throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, snd_index_filename);
 
598
    }
 
599
 
 
600
    char * index_record = new char[record_length_];    // to copy one record from the index file
 
601
    Size database_pos(0), snd_database_pos(0);     // their sizes HAVE TO BE 4 bytes
 
602
    stringbuf sequence;
 
603
    streampos index_pos(0);
 
604
 
 
605
    for (vector<Size>::const_iterator wr_i = wanted_records.begin(); wr_i != wanted_records.end(); ++wr_i)
 
606
    {
 
607
      // get the according record in the index file
 
608
      if (index_length < Int((*wr_i + 1) * record_length_))         // if the file is too short
 
609
      {
 
610
        delete[] index_record;
 
611
        database.close();
 
612
        database.clear();
 
613
        index.close();
 
614
        index.clear();
 
615
        snd_database.close();
 
616
        snd_database.clear();
 
617
        snd_index.close();
 
618
        snd_index.clear();
 
619
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "index file is too short!", index_filename);
 
620
      }
 
621
      index.seekg((*wr_i) * record_length_);
 
622
      index.read(index_record, record_length_);
 
623
 
 
624
      // all but the first sequence are preceded by an asterisk
 
625
      if (append)
 
626
        snd_database.put(trie_delimiter_);
 
627
      append = true;
 
628
 
 
629
      // check if we have to reverse the database_pos part (which is saved in little endian)
 
630
      if (OPENMS_IS_BIG_ENDIAN)
 
631
      {
 
632
        char tmp;
 
633
        for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
 
634
        {
 
635
          tmp = index_record[db_pos_length_ + i];
 
636
          index_record[db_pos_length_ + i] = index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
 
637
          index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
 
638
        }
 
639
      }
 
640
 
 
641
      // go to the beginning of the sequence
 
642
 
 
643
      // whoever wrote this code - please don't ever do this again.
 
644
      // x86 does *not* have a monopoly, nor does little endian.
 
645
      memcpy(&database_pos, index_record + db_pos_length_, trie_db_pos_length_);
 
646
      database.seekg(database_pos);
 
647
 
 
648
      // store the corresponding index for the second database
 
649
      snd_database_pos = snd_database.tellp();       // get the position in the second database
 
650
 
 
651
      memcpy(index_record + db_pos_length_, &snd_database_pos, trie_db_pos_length_);       // and copy to its place in the index record
 
652
 
 
653
      // fixing the above "suboptimal" code
 
654
      if (OPENMS_IS_BIG_ENDIAN)
 
655
      {
 
656
        char tmp;
 
657
        for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
 
658
        {
 
659
          tmp = index_record[db_pos_length_ + i];
 
660
          index_record[db_pos_length_ + i] = index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
 
661
          index_record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
 
662
        }
 
663
      }
 
664
 
 
665
      snd_index.write((char *) index_record, record_length_);      // because only the trie-db position changed, not the position in the original database, nor the protein name
 
666
 
 
667
      // store the sequence
 
668
      database.get(sequence, trie_delimiter_);
 
669
      snd_database << sequence.str();
 
670
      sequence.str("");
 
671
    }
 
672
 
 
673
 
 
674
    if (empty_records)
 
675
      wanted_records.clear();
 
676
    delete[] index_record;
 
677
    database.close();
 
678
    database.clear();
 
679
    index.close();
 
680
    index.clear();
 
681
    snd_database.close();
 
682
    snd_database.clear();
 
683
    snd_index.close();
 
684
    snd_index.clear();
 
685
  }
 
686
 
 
687
  void
 
688
  InspectOutfile::generateTrieDB(
 
689
    const String & source_database_filename,
 
690
    const String & database_filename,
 
691
    const String & index_filename,
 
692
    bool append,
 
693
    const String species)
 
694
  {
 
695
    ifstream source_database(source_database_filename.c_str());
 
696
    if (!source_database)
 
697
    {
 
698
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, source_database_filename);
 
699
    }
 
700
 
 
701
    // get the labels
 
702
    String ac_label, sequence_start_label, sequence_end_label, comment_label, species_label;
 
703
    getLabels(source_database_filename, ac_label, sequence_start_label, sequence_end_label, comment_label, species_label);
 
704
 
 
705
    ofstream database;
 
706
    if (append)
 
707
      database.open(database_filename.c_str(), ios::app | ios::out);
 
708
    else
 
709
      database.open(database_filename.c_str());
 
710
    if (!database)
 
711
    {
 
712
      source_database.close();
 
713
      source_database.clear();
 
714
      throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, database_filename);
 
715
    }
 
716
    ofstream index;
 
717
    if (append)
 
718
      index.open(index_filename.c_str(), ios::app | ios::out | ios::binary);
 
719
    else
 
720
      index.open(index_filename.c_str(), ios::out | ios::binary);
 
721
    if (!index)
 
722
    {
 
723
      source_database.close();
 
724
      source_database.clear();
 
725
      database.close();
 
726
      database.clear();
 
727
      throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, index_filename);
 
728
    }
 
729
 
 
730
    // using flags to mark what has already been read
 
731
    // the flags
 
732
    unsigned char ac_flag = 1;
 
733
    unsigned char species_flag = !species.empty() * 2;   // if no species is given, take all proteins
 
734
    unsigned char sequence_flag = 4;
 
735
    // the value
 
736
    unsigned char record_flags = 0;
 
737
 
 
738
    String::size_type pos(0);     // the position in a line
 
739
    unsigned long long source_database_pos = source_database.tellg();     // the start of a protein in the source database
 
740
    unsigned long long source_database_pos_buffer = 0;     // because you don't know whether a new protein starts unless the line is read, the actual position is buffered before any new getline
 
741
    Size database_pos(0);
 
742
    String line, sequence, protein_name;
 
743
    char * record = new char[record_length_];    // a record in the index file
 
744
    char * protein_name_pos = record + db_pos_length_ + trie_db_pos_length_;
 
745
 
 
746
    while (getline(source_database, line))
 
747
    {
 
748
      if (!line.empty() && (line[line.length() - 1] < 33))
 
749
        line.resize(line.length() - 1);
 
750
      line.trim();
 
751
 
 
752
      // empty and comment lines are skipped
 
753
      if (line.empty() || line.hasPrefix(comment_label))
 
754
      {
 
755
        source_database_pos_buffer = source_database.tellg();
 
756
        continue;
 
757
      }
 
758
 
 
759
      // read the sequence if the accession and the species have been read already
 
760
      if (record_flags == (ac_flag | species_flag | sequence_flag))
 
761
      {
 
762
        if (!line.hasPrefix(sequence_end_label))           // if it is still the same protein, append the sequence
 
763
        {
 
764
          line.trim();           // erase all whitespaces from the sequence
 
765
          line.remove(trie_delimiter_);
 
766
          // save this part of the sequence
 
767
          sequence.append(line);
 
768
        }
 
769
        else         // if a new protein is found, write down the old one
 
770
        {
 
771
          // if the sequence is not empty, the record has the correct form
 
772
          if (!sequence.empty())
 
773
          {
 
774
            // all but the first record in the database are preceded by an asterisk (if in append mode an asterisk has to be put at any time)
 
775
            if (append)
 
776
              database.put('*');
 
777
            database_pos = database.tellp();
 
778
 
 
779
            // write the record
 
780
            memcpy(record, &source_database_pos, db_pos_length_);             // source database position
 
781
            if (OPENMS_IS_BIG_ENDIAN)
 
782
            {
 
783
              char tmp;
 
784
              for (Size i = 0; i < db_pos_length_ / 2; i++)
 
785
              {
 
786
                tmp = record[i];
 
787
                record[i] = record[db_pos_length_ - 1 - i];
 
788
                record[db_pos_length_ - 1 - i] = tmp;
 
789
              }
 
790
            }
 
791
 
 
792
            // whoever wrote this code - please don't ever do this again.
 
793
            // x86 does *not* have a monopoly, nor does little endian.
 
794
            memcpy(record + db_pos_length_, &database_pos, trie_db_pos_length_);             // database position
 
795
 
 
796
            // fix the above "suboptimal" code
 
797
            if (OPENMS_IS_BIG_ENDIAN)
 
798
            {
 
799
              char tmp;
 
800
              for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
 
801
              {
 
802
                tmp = record[db_pos_length_ + i];
 
803
                record[db_pos_length_ + i] = record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
 
804
                record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
 
805
              }
 
806
            }
 
807
 
 
808
            index.write(record, record_length_);
 
809
            // protein name / accession has already been written
 
810
            database << sequence;
 
811
            source_database_pos = source_database_pos_buffer;             // the position of the start of the new protein
 
812
            append = true;
 
813
          }
 
814
          sequence.clear();
 
815
 
 
816
          // set back the record flags for a new record
 
817
          record_flags = 0;
 
818
        }
 
819
      }
 
820
 
 
821
      // if not reading the sequence
 
822
      if (!(record_flags & sequence_flag))
 
823
      {
 
824
        if (line.hasPrefix(ac_label))
 
825
        {
 
826
          pos = ac_label.length();           // find the beginning of the accession
 
827
 
 
828
          while ((line.length() > pos) && (line[pos] < 33))
 
829
            ++pos;                                                             // discard the whitespaces after the label
 
830
          if (pos != line.length())             // if no accession is found, skip this protein
 
831
          {
 
832
            memset(protein_name_pos, 0, protein_name_length_);             // clear the protein name
 
833
            // read at most protein_name_length_ characters from the record name and write them to the record
 
834
            protein_name = line.substr(pos, protein_name_length_);
 
835
            protein_name.substitute('>', '}');
 
836
            memcpy(protein_name_pos, protein_name.c_str(), protein_name.length());
 
837
 
 
838
            record_flags |= ac_flag;             // set the ac flag
 
839
          }
 
840
          else
 
841
            record_flags = 0;
 
842
        }
 
843
        // if a species line is found and an accession has already been found, check whether this record is from the wanted species, if not, skip it
 
844
        if (species_flag && line.hasPrefix(species_label) && (record_flags == ac_flag))
 
845
        {
 
846
          pos = species_label.length();
 
847
          if (line.find(species, pos) != String::npos)
 
848
            record_flags |= species_flag;
 
849
          else
 
850
            record_flags = 0;
 
851
        }
 
852
        // if the beginning of the sequence is found and accession and correct species have been found
 
853
        if (line.hasPrefix(sequence_start_label) && ((record_flags & (ac_flag | species_flag)) == (ac_flag | species_flag)))
 
854
          record_flags |= sequence_flag;
 
855
      }
 
856
      source_database_pos_buffer = source_database.tellg();
 
857
    }
 
858
    // source file read
 
859
    source_database.close();
 
860
    source_database.clear();
 
861
 
 
862
    // if the last record has no sequence end label, the sequence has to be appended nevertheless (e.g. FASTA)
 
863
    if (record_flags == (ac_flag | species_flag | sequence_flag) && !sequence.empty())
 
864
    {
 
865
      // all but the first record in the database are preceded by an asterisk (if in append mode an asterisk has to be put at any time)
 
866
      if (append)
 
867
        database.put('*');
 
868
      database_pos = database.tellp();
 
869
 
 
870
      // write the record
 
871
      // whoever wrote this code - please don't ever do this again.
 
872
      // x86 does *not* have a monopoly, nor does little endian.
 
873
      memcpy(record, &source_database_pos, db_pos_length_);       // source database position
 
874
      if (OPENMS_IS_BIG_ENDIAN)
 
875
      {
 
876
        char tmp;
 
877
        for (Size i = 0; i < db_pos_length_ / 2; i++)
 
878
        {
 
879
          tmp = record[i];
 
880
          record[i] = record[db_pos_length_ - 1 - i];
 
881
          record[db_pos_length_ - 1 - i] = tmp;
 
882
        }
 
883
      }
 
884
 
 
885
      memcpy(record + db_pos_length_, &database_pos, trie_db_pos_length_);       // database position
 
886
 
 
887
      // fix the above "suboptimal" code
 
888
      if (OPENMS_IS_BIG_ENDIAN)
 
889
      {
 
890
        char tmp;
 
891
        for (Size i = 0; i < trie_db_pos_length_ / 2; i++)
 
892
        {
 
893
          tmp = record[db_pos_length_ + i];
 
894
          record[db_pos_length_ + i] = record[db_pos_length_ + trie_db_pos_length_ - 1 - i];
 
895
          record[db_pos_length_ + trie_db_pos_length_ - 1 - i] = tmp;
 
896
        }
 
897
      }
 
898
 
 
899
      index.write(record, record_length_);
 
900
      // protein name / accession has already been written
 
901
      database << sequence;
 
902
      append = true;
 
903
    }
 
904
 
 
905
    delete[] record;
 
906
 
 
907
    // close the filestreams
 
908
    database.close();
 
909
    database.clear();
 
910
    index.close();
 
911
    index.clear();
 
912
  }
 
913
 
 
914
  void InspectOutfile::getLabels(
 
915
    const String & source_database_filename,
 
916
    String & ac_label,
 
917
    String & sequence_start_label,
 
918
    String & sequence_end_label,
 
919
    String & comment_label,
 
920
    String & species_label)
 
921
  {
 
922
    ac_label = sequence_start_label = sequence_end_label = comment_label = species_label = "";
 
923
    ifstream source_database(source_database_filename.c_str());
 
924
    if (!source_database)
 
925
    {
 
926
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, source_database_filename);
 
927
    }
 
928
 
 
929
    String line;
 
930
    while (getline(source_database, line) && (sequence_start_label.empty()))
 
931
    {
 
932
      if (!line.empty() && (line[line.length() - 1] < 33))
 
933
        line.resize(line.length() - 1);
 
934
      if (line.trim().empty())
 
935
        continue;
 
936
 
 
937
      else if (line.hasPrefix(">"))
 
938
      {
 
939
        ac_label = ">";
 
940
        sequence_start_label = ">";
 
941
        sequence_end_label = ">";
 
942
        comment_label = ";";
 
943
        species_label = ">";
 
944
      }
 
945
      else if (line.hasPrefix("SQ"))
 
946
      {
 
947
        ac_label = "AC";
 
948
        sequence_start_label = "SQ";
 
949
        sequence_end_label = "//";
 
950
        comment_label = "CC";
 
951
        species_label = "OS";
 
952
      }
 
953
    }
 
954
    source_database.close();
 
955
    source_database.clear();
 
956
 
 
957
    // if no known start separator is found
 
958
    if (sequence_start_label.empty())
 
959
    {
 
960
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "database has unknown file format (neither trie nor FASTA nor swissprot)", source_database_filename);
 
961
    }
 
962
  }
 
963
 
 
964
  vector<Size> InspectOutfile::getWantedRecords(const String & result_filename, DoubleReal p_value_threshold)
 
965
  {
 
966
    // check whether the p_value is correct
 
967
    if ((p_value_threshold < 0) || (p_value_threshold > 1))
 
968
    {
 
969
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the parameters 'p_value_threshold' must be >= 0 and <=1 !");
 
970
    }
 
971
 
 
972
    ifstream result_file(result_filename.c_str());
 
973
    if (!result_file)
 
974
    {
 
975
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
 
976
    }
 
977
 
 
978
    String line;
 
979
    vector<String> substrings;
 
980
 
 
981
    set<Size> wanted_records_set;
 
982
 
 
983
    vector<Size>
 
984
    wanted_records,
 
985
      corrupted_lines;
 
986
 
 
987
    Size line_number(0);
 
988
 
 
989
    // get the header
 
990
    Int
 
991
    spectrum_file_column(-1),
 
992
    scan_column(-1),
 
993
    peptide_column(-1),
 
994
    protein_column(-1),
 
995
    charge_column(-1),
 
996
    MQ_score_column(-1),
 
997
    p_value_column(-1),
 
998
    record_number_column(-1),
 
999
    DB_file_pos_column(-1),
 
1000
    spec_file_pos_column(-1);
 
1001
 
 
1002
    Size number_of_columns(0);
 
1003
 
 
1004
    if (!getline(result_file, line))
 
1005
    {
 
1006
      result_file.close();
 
1007
      result_file.clear();
 
1008
      throw Exception::FileEmpty(__FILE__, __LINE__, __PRETTY_FUNCTION__, result_filename);
 
1009
    }
 
1010
    ++line_number;
 
1011
    readOutHeader(result_filename, line, spectrum_file_column, scan_column, peptide_column, protein_column, charge_column, MQ_score_column, p_value_column, record_number_column, DB_file_pos_column, spec_file_pos_column, number_of_columns);
 
1012
 
 
1013
    while (getline(result_file, line))
 
1014
    {
 
1015
      ++line_number;
 
1016
      if (!line.empty() && (line[line.length() - 1] < 33))
 
1017
        line.resize(line.length() - 1);
 
1018
      line.trim();
 
1019
      if (line.empty())
 
1020
        continue;
 
1021
      line.split('\t', substrings);
 
1022
 
 
1023
      // check whether the line has enough columns
 
1024
      if (substrings.size() != number_of_columns)
 
1025
      {
 
1026
        corrupted_lines.push_back(line_number);
 
1027
        continue;
 
1028
      }
 
1029
 
 
1030
      // check whether the line has enough columns
 
1031
      if (substrings.size() != number_of_columns)
 
1032
        continue;
 
1033
 
 
1034
      // take only those peptides whose p-value is less or equal the given threshold
 
1035
      if (substrings[p_value_column].toFloat() > p_value_threshold)
 
1036
        continue;
 
1037
 
 
1038
      wanted_records_set.insert(substrings[record_number_column].toInt());
 
1039
    }
 
1040
 
 
1041
    result_file.close();
 
1042
    result_file.clear();
 
1043
 
 
1044
    for (set<Size>::const_iterator rn_i = wanted_records_set.begin(); rn_i != wanted_records_set.end(); ++rn_i)
 
1045
    {
 
1046
      wanted_records.push_back(*rn_i);
 
1047
    }
 
1048
 
 
1049
    return wanted_records;
 
1050
  }
 
1051
 
 
1052
  bool
 
1053
  InspectOutfile::getSearchEngineAndVersion(
 
1054
    const String & cmd_output,
 
1055
    ProteinIdentification & protein_identification)
 
1056
  {
 
1057
    protein_identification.setSearchEngine("InsPecT");
 
1058
    protein_identification.setSearchEngineVersion("unknown");
 
1059
    // searching for something like this: InsPecT version 20060907, InsPecT version 20100331
 
1060
    QString response(cmd_output.toQString());
986
1061
    QRegExp rx("InsPecT (version|vesrion) (\\d+)"); // older versions of InsPecT have typo...
987
 
    if (rx.indexIn(response) == -1) return false;
988
 
          protein_identification.setSearchEngineVersion(String(rx.cap(2)));
 
1062
    if (rx.indexIn(response) == -1)
 
1063
      return false;
 
1064
 
 
1065
    protein_identification.setSearchEngineVersion(String(rx.cap(2)));
989
1066
    return true;
990
 
        }
991
 
        
992
 
        void
993
 
        InspectOutfile::readOutHeader(
994
 
                const String& filename,
995
 
                const String& header_line,
996
 
                Int& spectrum_file_column,
997
 
                Int& scan_column,
998
 
                Int& peptide_column,
999
 
                Int& protein_column,
1000
 
                Int& charge_column,
1001
 
                Int& MQ_score_column,
1002
 
                Int& p_value_column,
1003
 
                Int& record_number_column,
1004
 
                Int& DB_file_pos_column,
1005
 
                Int& spec_file_pos_column,
1006
 
                Size& number_of_columns)
1007
 
        {
1008
 
                spectrum_file_column = scan_column = peptide_column = protein_column = charge_column = MQ_score_column = p_value_column = record_number_column = DB_file_pos_column = spec_file_pos_column = -1;
1009
 
                
1010
 
                vector< String > substrings;
1011
 
                header_line.split('\t', substrings);
1012
 
                
1013
 
                // #SpectrumFile        Scan#   Annotation      Protein Charge  MQScore Length  TotalPRMScore   MedianPRMScore  FractionY       FractionB       Intensity       NTT     p-value F-Score DeltaScore      DeltaScoreOther RecordNumber    DBFilePos       SpecFilePos
1014
 
                for ( vector< String >::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i )
1015
 
                {
1016
 
                        if ( (*s_i) == "#SpectrumFile" ) spectrum_file_column = s_i - substrings.begin();
1017
 
                        else if ( (*s_i) == "Scan#" ) scan_column = s_i - substrings.begin();
1018
 
                        else if ( (*s_i) == "Annotation" ) peptide_column = s_i - substrings.begin();
1019
 
                        else if ( (*s_i) == "Protein" ) protein_column = s_i - substrings.begin();
1020
 
                        else if ( (*s_i) == "Charge" ) charge_column = s_i - substrings.begin();
1021
 
                        else if ( (*s_i) == "MQScore" ) MQ_score_column = s_i - substrings.begin();
1022
 
                        else if ( (*s_i) == "p-value" ) p_value_column = s_i - substrings.begin();
1023
 
                        else if ( (*s_i) == "RecordNumber" ) record_number_column = s_i - substrings.begin();
1024
 
                        else if ( (*s_i) == "DBFilePos" ) DB_file_pos_column = s_i - substrings.begin();
1025
 
                        else if ( (*s_i) == "SpecFilePos" ) spec_file_pos_column = s_i - substrings.begin();
1026
 
                }
1027
 
                
1028
 
                if ( (spectrum_file_column == -1) || (scan_column == -1) || (peptide_column == -1) || (protein_column == -1) || (charge_column == -1) || (MQ_score_column == -1) || (p_value_column == -1) || (record_number_column == -1) || (DB_file_pos_column == -1) || (spec_file_pos_column == -1) )
1029
 
                {
1030
 
                        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "at least one of the columns '#SpectrumFile', 'Scan#', 'Annotation', 'Protein', 'Charge', 'MQScore', 'p-value', 'RecordNumber', 'DBFilePos' or 'SpecFilePos' is missing!", filename);
1031
 
                }
1032
 
                number_of_columns = substrings.size();
1033
 
        }
1034
 
        
1035
 
        const Size InspectOutfile::db_pos_length_ = 8;
1036
 
        const Size InspectOutfile::trie_db_pos_length_ = 4;
1037
 
        const Size InspectOutfile::protein_name_length_ = 80;
1038
 
        const Size InspectOutfile::record_length_ = db_pos_length_ + trie_db_pos_length_ + protein_name_length_;
1039
 
        const char InspectOutfile::trie_delimiter_ = '*';
1040
 
        const String InspectOutfile::score_type_ = "Inspect";
1041
 
        
 
1067
  }
 
1068
 
 
1069
  void
 
1070
  InspectOutfile::readOutHeader(
 
1071
    const String & filename,
 
1072
    const String & header_line,
 
1073
    Int & spectrum_file_column,
 
1074
    Int & scan_column,
 
1075
    Int & peptide_column,
 
1076
    Int & protein_column,
 
1077
    Int & charge_column,
 
1078
    Int & MQ_score_column,
 
1079
    Int & p_value_column,
 
1080
    Int & record_number_column,
 
1081
    Int & DB_file_pos_column,
 
1082
    Int & spec_file_pos_column,
 
1083
    Size & number_of_columns)
 
1084
  {
 
1085
    spectrum_file_column = scan_column = peptide_column = protein_column = charge_column = MQ_score_column = p_value_column = record_number_column = DB_file_pos_column = spec_file_pos_column = -1;
 
1086
 
 
1087
    vector<String> substrings;
 
1088
    header_line.split('\t', substrings);
 
1089
 
 
1090
    // #SpectrumFile Scan# Annotation Protein Charge MQScore Length TotalPRMScore MedianPRMScore FractionY FractionB Intensity NTT p-value F-Score DeltaScore DeltaScoreOther RecordNumber DBFilePos SpecFilePos
 
1091
    for (vector<String>::const_iterator s_i = substrings.begin(); s_i != substrings.end(); ++s_i)
 
1092
    {
 
1093
      if ((*s_i) == "#SpectrumFile")
 
1094
        spectrum_file_column = s_i - substrings.begin();
 
1095
      else if ((*s_i) == "Scan#")
 
1096
        scan_column = s_i - substrings.begin();
 
1097
      else if ((*s_i) == "Annotation")
 
1098
        peptide_column = s_i - substrings.begin();
 
1099
      else if ((*s_i) == "Protein")
 
1100
        protein_column = s_i - substrings.begin();
 
1101
      else if ((*s_i) == "Charge")
 
1102
        charge_column = s_i - substrings.begin();
 
1103
      else if ((*s_i) == "MQScore")
 
1104
        MQ_score_column = s_i - substrings.begin();
 
1105
      else if ((*s_i) == "p-value")
 
1106
        p_value_column = s_i - substrings.begin();
 
1107
      else if ((*s_i) == "RecordNumber")
 
1108
        record_number_column = s_i - substrings.begin();
 
1109
      else if ((*s_i) == "DBFilePos")
 
1110
        DB_file_pos_column = s_i - substrings.begin();
 
1111
      else if ((*s_i) == "SpecFilePos")
 
1112
        spec_file_pos_column = s_i - substrings.begin();
 
1113
    }
 
1114
 
 
1115
    if ((spectrum_file_column == -1) || (scan_column == -1) || (peptide_column == -1) || (protein_column == -1) || (charge_column == -1) || (MQ_score_column == -1) || (p_value_column == -1) || (record_number_column == -1) || (DB_file_pos_column == -1) || (spec_file_pos_column == -1))
 
1116
    {
 
1117
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "at least one of the columns '#SpectrumFile', 'Scan#', 'Annotation', 'Protein', 'Charge', 'MQScore', 'p-value', 'RecordNumber', 'DBFilePos' or 'SpecFilePos' is missing!", filename);
 
1118
    }
 
1119
    number_of_columns = substrings.size();
 
1120
  }
 
1121
 
 
1122
  const Size InspectOutfile::db_pos_length_ = 8;
 
1123
  const Size InspectOutfile::trie_db_pos_length_ = 4;
 
1124
  const Size InspectOutfile::protein_name_length_ = 80;
 
1125
  const Size InspectOutfile::record_length_ = db_pos_length_ + trie_db_pos_length_ + protein_name_length_;
 
1126
  const char InspectOutfile::trie_delimiter_ = '*';
 
1127
  const String InspectOutfile::score_type_ = "Inspect";
 
1128
 
1042
1129
} //namespace OpenMS