~ubuntu-branches/ubuntu/wily/openms/wily

« back to all changes in this revision

Viewing changes to source/APPLICATIONS/TOPP/SequestAdapter.C

  • Committer: Package Import Robot
  • Author(s): Filippo Rusconi
  • Date: 2012-11-12 15:58:12 UTC
  • Revision ID: package-import@ubuntu.com-20121112155812-vr15wtg9b50cuesg
Tags: upstream-1.9.0
ImportĀ upstreamĀ versionĀ 1.9.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
// -*- mode: C++; tab-width: 2; -*-
 
2
// vi: set ts=2:
 
3
//
 
4
// --------------------------------------------------------------------------
 
5
//                   OpenMS Mass Spectrometry Framework
 
6
// --------------------------------------------------------------------------
 
7
//  Copyright (C) 2003-2011 -- Oliver Kohlbacher, Knut Reinert
 
8
//
 
9
//  This library is free software; you can redistribute it and/or
 
10
//  modify it under the terms of the GNU Lesser General Public
 
11
//  License as published by the Free Software Foundation; either
 
12
//  version 2.1 of the License, or (at your option) any later version.
 
13
//
 
14
//  This library is distributed in the hope that it will be useful,
 
15
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
16
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
17
//  Lesser General Public License for more details.
 
18
//
 
19
//  You should have received a copy of the GNU Lesser General Public
 
20
//  License along with this library; if not, write to the Free Software
 
21
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
22
//
 
23
// --------------------------------------------------------------------------
 
24
// $Maintainer: Andreas Bertsch $
 
25
// $Authors: Martin Langwisch $
 
26
// --------------------------------------------------------------------------
 
27
 
 
28
 
 
29
#include <OpenMS/FORMAT/IdXMLFile.h>
 
30
#include <OpenMS/APPLICATIONS/TOPPBase.h>
 
31
#include <OpenMS/CONCEPT/Exception.h>
 
32
#include <OpenMS/DATASTRUCTURES/String.h>
 
33
#include <OpenMS/FORMAT/DTAFile.h>
 
34
#include <OpenMS/FORMAT/MzXMLFile.h>
 
35
#include <OpenMS/FORMAT/SequestInfile.h>
 
36
#include <OpenMS/FORMAT/SequestOutfile.h>
 
37
#include <OpenMS/KERNEL/MSExperiment.h>
 
38
#include <OpenMS/METADATA/ContactPerson.h>
 
39
#include <OpenMS/SYSTEM/File.h>
 
40
#include <OpenMS/FORMAT/FileHandler.h>
 
41
#include <OpenMS/FORMAT/FileTypes.h>
 
42
#include <OpenMS/FORMAT/PTMXMLFile.h>
 
43
 
 
44
 
 
45
#include <cstdlib>
 
46
#include <vector>
 
47
#include <algorithm>
 
48
 
 
49
using namespace OpenMS;
 
50
using namespace std;
 
51
 
 
52
//-------------------------------------------------------------
 
53
//Doxygen docu
 
54
//-------------------------------------------------------------
 
55
 
 
56
 
 
57
/**
 
58
        @page TOPP_SequestAdapter SequestAdapter
 
59
 
 
60
        @brief Identifies peptides in MS/MS spectra via Sequest.
 
61
 
 
62
        @experimental This tool has not been tested thoroughly and might behave not as expected!
 
63
 
 
64
        This wrapper application serves for getting peptide peptide_identifications
 
65
        for MS/MS spectra. The wrapper can be executed in three different
 
66
        modes:
 
67
        <ol>
 
68
                                <li>
 
69
                                The whole process of ProteinIdentification via Sequest is executed.
 
70
                                Inputfile is one (or more) mz file containing the MS/MS spectra
 
71
                                (Supported spectrum file formats are .mzXML, .mzData)
 
72
                                for which the identifications are to be found and one database in
 
73
                                FASTA format containing the possible proteins.
 
74
                                The results are written as an IdXML output file. This mode is selected
 
75
                                by default.
 
76
                                Note: You need a user with network access on the computer hosting sequest.
 
77
                                </li>
 
78
 
 
79
                                <li>
 
80
                                Only the first part of the ProteinIdentification process is performed.
 
81
                                This means that a Sequest input file is generated and dta files are
 
82
                                created from the mz file.
 
83
                                Calling a Sequest process should look like the following:
 
84
 
 
85
                                @code sequest -P\<inputfilename\> \<path to dta files\>*.dta  @endcode
 
86
 
 
87
                                Consult your Sequest reference manual for further details.
 
88
 
 
89
                                This mode is selected by the <b>-sequest_in</b> option in the command line.
 
90
                                </li>
 
91
 
 
92
                                <li>
 
93
                                Only the second part of the ProteinIdentification process is performed.
 
94
                                This means that the output of sequest is translated into IdXML.
 
95
 
 
96
                                This mode is selected by the <b>-sequest_out</b> option in the command line.
 
97
                                </li>
 
98
        </ol>
 
99
 
 
100
        @todo Check for missing precursors (Andreas)
 
101
        
 
102
        <B>The command line parameters of this tool are:</B>
 
103
        @verbinclude TOPP_SequestAdapter.cli
 
104
*/
 
105
 
 
106
// We do not want this class to show up in the docu -> cond
 
107
// @cond
 
108
 
 
109
class TOPPSequestAdapter
 
110
        : public TOPPBase
 
111
{
 
112
        public:
 
113
                TOPPSequestAdapter()
 
114
                        : TOPPBase("SequestAdapter", "Annotates MS/MS spectra using Sequest.")
 
115
                {
 
116
                }
 
117
 
 
118
        protected:
 
119
                static const Int max_peptide_mass_units = 2;
 
120
                static const Size max_dtas_per_run = 1000; // sequest has a problem when there are too many dtas, so they have to be splitted, 1000 seemed to work very good
 
121
                Size dtas;
 
122
 
 
123
                void registerOptionsAndFlags_()
 
124
                {
 
125
                        addText_("The definitions for the parameters are taken from the site:\n"
 
126
                                                                                 "http://www.grosse-coosmann.de/~florian/Parameters.html#file.");
 
127
                        // do not change this to registerInputFile_() as it might also be a directory, which fails the property check of a file on Windows
 
128
                        registerStringOption_("in", "<file>", "", "input file(s) in mzXML or mzData format (comma-separated).\n"
 
129
                                                                                                                                                                                                "Note: In mode 'sequest_out' a directory with Sequest results files\n"
 
130
                                                                                                                                                                                                "(*.out) is read", false);
 
131
                        registerOutputFile_("out", "<file>", "", "output file in IdXML format.\n"
 
132
                                                                   "Note: In mode 'sequest_in' a Sequest input file is written.", false);
 
133
                        registerFlag_("sequest_in", "if this flag is set the SequestAdapter will read in mzXML or mzData\n"
 
134
                                                                                                                                                                                                "and write an Sequest input file\n"
 
135
                                                                                                                                                                                                "and create dta files from the given mzXML or mzData files");
 
136
                        registerFlag_("sequest_out", "if this flag is set the SequestAdapter will read in Sequest result files\n"
 
137
                                                                                                                                                                                                        "and write IdXML");
 
138
                        registerStringOption_("mz_files", "<files>", "", "when using sequest_out the mzXML or mzData files (comma-separated)\n"
 
139
                                                                                                                                                                                                                                                                                                                "have to be given to retrieve the retention times", false);
 
140
                        registerFlag_("show_enzymes", "show a list with enzymes and corresponding numbers to choose from");
 
141
 
 
142
      // TODO: if this get rewritten at some point you can use 'registerInputFile_()' to have the user
 
143
      // specify the location of 'sequest.exe' (or similar). In this case also use "skipexists as a tag argument
 
144
      // e.g.   registerInputFile_("xtandem_executable", "<file>", "", "X!Tandem executable of the installtation e.g. 'tandem.exe'", true, false, StringList::create("skipexists"));
 
145
      // to avoid TOPPBase throwing an error when sequest.exe is not found in the current directory, but can be found in $PATH
 
146
 
 
147
                        registerStringOption_("sequest_computer", "<name>", "", "the name of the computer in the network that hosts Sequest\n"
 
148
                                                                                                                                                                                                                                                        "(rdesktop is used to connect to this computer)", false);
 
149
                        registerStringOption_("sequest_directory_win", "<dir>", "", "the windows directory in which Sequest (sequest.exe) is located", false);
 
150
                        registerStringOption_("user", "<name>", "", "user name for the sequest computer (has to have access to network!)", false);
 
151
                        registerStringOption_("password", "<pw>", "", "password for this user (if not given, you have to enter it at prompt)", false);
 
152
                        registerStringOption_("temp_data_directory", "<dir>", "", "a directory in which some temporary files can be stored", false);
 
153
                        registerStringOption_("temp_data_directory_win", "<dir>", "", "windows path of the temporary data directory,\n"
 
154
                                                                                                                                                                                                                                                                                                                                                                                                                        "e.g. X:\\temp_data_dir", false);
 
155
                        registerStringOption_("db", "<file>", "", "name of FASTA-database to search in", false);
 
156
                        registerInputFile_("sequest_input", "<file>", "", "name for the input file of Sequest (may only be used in a full run)", false);
 
157
                        addEmptyLine_();
 
158
                        addText_("For each directory, one corresponding network drive has to be given");
 
159
                        registerStringOption_("temp_data_directory_network", "<path>", "", "network path of the temporary data directory,\n"
 
160
                                                                                                                                                                                                                                                                                                                                                                                                                        "e.g. \\\\computername\\username\\temp_data_dir", false);
 
161
                        registerStringOption_("db_directory_network", "<path>", "", "network path of the database directory", false);
 
162
                        registerStringOption_("sequest_input_directory_network", "<path>", "", "network path of the sequest input file directory", false);
 
163
                        addEmptyLine_();
 
164
                        registerDoubleOption_("precursor_mass_tolerance", "<tol>", 2.0 , "the precursor mass tolerance", false);
 
165
                        registerDoubleOption_("peak_mass_tolerance", "<tol>", 1.0, "the peak mass tolerance", false);
 
166
                        registerDoubleOption_("p_value", "<prob>", 1.0, "annotations with inferior p-value are ignored", false);
 
167
                  registerStringOption_("charges", "[1>3,5]", "", "comma-seperated list of charge states (or ranges)", false);
 
168
                        registerIntOption_("num_results", "<num>", 1, "the maximal number of results (peptides) to show (per scan/dta)", false);
 
169
                        registerStringOption_("cleavage", "<enz>", "Trypsin", "the number of the enzyme used for digestion", false);
 
170
                        registerStringOption_("enzyme_info", "<>", "", "information about the enzyme used\n"
 
171
                                                                                                                                                                                                                                                                                                                        "<name>,<cut direction: N to C?>,<cuts after>,<doesn't cut before>\n"
 
172
                                                                                                                                                                                                                                                                                                                        "cuts after, doesn't cut before: amino acids in 1-letter code\n"
 
173
                                                                                                                                                                                                                                                                                                                        "or '-' for unspecific cleavage", false);
 
174
                        registerFlag_("list_modifications", "show a list of the available modifications");
 
175
                        registerStringOption_("modifications", "<mods>", "", "the colon-seperated modifications; may be\n"
 
176
                                                                                                                                                                                                                                                                                                                                                                                "<name>,<type>, e.g.: Deamidation,opt or\n"
 
177
                                                                                                                                                                                                                                                                                                                                                                                "<composition>,<residues>,<type>,<name>, e.g.: H2C2O,KCS,opt,Acetyl or\n"
 
178
                                                                                                                                                                                                                                                                                                                                                                                "<mass>,<residues>,<type>,<name>, e.g.: 42.0367,KCS,opt,Acetyl or\n"
 
179
                                                                                                                                                                                                                                                                                                                                                                                "Valid values for type are \"fix\" and \"opt\" (default)\n"
 
180
                                                                                                                                                                                                                                                                                                                                                                                "If you want terminal PTMs, write \"cterm\", \"nterm\", \"cterm_prot\" or \"nterm_prot\" instead of residues", false);
 
181
                        registerFlag_("use_monoisotopic_mod_mass", "use monoisotopic masses for the modifications");
 
182
                        registerStringOption_("modifications_xml_file", "<file>", "", "name of an XML file with the modifications", false);
 
183
                        registerIntOption_("max_num_dif_AA_per_mod", "<num>", 0, "limits the maximum total number of\n"
 
184
                                                                                                                                                                                                                                                                                                                                                                                                                         "variable modifications per amino acid", false);
 
185
                        registerIntOption_("max_num_dif_mods_per_peptide", "<num>", 0, "limits the maximum total number of\n"
 
186
                                                                                                                                                                                                                                                                                                                                                                                                                                                         "each single variable modification in one peptide", false);
 
187
                        registerDoubleOption_("match_peak_tol", "", 0, "the minimal space between two peaks", false);
 
188
                        registerStringOption_("neutral_loss_ABY", "[ABY]", "011", "ABY: 0 or 1 whether neutral losses of the series should be honored,\n"
 
189
                                                                                                                                                                                                                                                                                                                                                                 "e.g.: 011", false);
 
190
                        registerStringOption_("ion_series_weights", "[abcdvwxyz]", "0,1.0,0,0,0,0,0,1.0,0", "[0.0, 1.0] factor for the series,\n"
 
191
                                                                                                                                                                                                                                                                                                                                                                                                                        "e.g.: 0,0.5,0,0,0,0,0,1.0,0", false);
 
192
                        registerDoubleOption_("ion_cutoff", "<num>", 0.0, "This value selects a cut-off below which a matching peptide is rejected.\n"
 
193
                                                                                                                                                                                                                         "The value has to be in [0,1] and is compared with the ratio\n"
 
194
                                                                                                                                                                                                                         "(# matching theoretical fragment peaks)/(# total theoretical fragment peaks)\n"
 
195
                                                                                                                                                                                                                         "which means that one select a minimum coverage of matching peaks.", false);
 
196
                        registerIntOption_("pep_mass_unit", "<num>", 0, "peptide mass unit: 0=amu (atomic mass unit), 1=mmu (millimass unit),\n"
 
197
                                                                                                                                                                                                                                                                                                                                         "2=ppm (parts per million)", false);
 
198
                        registerDoubleOption_("prot_mass", "<num>", 0, "protein mass or minimum protein mass (see below)", false);
 
199
                        registerDoubleOption_("max_prot_mass_or_tol", "<num>", 0, "maximum protein mass or tolerance", false);
 
200
                        registerIntOption_("max_num_int_cleav_sites", "<num>", 0, "This value is the number of cleavage positions\n"
 
201
                                                                                                                                                                                                                                                                                                                                                                                                         "that may have been ignored by the enzyme.", false);
 
202
                        registerIntOption_("match_peak_count", "<num>", 0, "The highest abundant experimental peaks are checked\n"
 
203
                                                                                                                                                                                                                                                                                                                                                                        "whether they are matched by the theoretical ones.\n"
 
204
                                                                                                                                                                                                                                                                                                                                                                        "match_peak_count is the number of the top abundant peaks to check.\n"
 
205
                                                                                                                                                                                                                                "A maximum of match_peak_allowed_error may lack this test.\n", false);
 
206
                        registerIntOption_("match_peak_allowed_error", "<num>", 0, "see match_peak_count", false);
 
207
                        registerFlag_("show_fragment_ions", "If set the fragment peaks of the top scored peptide are listed\n"
 
208
                                                                                                                                                                                                                                                                "at the end of the output");
 
209
                        registerFlag_("remove_precursor_peak", "If set the peaks near (15 amu) the precursor are removed.");
 
210
                        registerFlag_("mass_type_precursor", "Set selects monoisotopic masses, not set selects average masses\n"
 
211
                                                                                                                                                                                                                                                                 "for calculating precursor peaks.");
 
212
                        registerFlag_("mass_type_peak", "Set selects monoisotopic masses, not set selects average masses\n"
 
213
                                                                                                                                                                                                                                 "for calculating peaks.");
 
214
                        registerFlag_("normalize_xcorr", "Whether to use normalized xcorr values in the out files.");
 
215
                        registerFlag_("residues_in_lower_case", "Whether the residues in the FASTA database are in lower case.");
 
216
                        registerStringOption_("partial_sequence", "<sequences>", "", "A comma delimited list of amino acid sequences that must occur\n"
 
217
                                                                                                                                                                                                                                                                                                                                                                                                                 "in the theoretical spectra.", false);
 
218
                        registerStringOption_("header_filter", "<sequences>", "", "Several elements can be splitted by commas.\n"
 
219
                                                                                                                                                                                                                                                                                                                                                                                         "Each element can be introduced by an exclamation mark (!)\n"
 
220
                                                                                                                                                                                                                                                                                                                                                                                         "meaning that this element must not appear in the header of\n"
 
221
                                                                                                                                                                                                                                                                                                                                                                                        "a protein or the protein will be skipped. This test is done first.\n"
 
222
                                                                                                                                                                                                                                                                                                                                                                                        "Next, all other elements are tested. The protein is processed\n"
 
223
                                                                                                                                                                                                                                                                                                                                                                                        "if one filter string matches the header string.\n"
 
224
                                                                                                                                                                                                                                                                                                                                                                                        "A tilde (~) in the filter string is replaced by a blank during comparison.", false);
 
225
                        registerFlag_("keep_out_files", "If set the Sequest .out-files are not removed");
 
226
                        registerFlag_("keep_dta_files", "If set the dta-files that were created from the mzXML or mzData files are not removed");
 
227
                        registerIntOption_("nuc_reading_frame", "<num>", 0, "Format of the FASTA database:\n"
 
228
                                                                                                                                                                                                                                         "0  The FASTA file contains amino acid codes. No translation is needed.\n"
 
229
                                                                                                                                                                                                                                         "1  The DNA sequence is scanned left to right (forward direction).\n"
 
230
                                                                                                                                                                                                                                         "The amino acid code starts with the first DNA code.\n"
 
231
                                                                                                                                                                                                                                         "2  The DNA sequence is scanned left to right (forward direction).\n"
 
232
                                                                                                                                                                                                                                         "The amino acid code starts with the second DNA code.\n"
 
233
                                                                                                                                                                                                                                         "3  The DNA sequence is scanned left to right (forward direction).\n"
 
234
                                                                                                                                                                                                                                         "The amino acid code starts with the third DNA code.\n"
 
235
                                                                                                                                                                                                                                         "4  The DNA sequence is scanned right to left (backward direction\n"
 
236
                                                                                                                                                                                                                                         "for the complementary strand).\n"
 
237
                                                                                                                                                                                                                                         "The amino acid code starts with the first DNA code.\n"
 
238
                                                                                                                                                                                                                                         "5  The DNA sequence is scanned right to left (backward direction\n"
 
239
                                                                                                                                                                                                                                         "for the complementary strand).\n"
 
240
                                                                                                                                                                                                                                         "The amino acid code starts with the second DNA code.\n"
 
241
                                                                                                                                                                                                                                         "6  The DNA sequence is scanned right to left (backward direction\n"
 
242
                                                                                                                                                                                                                                         "for the complementary strand).\n"
 
243
                                                                                                                                                                                                                                         "The amino acid code starts with the third DNA code.\n"
 
244
                                                                                                                                                                                                                                         "7  Use each of the DNA translations of the codes 1, 2, 3.\n"
 
245
                                                                                                                                                                                                                                         "8  Use each of the DNA translations of the codes 4, 5, 6.\n"
 
246
                                                                                                                                                                                                                                         "9  Use each of the DNA translations of the codes 1, 2, 3, 4, 5, 6.\n", false);
 
247
                        registerStringOption_("contact_name", "<name>", "unknown", "Name of the contact", false);
 
248
                        registerStringOption_("contact_institution", "<name>", "unknown", "Name of the contact institution", false);
 
249
                        registerStringOption_("contact_info", "<info>", "unknown", "Some information about the contact", false);
 
250
                }
 
251
 
 
252
                bool isWinFormat(const string& name)
 
253
                {
 
254
                        // check for the directory and the backslash afterwards
 
255
                        if ( name.length() > 1 )
 
256
                        {
 
257
                                if ( name[1] == ':' )
 
258
                                {
 
259
                                        if ( name.length() > 3 )
 
260
                                        {
 
261
                                                if ( name[2] == '\\' )
 
262
                                                {
 
263
                                                        // make sure there's no space within the name, as in windows 'cmd /C "command"' is used, so there's no possibility to use any more ""
 
264
                                                        if ( name.find(" ") == String::npos ) return true;
 
265
                                                        else return false;
 
266
                                                }
 
267
                                                else return false;
 
268
                                        }
 
269
                                        else return true;
 
270
                                }
 
271
                        }
 
272
                        return false;
 
273
                }
 
274
 
 
275
                bool correctNetworkPath(String& network_path, Size backslashes = 2)
 
276
                {
 
277
                        String::size_type pos(0);
 
278
                        while ( (pos < network_path.length()) && (network_path[pos] == '\\') ) ++pos;
 
279
                        if ( pos < backslashes ) network_path.insert(network_path.begin(), backslashes-pos, '\\');
 
280
                        else network_path.erase(0, pos-backslashes);
 
281
                        if ( network_path.length() < backslashes+1 ) return false;
 
282
                        if ( network_path[network_path.length() - 1] != '\\' ) network_path.append("\\"); // if it doesn't end with a slash, append one
 
283
                        return true;
 
284
                }
 
285
 
 
286
  Size
 
287
                MSExperiment2DTAs(
 
288
                        MSExperiment<Peak1D>& msexperiment,
 
289
                        const String& common_name,
 
290
                        const vector< Int >& charges,
 
291
                        map< String, DoubleReal >& outfile_names_and_precursor_retention_times,
 
292
                        vector< String >& dta_filenames,
 
293
                        bool make_dtas = true)
 
294
                {
 
295
                        DTAFile dtafile;
 
296
                        String filename;
 
297
                        Size scan_number(0);
 
298
                        Size msms_spectra(0);
 
299
                        Size dtas(0);
 
300
 
 
301
                        for ( MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it )
 
302
                        {
 
303
                                ++scan_number;
 
304
                                if ( (spectra_it->getMSLevel() == 2) && (!spectra_it->empty()) )
 
305
                                {
 
306
                                        ++msms_spectra;
 
307
                                        if ( spectra_it->getPrecursors()[0].getCharge() )
 
308
                                        {
 
309
                                                filename = common_name + "." + String(scan_number) + "." + String(spectra_it->getPrecursors()[0].getCharge()) + ".dta_" + String( (dtas / max_dtas_per_run) );
 
310
                                                ++dtas;
 
311
                                                if ( make_dtas ) dtafile.store(filename, *spectra_it);
 
312
                                                dta_filenames.push_back(filename);
 
313
                                                filename = File::basename(filename);
 
314
                                                filename.replace(filename.length() - 4, 4, ".out");
 
315
                                                outfile_names_and_precursor_retention_times[filename] = spectra_it->getRT();
 
316
                                        }
 
317
                                        else
 
318
                                        {
 
319
                                                for ( vector< Int >::const_iterator charges_it = charges.begin(); charges_it != charges.end(); ++charges_it )
 
320
                                                {
 
321
                                                        filename = common_name + "." + String(scan_number) + "." + *charges_it + ".dta_" + String( (dtas / max_dtas_per_run) );
 
322
                                                        ++dtas;
 
323
                                                        if ( make_dtas )
 
324
                                                        {
 
325
                                                                spectra_it->getPrecursors()[0].setCharge(*charges_it);
 
326
                                                                dtafile.store(filename, *spectra_it);
 
327
                                                        }
 
328
                                                        dta_filenames.push_back(filename);
 
329
                                                        filename = File::basename(filename);
 
330
                                                        filename.replace(filename.length() - 4, 4, ".out");
 
331
                                                        outfile_names_and_precursor_retention_times[filename] = spectra_it->getRT();
 
332
                                                }
 
333
                                                spectra_it->getPrecursors()[0].setCharge(0);
 
334
                                        }
 
335
                                }
 
336
                        }
 
337
 
 
338
                        return msms_spectra;
 
339
                }
 
340
 
 
341
                ExitCodes main_(int , const char**)
 
342
                {
 
343
                        //-------------------------------------------------------------
 
344
                        // (1) variables
 
345
                        //-------------------------------------------------------------
 
346
 
 
347
                        // (1.0) variables for running the program
 
348
                        SequestInfile sequest_infile;
 
349
                        SequestOutfile sequest_outfile;
 
350
 
 
351
                        String
 
352
                                logfile,
 
353
                                output_filename,
 
354
                                input_filename,
 
355
                                input_file_directory_network,
 
356
                                user,
 
357
                                password,
 
358
                                sequest_computer,
 
359
                                temp_data_directory,
 
360
                                temp_data_directory_win,
 
361
                                temp_data_directory_network,
 
362
                                sequest_directory_win,
 
363
                                database,
 
364
                                database_directory_network,
 
365
                                out_directory,
 
366
                                batch_filename,
 
367
                                string_buffer,
 
368
                                string_buffer2,
 
369
                                modifications_filename,
 
370
                                dta_files_common_name,
 
371
                                basename;
 
372
 
 
373
                        bool
 
374
                                sequest_in(false),
 
375
                                sequest_out(false),
 
376
                                keep_out_files(false),
 
377
                                keep_dta_files(false),
 
378
                                monoisotopic(false),
 
379
                                make_dtas(false);
 
380
 
 
381
                        vector< String >
 
382
                                substrings,
 
383
                                substrings2,
 
384
                                spectra;
 
385
 
 
386
 
 
387
                        Size
 
388
                                msms_spectra_in_file(0),
 
389
                                msms_spectra_altogether(0);
 
390
 
 
391
                        vector< Int > charges;
 
392
 
 
393
                        DoubleReal
 
394
                                Real_buffer(0.0),
 
395
                                Real_buffer2(0.0),
 
396
                                p_value(1.0);
 
397
 
 
398
                        Int int_buffer(0);
 
399
 
 
400
                        ContactPerson contact_person;
 
401
                        ExitCodes exit_code = EXECUTION_OK;
 
402
                        FileHandler fh;
 
403
                        FileTypes::Type type;
 
404
                        MSExperiment<Peak1D> msexperiment;
 
405
                        vector<PeptideIdentification> peptide_identifications;
 
406
                        vector<ProteinIdentification> pis;
 
407
                        ProteinIdentification protein_identification;
 
408
                        StringList out_files;
 
409
 
 
410
                        // the outfile-names and their retention_times
 
411
                        map< String, DoubleReal > outfile_names_and_precursor_retention_times;
 
412
 
 
413
                        // the names of the dta_files - used to erase them afterwards
 
414
                        vector< String > dta_filenames;
 
415
 
 
416
                        // filename and tag: file has to: 1 - exist  2 - be readable  4 - writable  8 - be deleted afterwards
 
417
                        map< String, Size > files;
 
418
                        Size const
 
419
                                exist(1),
 
420
                                readable(2),
 
421
                                writable(4),
 
422
                                delete_afterwards(8);
 
423
 
 
424
                        //-------------------------------------------------------------
 
425
                        // (2) parsing and checking parameters
 
426
                        //-------------------------------------------------------------
 
427
                        modifications_filename = getStringOption_("modifications_xml_file");
 
428
 
 
429
                        if ( getFlag_("list_modifications") )
 
430
                        {
 
431
                                if ( modifications_filename.empty() )
 
432
                                {
 
433
                                        writeLog_("No modifications XML file given. Aborting!");
 
434
                                        return INPUT_FILE_NOT_FOUND;
 
435
                                }
 
436
                                if ( !File::readable(modifications_filename) )
 
437
                                {
 
438
                                        writeLog_("Modifications XML file is not readable. Aborting!");
 
439
                                        return INPUT_FILE_NOT_READABLE;
 
440
                                }
 
441
                                map< String, pair< String, String > > PTM_informations;
 
442
                                try
 
443
                                {
 
444
                                        PTMXMLFile().load(modifications_filename, PTM_informations);
 
445
                                }
 
446
                                catch ( Exception::ParseError pe )
 
447
                                {
 
448
                                        writeLog_(pe.getMessage());
 
449
                                        return PARSE_ERROR;
 
450
                                }
 
451
 
 
452
                                // output the information
 
453
                                stringstream PTM_info;
 
454
                                String::size_type max_name_length(4), max_composition_length(11), max_amino_acids_length(11);
 
455
                                for ( map< String, pair< String, String > >::const_iterator mod_it = PTM_informations.begin(); mod_it != PTM_informations.end(); ++mod_it )
 
456
                                {
 
457
                                        max_name_length = max(max_name_length, mod_it->first.length());
 
458
                                        max_composition_length = max(max_composition_length, mod_it->second.first.length());
 
459
                                        max_amino_acids_length = max(max_amino_acids_length, mod_it->second.second.length());
 
460
                                }
 
461
                                PTM_info << "name" << String(max_name_length - 4, ' ') << "\t" << "composition" << String(max_composition_length - 11, ' ') << "\t" << "amino_acids" << String(max_amino_acids_length - 11, ' ') << endl;
 
462
                                for ( map< String, pair< String, String > >::const_iterator mod_it = PTM_informations.begin(); mod_it != PTM_informations.end(); ++mod_it )
 
463
                                {
 
464
                                        PTM_info << mod_it->first << String(max_name_length - mod_it->first.length(), ' ') << "\t" << mod_it->second.first << String(max_composition_length - mod_it->second.first.length(), ' ') << "\t" << mod_it->second.second << String(max_amino_acids_length - mod_it->second.second.length(), ' ') << endl;
 
465
                                }
 
466
                                std::cout << PTM_info.str() << std::endl;
 
467
 
 
468
                                return EXECUTION_OK;
 
469
                        }
 
470
 
 
471
                        // only show the available enzymes, then quit
 
472
                        if ( getFlag_("show_enzymes") )
 
473
                        {
 
474
                                writeLog_("Option show_enzymes chosen.");
 
475
                                writeLog_(sequest_infile.getEnzymeInfoAsString());
 
476
                                return EXECUTION_OK;
 
477
                        }
 
478
                        // (2.0) variables for running the program
 
479
                        sequest_in = getFlag_("sequest_in");
 
480
                        sequest_out = getFlag_("sequest_out");
 
481
 
 
482
                        // a 'normal' sequest run corresponds to both sequest_in and sequest_out set
 
483
                        if ( !sequest_in && !sequest_out ) sequest_in = sequest_out = true;
 
484
 
 
485
                        logfile = getStringOption_("log");
 
486
                        if ( logfile.empty() )
 
487
                        {
 
488
                                logfile = "temp.sequest.log";
 
489
                                files[logfile] = (writable | delete_afterwards);
 
490
                        }
 
491
                        else files[logfile] = writable;
 
492
 
 
493
                        string_buffer = getStringOption_("charges");
 
494
                        if ( string_buffer.empty() )
 
495
                        {
 
496
                                writeLog_("No charge states given. Aborting!");
 
497
                                return ILLEGAL_PARAMETERS;
 
498
                        }
 
499
                        else
 
500
                        {
 
501
                                Int range_start(-1), range_end(-1);
 
502
                                string_buffer.split(',', substrings);
 
503
 
 
504
                                for ( vector< String >::iterator substrings_it = substrings.begin(); substrings_it != substrings.end(); )
 
505
                                {
 
506
                                        if ( substrings_it->empty() ) substrings.erase(substrings_it);
 
507
                                        else
 
508
                                        {
 
509
                                                substrings_it->split('>', substrings2);
 
510
                                                if ( substrings2.size() < 2 ) // only one number, no range
 
511
                                                {
 
512
                                                        if ( (*substrings_it)[substrings_it->length()-1] == '-' ) charges.push_back(-1 * substrings_it->toInt());
 
513
                                                        else charges.push_back(substrings_it->toInt());
 
514
                                                }
 
515
                                                else // range of charge states
 
516
                                                {
 
517
                                                        if ( substrings2.size() > 2 )
 
518
                                                        {
 
519
                                                                writeLog_("Illegal range of charge states given: " + *substrings_it + ". Aborting!");
 
520
                                                                return ILLEGAL_PARAMETERS;
 
521
                                                        }
 
522
 
 
523
                                                        if ( substrings2[0][substrings2[0].length()-1] == '-' ) range_start = -1 * substrings2[0].toInt();
 
524
                                                        else range_start = substrings[0].toInt();
 
525
 
 
526
                                                        if ( substrings2[1][substrings2[1].length()-1] == '-' ) range_end = -1 * substrings2[1].toInt();
 
527
                                                        else range_end = substrings2[1].toInt();
 
528
 
 
529
                                                        for ( Int i = min(range_start, range_end); i <= max(range_start, range_end); ++i )
 
530
                                                        {
 
531
                                                                if ( i ) charges.push_back(i);
 
532
                                                        }
 
533
                                                }
 
534
 
 
535
                                                ++substrings_it;
 
536
                                        }
 
537
                                }
 
538
 
 
539
                                if ( charges.empty() )
 
540
                                {
 
541
                                        writeLog_("No charges states given. Aborting!");
 
542
                                        return ILLEGAL_PARAMETERS;
 
543
                                }
 
544
                                sort(charges.begin(), charges.end());
 
545
                                for ( vector< Int >::iterator charges_it = charges.begin(); charges_it != --charges.end(); )
 
546
                                {
 
547
                                        if ( (*charges_it) == (*(charges_it+1)) ) charges.erase(charges_it+1);
 
548
                                        else ++charges_it;
 
549
                                }
 
550
                        }
 
551
 
 
552
                        temp_data_directory = getStringOption_("temp_data_directory");
 
553
                        if ( temp_data_directory.empty() )
 
554
                        {
 
555
                                writeLog_("No directory for temporary files given. Aborting!");
 
556
                                return ILLEGAL_PARAMETERS;
 
557
                        }
 
558
                        temp_data_directory = File::absolutePath(temp_data_directory);
 
559
                        temp_data_directory.ensureLastChar('/');
 
560
 
 
561
                        string_buffer = getStringOption_("in");
 
562
                        if ( string_buffer.empty() )
 
563
                        {
 
564
                                writeLog_("No input file specified. Aborting!");
 
565
                                return ILLEGAL_PARAMETERS;
 
566
                        }
 
567
                        else
 
568
                        {
 
569
                                if ( sequest_in ) // if sequest_in is set, in are the spectra
 
570
                                {
 
571
                                        string_buffer.split(',', spectra);
 
572
                                        out_directory = temp_data_directory;
 
573
                                }
 
574
                                else // if only sequest_out is set, in is the out_directory
 
575
                                {
 
576
                                        out_directory = string_buffer;
 
577
                                        out_directory = File::absolutePath(out_directory);
 
578
                                        out_directory.ensureLastChar('/');
 
579
 
 
580
                                        // if only sequest_out is set, the mz files have to be given to retrieve the retention times
 
581
                                        string_buffer = getStringOption_("mz_files");
 
582
                                        if ( string_buffer.empty() )
 
583
                                        {
 
584
                                                writeLog_("No mz files specified. Aborting!");
 
585
                                                return ILLEGAL_PARAMETERS;
 
586
                                        }
 
587
                                        else
 
588
                                        {
 
589
                                                string_buffer.split(',', spectra);
 
590
                                        }
 
591
                                }
 
592
                        }
 
593
 
 
594
                        keep_out_files = getFlag_("keep_out_files");
 
595
                        if ( sequest_out && !sequest_in ) keep_out_files = true;
 
596
 
 
597
                        keep_dta_files = getFlag_("keep_dta_files");
 
598
                        if ( sequest_in && !sequest_out ) keep_dta_files = true;
 
599
 
 
600
                        contact_person.setName(getStringOption_("contact_name"));
 
601
                        contact_person.setInstitution(getStringOption_("contact_institution"));
 
602
                        contact_person.setContactInfo(getStringOption_("contact_info"));
 
603
 
 
604
                        if ( sequest_in )
 
605
                        {
 
606
                                temp_data_directory_win = getStringOption_("temp_data_directory_win");
 
607
                                temp_data_directory_win.ensureLastChar('\\');
 
608
 
 
609
                                if ( !isWinFormat(temp_data_directory_win) )
 
610
                                {
 
611
                                        writeLog_("Windows path for the directory for temporary files has wrong format: " + temp_data_directory_win + ". borting!");
 
612
                                        return ILLEGAL_PARAMETERS;
 
613
                                }
 
614
                                temp_data_directory_network = getStringOption_("temp_data_directory_network");
 
615
                                if ( temp_data_directory_network.empty() )
 
616
                                {
 
617
                                        writeLog_("No network path for the directory for temporary files given. Aborting!");
 
618
                                        return ILLEGAL_PARAMETERS;
 
619
                                }
 
620
                                if ( !correctNetworkPath(temp_data_directory_network) )
 
621
                                {
 
622
                                        writeLog_(temp_data_directory_network + "is no network path. Aborting!");
 
623
                                        return ILLEGAL_PARAMETERS;
 
624
                                }
 
625
 
 
626
                                database = getStringOption_("db");
 
627
                                if ( database.empty() )
 
628
                                {
 
629
                                        writeLog_("No database specified. Aborting!");
 
630
                                        return ILLEGAL_PARAMETERS;
 
631
                                }
 
632
                                files[database] = readable;
 
633
 
 
634
                                if ( !sequest_out )
 
635
                                {
 
636
                                        input_filename = getStringOption_("out");
 
637
                                        if ( input_filename.empty() )
 
638
                                        {
 
639
                                                writeLog_("No output file specified. Aborting!");
 
640
                                                return ILLEGAL_PARAMETERS;
 
641
                                        }
 
642
 
 
643
                                        input_file_directory_network = getStringOption_("sequest_input_directory_network");
 
644
                                        if ( input_file_directory_network.empty() )
 
645
                                        {
 
646
                                                writeLog_("No network path for the directory of the Sequest input file given. Aborting!");
 
647
                                                return ILLEGAL_PARAMETERS;
 
648
                                        }
 
649
                                        if ( !correctNetworkPath(input_file_directory_network) )
 
650
                                        {
 
651
                                                writeLog_(input_file_directory_network + "is no network path. Aborting!");
 
652
                                                return ILLEGAL_PARAMETERS;
 
653
                                        }
 
654
                                }
 
655
                                else
 
656
                                {
 
657
                                        input_filename = getStringOption_("sequest_input");
 
658
                                        if ( input_filename.empty() )
 
659
                                        {
 
660
                                                input_filename = temp_data_directory + "temp.sequest.in";
 
661
                                                files[input_filename] = (writable | delete_afterwards);
 
662
                                                input_file_directory_network = temp_data_directory_network;
 
663
                                        }
 
664
                                        else
 
665
                                        {
 
666
                                                input_file_directory_network = getStringOption_("sequest_input_directory_network");
 
667
                                                if ( input_file_directory_network.empty() )
 
668
                                                {
 
669
                                                        writeLog_("No network path for the directory of the Sequest input file given. Aborting!");
 
670
                                                        return ILLEGAL_PARAMETERS;
 
671
                                                }
 
672
                                                files[input_filename] = readable;
 
673
                                        }
 
674
                                        if ( !correctNetworkPath(input_file_directory_network) )
 
675
                                        {
 
676
                                                writeLog_(input_file_directory_network + "is no network path. Aborting!");
 
677
                                                return ILLEGAL_PARAMETERS;
 
678
                                        }
 
679
                                }
 
680
                        }
 
681
 
 
682
                        if ( sequest_in && sequest_out )
 
683
                        {
 
684
                                user = getStringOption_("user");
 
685
 
 
686
                                password = getStringOption_("password");
 
687
 
 
688
                                sequest_directory_win = getStringOption_("sequest_directory_win");
 
689
                                if ( !sequest_directory_win.hasSuffix("sequest.exe") ) sequest_directory_win.ensureLastChar('\\');
 
690
                                if ( !isWinFormat(sequest_directory_win) )
 
691
                                {
 
692
                                        writeLog_("Windows path for the SEQUEST working directory has wrong format: " + sequest_directory_win + ". Aborting!");
 
693
                                        return ILLEGAL_PARAMETERS;
 
694
                                }
 
695
                                else if ( sequest_directory_win.empty() )
 
696
                                {
 
697
                                        writeLog_("No windows path for the SEQUEST working directory given. Assuming PATH variable to be set accordingly!");
 
698
                                        sequest_directory_win = "sequest";
 
699
                                }
 
700
 
 
701
                                sequest_computer = getStringOption_("sequest_computer");
 
702
                                if ( sequest_computer.empty() )
 
703
                                {
 
704
                                        writeLog_("No sequest computer name given. Aborting!");
 
705
                                        return ILLEGAL_PARAMETERS;
 
706
                                }
 
707
                        }
 
708
 
 
709
                        if ( logfile == temp_data_directory + "sequest.log")
 
710
                        {
 
711
                                writeLog_("The logfile must not be named " + temp_data_directory + "sequest.log. Aborting!");
 
712
                                return ILLEGAL_PARAMETERS;
 
713
                        }
 
714
 
 
715
                        // the batchfile will have to be in the temporary data directory, otherwise it's hard to connect to windows and use the other files
 
716
                        batch_filename = "sequest_run.bat";
 
717
                        files[temp_data_directory + batch_filename] = (writable | delete_afterwards);
 
718
 
 
719
                        if ( sequest_in )
 
720
                        {
 
721
                                database_directory_network = getStringOption_("db_directory_network");
 
722
                                if ( !correctNetworkPath(database_directory_network) )
 
723
                                {
 
724
                                        writeLog_(database_directory_network + "is no network path. Aborting!");
 
725
                                        return ILLEGAL_PARAMETERS;
 
726
                                }
 
727
                                string_buffer = File::basename(database);
 
728
                                if ( !database_directory_network.hasSuffix(string_buffer) ) database_directory_network.append(string_buffer);
 
729
                                sequest_infile.setDatabase(database_directory_network);
 
730
 
 
731
                                Real_buffer = getDoubleOption_("precursor_mass_tolerance");
 
732
                                if ( Real_buffer == -1 )
 
733
                                {
 
734
                                        writeLog_("No precursor mass tolerance specified. Aborting!");
 
735
                                        return ILLEGAL_PARAMETERS;
 
736
                                }
 
737
                                else if ( Real_buffer < 0 )
 
738
                                {
 
739
                                        writeLog_("Precursor mass tolerance < 0. Aborting!");
 
740
                                        return ILLEGAL_PARAMETERS;
 
741
                                }
 
742
                                else sequest_infile.setPrecursorMassTolerance(Real_buffer);
 
743
 
 
744
                                Real_buffer = getDoubleOption_("peak_mass_tolerance");
 
745
                                if ( Real_buffer == -1 )
 
746
                                {
 
747
                                        writeLog_("No peak mass tolerance specified. Aborting!");
 
748
                                        return ILLEGAL_PARAMETERS;
 
749
                                }
 
750
                                else if ( Real_buffer < 0 )
 
751
                                {
 
752
                                        writeLog_("peak mass tolerance < 0. Aborting!");
 
753
                                        return ILLEGAL_PARAMETERS;
 
754
                                }
 
755
                                else sequest_infile.setPeakMassTolerance(Real_buffer);
 
756
 
 
757
                                Real_buffer = getDoubleOption_("match_peak_tol");
 
758
                                if ( Real_buffer == -1 )
 
759
                                {
 
760
                                        writeLog_("No match peak tolerance specified. Aborting!");
 
761
                                        return ILLEGAL_PARAMETERS;
 
762
                                }
 
763
                                else if ( Real_buffer < 0 )
 
764
                                {
 
765
                                        writeLog_("Match peak tolerance < 0. Aborting!");
 
766
                                        return ILLEGAL_PARAMETERS;
 
767
                                }
 
768
                                else sequest_infile.setMatchPeakTolerance(Real_buffer);
 
769
 
 
770
                                Real_buffer = getDoubleOption_("ion_cutoff");
 
771
                                if ( Real_buffer < 0 || Real_buffer > 1 )
 
772
                                {
 
773
                                        writeLog_("Ion cutoff not in [0,1]. Aborting!");
 
774
                                        return ILLEGAL_PARAMETERS;
 
775
                                }
 
776
                                else sequest_infile.setIonCutoffPercentage(Real_buffer);
 
777
 
 
778
                                int_buffer = getIntOption_("pep_mass_unit");
 
779
                                if ( (int_buffer < 0) || (int_buffer > max_peptide_mass_units) )
 
780
                                {
 
781
                                        writeLog_("Illegal peptide mass unit (not in [0,2]). Aborting!");
 
782
                                        return ILLEGAL_PARAMETERS;
 
783
                                }
 
784
                                else sequest_infile.setPeptideMassUnit(int_buffer);
 
785
 
 
786
                                int_buffer = getIntOption_("num_results");
 
787
                                if ( (int_buffer < 1) )
 
788
                                {
 
789
                                        writeLog_("Illegal number of results (< 1). Aborting!");
 
790
                                        return ILLEGAL_PARAMETERS;
 
791
                                }
 
792
                                else sequest_infile.setOutputLines(int_buffer);
 
793
 
 
794
                                string_buffer = getStringOption_("enzyme_info");
 
795
                                if ( !string_buffer.empty() )
 
796
                                {
 
797
                                        string_buffer.split(':', substrings);
 
798
                                        vector< String > enzyme_info;
 
799
                                        for ( vector< String >::iterator einfo_it = substrings.begin(); einfo_it != substrings.end(); ++ einfo_it )
 
800
                                        {
 
801
                                                einfo_it->split(',', enzyme_info);
 
802
                                                if ( (enzyme_info.size() < 3) || (enzyme_info.size() > 4) )
 
803
                                                {
 
804
                                                        writeLog_("Illegal number of entries for enzyme (not in [3,4]). Aborting!");
 
805
                                                        return ILLEGAL_PARAMETERS;
 
806
                                                }
 
807
                                                if ( !((enzyme_info[1] == "0") || (enzyme_info[1] == "1"))  )
 
808
                                                {
 
809
                                                        writeLog_("Cut direction for enzyme not specified correctly (has to be 1 (N to C)) or 0 (C to N))). Aborting!");
 
810
                                                        return ILLEGAL_PARAMETERS;
 
811
                                                }
 
812
                                                if ( enzyme_info.size() == 3 ) enzyme_info.push_back("-");
 
813
                                                sequest_infile.addEnzymeInfo(enzyme_info);
 
814
                                        }
 
815
                                }
 
816
                                else
 
817
                                {
 
818
                                        substrings.clear();
 
819
                                        SignedSize highest_enzyme_number = sequest_infile.setEnzyme(getStringOption_("cleavage"));
 
820
                                        if ( highest_enzyme_number )
 
821
                                        {
 
822
                                                writeLog_("Chosen enzym is not in list. Aborting!");
 
823
                                                writeLog_(sequest_infile.getEnzymeInfoAsString());
 
824
                                                return ILLEGAL_PARAMETERS;
 
825
                                        }
 
826
                                }
 
827
 
 
828
                                Real_buffer = getDoubleOption_("prot_mass");
 
829
                                if ( Real_buffer < 0 )
 
830
                                {
 
831
                                        writeLog_("Illegal minimum protein mass (< 0). Aborting!");
 
832
                                        return ILLEGAL_PARAMETERS;
 
833
                                }
 
834
                                else
 
835
                                {
 
836
                                        Real_buffer2 = getDoubleOption_("max_prot_mass_or_tol");
 
837
                                        if ( Real_buffer2 < 0 )
 
838
                                        {
 
839
                                                writeLog_("Illegal maximum protein mass/ tolerance (< 0). Aborting!");
 
840
                                                return ILLEGAL_PARAMETERS;
 
841
                                        }
 
842
                                        else if ( Real_buffer2 < Real_buffer && Real_buffer2 > 100  ) // the second value has either got to be a mass (greater than the first one), or a probability
 
843
                                        {
 
844
                                                writeLog_("Illegal tolerance (not in [0, 100]). Aborting!");
 
845
                                                return ILLEGAL_PARAMETERS;
 
846
                                        }
 
847
                                        else sequest_infile.setProteinMassFilter(String(Real_buffer) + " " +  String(Real_buffer2));
 
848
                                }
 
849
 
 
850
                                int_buffer = getIntOption_("max_num_dif_AA_per_mod");
 
851
                                if ( int_buffer < 0 )
 
852
                                {
 
853
                                        writeLog_("No maximum number of modified amino acids per different modification. Aborting!");
 
854
                                        return ILLEGAL_PARAMETERS;
 
855
                                }
 
856
                                else sequest_infile.setMaxAAPerModPerPeptide(int_buffer);
 
857
 
 
858
                                int_buffer = getIntOption_("max_num_dif_mods_per_peptide");
 
859
                                if ( int_buffer < 0 )
 
860
                                {
 
861
                                        writeLog_("No maximum number of differential modifications per peptide. Aborting!");
 
862
                                        return ILLEGAL_PARAMETERS;
 
863
                                }
 
864
                                else sequest_infile.setMaxModsPerPeptide(int_buffer);
 
865
 
 
866
                                int_buffer = getIntOption_("nuc_reading_frame");
 
867
                                if ( (int_buffer < 0) || (int_buffer > 9) )
 
868
                                {
 
869
                                        writeLog_("Illegal number for nucleotide reading frame. Aborting!");
 
870
                                        return ILLEGAL_PARAMETERS;
 
871
                                }
 
872
                                else sequest_infile.setNucleotideReadingFrame(int_buffer);
 
873
 
 
874
                                int_buffer = getIntOption_("max_num_int_cleav_sites");
 
875
                                if ( int_buffer < 0 )
 
876
                                {
 
877
                                        writeLog_("Illegal number of maximum internal cleavage sites. Aborting!");
 
878
                                        return ILLEGAL_PARAMETERS;
 
879
                                }
 
880
                                else sequest_infile.setMaxInternalCleavageSites(int_buffer);
 
881
 
 
882
                                int_buffer = getIntOption_("match_peak_count");
 
883
                                if ( int_buffer < 0 )
 
884
                                {
 
885
                                        writeLog_("Illegal number of auto-detected peaks to try matching. Aborting!");
 
886
                                        return ILLEGAL_PARAMETERS;
 
887
                                }
 
888
                                else sequest_infile.setMatchPeakCount(int_buffer);
 
889
 
 
890
                                int_buffer = getIntOption_("match_peak_allowed_error");
 
891
                                if ( int_buffer < 0 )
 
892
                                {
 
893
                                        writeLog_("Illegal number of allowed errors in matching auto-detected peaks. Aborting!");
 
894
                                        return ILLEGAL_PARAMETERS;
 
895
                                }
 
896
                                else sequest_infile.setMatchPeakAllowedError(int_buffer);
 
897
 
 
898
                                sequest_infile.setShowFragmentIons(getFlag_("show_fragment_ions"));
 
899
                                sequest_infile.setRemovePrecursorNearPeaks(getFlag_("remove_precursor_peak"));
 
900
                                sequest_infile.setMassTypeParent(getFlag_("mass_type_precursor"));
 
901
                                sequest_infile.setMassTypeFragment(getFlag_("mass_type_peak"));
 
902
                                sequest_infile.setNormalizeXcorr(getFlag_("normalize_xcorr"));
 
903
                                sequest_infile.setResiduesInUpperCase(!getFlag_("residues_in_lower_case"));
 
904
 
 
905
                                string_buffer = getStringOption_("neutral_loss_ABY");
 
906
                                string_buffer2 = "01";
 
907
                                if ( (string_buffer.size() != 3) || (string_buffer2.find(string_buffer[0], 0) == String::npos) || (string_buffer2.find(string_buffer[1], 0) == String::npos) || (string_buffer2.find(string_buffer[2], 0) == String::npos) )
 
908
                                {
 
909
                                        writeLog_("Neutral losses for ABY-ions not given (or illegal values given). Aborting!");
 
910
                                        return ILLEGAL_PARAMETERS;
 
911
                                }
 
912
                                else
 
913
                                {
 
914
                                        string_buffer.insert(2, 1, ' ');
 
915
                                        string_buffer.insert(1, 1, ' ');
 
916
                                        sequest_infile.setNeutralLossesForIons(string_buffer);
 
917
                                }
 
918
 
 
919
                                string_buffer = getStringOption_("ion_series_weights");
 
920
                                string_buffer.split(',', substrings);
 
921
                                if ( substrings.size() != 9 )
 
922
                                {
 
923
                                        writeLog_("Weights for ion series not given (or illegal values given). Aborting!");
 
924
                                        return ILLEGAL_PARAMETERS;
 
925
                                }
 
926
                                else
 
927
                                {
 
928
                                        for ( vector< String >::iterator substrings_it = substrings.begin(); substrings_it != substrings.end(); ++substrings_it )
 
929
                                        {
 
930
                                                // the values are expected to be DoubleReal, otherwise they will be seen as 0!
 
931
                                                Real_buffer = String(substrings_it->c_str()).toDouble();
 
932
                                                if ( (Real_buffer < 0) || (Real_buffer > 1) )
 
933
                                                {
 
934
                                                        writeLog_("Illegal weights for ion series given. Aborting!");
 
935
 
 
936
                                                        return ILLEGAL_PARAMETERS;
 
937
                                                }
 
938
                                                (*substrings_it) = String(Real_buffer);
 
939
                                        }
 
940
                                        string_buffer.concatenate(substrings.begin(), substrings.end(), " ");
 
941
                                        sequest_infile.setIonSeriesWeights(string_buffer);
 
942
                                }
 
943
                                // modifications
 
944
                                string_buffer = getStringOption_("modifications");
 
945
                                monoisotopic = getFlag_("use_monoisotopic_mod_mass");
 
946
                                try
 
947
                                {
 
948
                                        sequest_infile.handlePTMs(string_buffer, modifications_filename, monoisotopic);
 
949
                                }
 
950
                                catch ( Exception::FileNotFound fnf_e )
 
951
                                {
 
952
                                        writeLog_("No modifications XML file given. Aborting!");
 
953
                                        return INPUT_FILE_NOT_FOUND;
 
954
                                }
 
955
                                catch ( Exception::FileNotReadable fnr_e )
 
956
                                {
 
957
                                        writeLog_("Modifications XML file is not readable. Aborting!");
 
958
                                        return INPUT_FILE_NOT_READABLE;
 
959
                                }
 
960
                                catch ( Exception::ParseError p_e )
 
961
                                {
 
962
                                        writeLog_(String(p_e.getMessage()) + ". Aborting!");
 
963
                                        return PARSE_ERROR;
 
964
                                }
 
965
                                string_buffer = getStringOption_("partial_sequence");
 
966
                                string_buffer.substitute(',', ' ');
 
967
                                sequest_infile.setPartialSequence(string_buffer);
 
968
 
 
969
                                string_buffer = getStringOption_("header_filter");
 
970
                                string_buffer.substitute(',', ' ');
 
971
                                sequest_infile.setSequenceHeaderFilter(string_buffer);
 
972
                        }
 
973
 
 
974
                        if ( sequest_out )
 
975
                        {
 
976
                                output_filename = getStringOption_("out");
 
977
                                if ( output_filename.empty() )
 
978
                                {
 
979
                                        writeLog_("No output file specified. Aborting!");
 
980
                                        return ILLEGAL_PARAMETERS;
 
981
                                }
 
982
                                files[output_filename] = writable;
 
983
 
 
984
                                p_value = getDoubleOption_("p_value");
 
985
                                if ( (p_value <= 0) || (p_value > 1) )
 
986
                                {
 
987
                                        writeLog_("P-value not in (0,1]. Aborting!");
 
988
                                        return ILLEGAL_PARAMETERS;
 
989
                                }
 
990
                        }
 
991
 
 
992
                        //-------------------------------------------------------------
 
993
                        // running program according to parameters
 
994
                        //-------------------------------------------------------------
 
995
                        // checking accessability of files
 
996
                        bool existed(false);
 
997
                        Size file_tag(0);
 
998
 
 
999
                        for ( map< String, Size >::const_iterator files_it = files.begin(); files_it != files.end(); ++files_it )
 
1000
                        {
 
1001
                                string_buffer = files_it->first;
 
1002
                                file_tag = files_it->second;
 
1003
 
 
1004
                                if ( (file_tag & exist || file_tag & readable) && !File::exists(string_buffer) )
 
1005
                                {
 
1006
                                        exit_code = INPUT_FILE_NOT_FOUND;
 
1007
                                        writeLog_(String("File ")+ string_buffer + " does not exist. Aborting!");
 
1008
                                        break;
 
1009
                                }
 
1010
 
 
1011
                                if ( (file_tag & readable) && !File::readable(string_buffer) )
 
1012
                                {
 
1013
                                        exit_code = INPUT_FILE_NOT_READABLE;
 
1014
                                        writeLog_(String("File ")+ string_buffer + " is not readable. Aborting!");
 
1015
                                        break;
 
1016
                                }
 
1017
 
 
1018
                                existed = File::exists(string_buffer);
 
1019
                                if ( (file_tag & writable) && !File::writable(string_buffer) )
 
1020
                                {
 
1021
                                        exit_code = CANNOT_WRITE_OUTPUT_FILE;
 
1022
                                        writeLog_(String("Cannot write file ")+ string_buffer + ". Aborting!");
 
1023
                                        break;
 
1024
                                }
 
1025
                                else if ( !existed ) remove(string_buffer.c_str());
 
1026
                                existed = false;
 
1027
                        }
 
1028
                        // creating the input file
 
1029
                        if ( exit_code == EXECUTION_OK && sequest_in )
 
1030
                        {
 
1031
                                sequest_infile.store(input_filename);
 
1032
                        }
 
1033
 
 
1034
                        if ( exit_code == EXECUTION_OK )
 
1035
                        {
 
1036
                                // check the Mz files, get the names for the dtas and check whether they do no already exist
 
1037
                                bool make_dtas = ( sequest_out && !sequest_in ) ? false : true; // if only sequest_out is set, just get the retention times
 
1038
                                // creating the dta files
 
1039
                                if ( make_dtas ) writeLog_("creating dta files");
 
1040
                                // first get the dta names
 
1041
                                for ( vector< String >::iterator spectra_it = spectra.begin(); spectra_it != spectra.end(); ++spectra_it )
 
1042
                                {
 
1043
                                        *spectra_it = File::absolutePath(*spectra_it);
 
1044
                                        type = fh.getTypeByContent(*spectra_it);
 
1045
                                        if ( type == FileTypes::UNKNOWN )
 
1046
                                        {
 
1047
                                                writeLog_("Could not determine type of the file. Aborting!");
 
1048
                                                exit_code = PARSE_ERROR;
 
1049
                                                break;
 
1050
                                        }
 
1051
                                        fh.loadExperiment(*spectra_it, msexperiment, type);
 
1052
 
 
1053
                                        msms_spectra_in_file = MSExperiment2DTAs(msexperiment, temp_data_directory + File::basename(*spectra_it), charges, outfile_names_and_precursor_retention_times, dta_filenames, false);
 
1054
 
 
1055
                                        msms_spectra_altogether += msms_spectra_in_file;
 
1056
 
 
1057
                                        // if make_dtas is set, check whether one of them does already exist, if so, stop the adapter
 
1058
                                        if ( make_dtas )
 
1059
                                        {
 
1060
                                                for ( vector< String >::const_iterator dta_names_it = dta_filenames.begin(); dta_names_it != dta_filenames.end(); ++dta_names_it )
 
1061
                                                {
 
1062
                                                        string_buffer = temp_data_directory + *dta_names_it;
 
1063
                                                        if ( File::exists(string_buffer) )
 
1064
                                                        {
 
1065
                                                                writeLog_("The file " + string_buffer + " does already exist in directory " + temp_data_directory + ". Please remove it first. Aborting!");
 
1066
                                                                // deleting all temporary files
 
1067
                                                                for ( map< String, Size >::const_iterator files_it = files.begin(); files_it != files.end(); ++files_it )
 
1068
                                                                {
 
1069
                                                                        if ( files_it->second & delete_afterwards ) remove(files_it->first.c_str());
 
1070
                                                                }
 
1071
                                                                exit_code = UNKNOWN_ERROR;
 
1072
                                                                break;
 
1073
                                                        }
 
1074
                                                }
 
1075
                                        }
 
1076
                                }
 
1077
                        }
 
1078
 
 
1079
                        // if no msms spectra were found
 
1080
                        if ( exit_code == EXECUTION_OK && !msms_spectra_altogether )
 
1081
                        {
 
1082
                                writeLog_("No MS/MS spectra found in any of the mz files. Aborting!");
 
1083
                                exit_code = UNKNOWN_ERROR;
 
1084
                        }
 
1085
 
 
1086
                        // if make_dtas is set and non of the dta files did already exist, create them
 
1087
                        if ( exit_code == EXECUTION_OK && make_dtas )
 
1088
                        {
 
1089
                                for ( vector< String >::const_iterator spectra_it = spectra.begin(); spectra_it != spectra.end(); ++spectra_it )
 
1090
                                {
 
1091
                                        type = fh.getTypeByContent(*spectra_it);
 
1092
                                        if ( type == FileTypes::UNKNOWN )
 
1093
                                        {
 
1094
                                                writeLog_("Could not determine type of the file. Aborting!");
 
1095
                                                exit_code = PARSE_ERROR;
 
1096
                                        }
 
1097
                                        fh.loadExperiment(*spectra_it, msexperiment, type);
 
1098
                                        basename = File::basename(*spectra_it);
 
1099
                                        dta_files_common_name = temp_data_directory + basename;
 
1100
                                        msms_spectra_in_file = MSExperiment2DTAs(msexperiment, dta_files_common_name, charges, outfile_names_and_precursor_retention_times, dta_filenames, make_dtas);
 
1101
                                        writeLog_(String(msms_spectra_in_file) + " MS/MS spectra in file " + *spectra_it);
 
1102
                                }
 
1103
                        }
 
1104
 
 
1105
                        // (3.2.3) running the program
 
1106
                        if ( exit_code == EXECUTION_OK )
 
1107
                        {
 
1108
                                if ( sequest_in && sequest_out )
 
1109
                                {
 
1110
                                        // creating a batch file for windows (command doesn't accept commands that are longer than 256 chars)
 
1111
                                        String sequest_screen_output; // direct the screen-output to a file
 
1112
                                        do
 
1113
                                        {
 
1114
                                                sequest_screen_output = String::random(10);
 
1115
                                        }
 
1116
                                        while ( File::exists(sequest_screen_output) );
 
1117
                                        files[temp_data_directory + sequest_screen_output] = (writable | delete_afterwards);
 
1118
 
 
1119
                                        ofstream batchfile(String(temp_data_directory + batch_filename).c_str());
 
1120
//                                      if ( !batchfile )
 
1121
//                                      {
 
1122
//                                              throw Exception::UnableToCreateFile(__FILE__, __LINE__, __PRETTY_FUNCTION__, temp_data_directory + batch_filename);
 
1123
//                                      }
 
1124
                                        String call = "rdesktop";
 
1125
                                        if ( !user.empty() ) call.append(" -u " + user);
 
1126
                                        if ( !password.empty() ) call.append(" -p \"" + password + "\"");
 
1127
                                        call.append(" -s cmd\\ /K\\ \"");
 
1128
        //                              call.append("echo net use " + temp_data_directory_win.substr(0,2) + " \\\\" + temp_data_directory_network + " && ");
 
1129
                                        call.append("net use " + temp_data_directory_win.substr(0,2) + " \\\\" + temp_data_directory_network.substr(0, temp_data_directory_network.length() - 1) + " && ");
 
1130
        //                              call.append(" net use " + temp_data_directory_win.substr(0,2) + " " + temp_data_directory_network + " && ");
 
1131
 
 
1132
                                        batchfile << String(" cd " + temp_data_directory_win + " && " + temp_data_directory_win.substr(0,2));
 
1133
 
 
1134
                                        for ( Size i = 0; i <= Size(dtas / max_dtas_per_run); ++i )
 
1135
                                        {
 
1136
                                                batchfile << " && " << sequest_directory_win << "sequest.exe -P" << input_file_directory_network << File::basename(input_filename) << "  " << temp_data_directory_network << "*.dta_" << i<< " > " <<  temp_data_directory_network << sequest_screen_output << " && move sequest.log sequest.log" << i;
 
1137
                                        }
 
1138
                                        batchfile << " && " << sequest_directory_win.substr(0,2) << endl;
 
1139
        //                              batchfile << std::endl << String(" net use /delete " + temp_data_directory_win.substr(0,2));
 
1140
        //                              batchfile << std::endl << "logoff";
 
1141
                                        batchfile.close();
 
1142
                                        batchfile.clear();
 
1143
 
 
1144
                                        call.append(temp_data_directory_win + batch_filename + " && net use /delete " + temp_data_directory_win.substr(0,2) + " && logoff" +  "\" " + sequest_computer);
 
1145
                                        writeLog_("System call: " + call);
 
1146
          // todo: replace this by a call to QProcess::execute(<exe>,<args>), you can also use setWorkingDirectory() before
 
1147
                                        int status = system(call.c_str());
 
1148
 
 
1149
                                        if ( status != 0 )
 
1150
                                        {
 
1151
                                                exit_code = EXTERNAL_PROGRAM_ERROR;
 
1152
                                        }
 
1153
                                        else
 
1154
                                        {
 
1155
                                                bool no_log(false);
 
1156
                                                string_buffer.clear();
 
1157
                                                for ( Size i = 0; i <= (Size) (dtas / max_dtas_per_run); ++i )
 
1158
                                                {
 
1159
                                                        ifstream sequest_log(string(temp_data_directory + "sequest.log" + String(i)).c_str()); // write sequest log to logfile
 
1160
                                                        if ( !sequest_log )
 
1161
                                                        {
 
1162
                                                                no_log = true;
 
1163
                                                                break;
 
1164
                                                        }
 
1165
                                                        else
 
1166
                                                        {
 
1167
                                                                sequest_log.seekg (0, ios::end);
 
1168
                                                                streampos length = sequest_log.tellg();
 
1169
                                                                sequest_log.seekg (0, ios::beg);
 
1170
                                                                char * buffer = new char[length];
 
1171
                                                                sequest_log.read (buffer, length);
 
1172
                                                                sequest_log.close();
 
1173
                                                                sequest_log.clear();
 
1174
                                                                string_buffer2.assign(buffer);
 
1175
                                                                delete(buffer);
 
1176
                                                                string_buffer.append(string_buffer2.substr(string_buffer2.find("Total search time")));
 
1177
                                                                remove(string(temp_data_directory + "sequest.log" + String(i)).c_str());
 
1178
                                                        }
 
1179
                                                }
 
1180
                                                if ( no_log )
 
1181
                                                {
 
1182
                                                        writeLog_("No Sequest log found!");
 
1183
                                                        exit_code = EXTERNAL_PROGRAM_ERROR;
 
1184
                                                }
 
1185
                                                else writeLog_(string_buffer);
 
1186
                                        }
 
1187
                                }
 
1188
                        }
 
1189
 
 
1190
                        if ( sequest_out )
 
1191
                        {
 
1192
                                if ( exit_code == EXECUTION_OK )
 
1193
                                {
 
1194
                                        SequestOutfile sequest_outfile;
 
1195
                                        if (!File::fileList(out_directory, String("*.out"), out_files))
 
1196
                                        {
 
1197
                                                writeLog_(String("Error: No .out files found in '") + out_directory + "'. Aborting!");
 
1198
                                                exit_code = UNKNOWN_ERROR;
 
1199
                                        }
 
1200
                                }
 
1201
                                if ( exit_code == EXECUTION_OK )
 
1202
                                {
 
1203
                                        vector< pair < String, vector< DoubleReal > > > filenames_and_pvalues;
 
1204
                                        for ( StringList::iterator out_files_it = out_files.begin(); out_files_it != out_files.end(); ++out_files_it )
 
1205
                                        {
 
1206
                                                filenames_and_pvalues.push_back(make_pair(out_directory + *out_files_it, vector< DoubleReal >()));
 
1207
                                        }
 
1208
        //                              sequest_outfile.getPValuesFromOutFiles(filenames_and_pvalues);
 
1209
 
 
1210
                                        // set the parameters
 
1211
                                        ProteinIdentification::SearchParameters sp;
 
1212
                                        sp.db = "Fasta";
 
1213
                                        sp.taxonomy = sequest_infile.getSequenceHeaderFilter();
 
1214
                                        if ( monoisotopic ) sp.mass_type = ProteinIdentification::MONOISOTOPIC;
 
1215
                                        else sp.mass_type = ProteinIdentification::AVERAGE;
 
1216
                                        for ( vector< Int >::const_iterator charges_it = charges.begin(); charges_it != charges.end(); ++charges_it )
 
1217
                                        {
 
1218
                                                if ( *charges_it > 0 ) sp.charges.append("+");
 
1219
                                                sp.charges.append(String(*charges_it));
 
1220
                                        }
 
1221
                                        if ( sequest_infile.getEnzymeName() == "Trypsin" ) sp.enzyme = ProteinIdentification::TRYPSIN;
 
1222
                                        else if ( sequest_infile.getEnzymeName() == "No_Enzyme" ) sp.enzyme = ProteinIdentification::NO_ENZYME;
 
1223
                                        else sp.enzyme = ProteinIdentification::UNKNOWN_ENZYME;
 
1224
                                        sp.peak_mass_tolerance = sequest_infile.getPeakMassTolerance();
 
1225
                                        sp.precursor_tolerance = sequest_infile.getPrecursorMassTolerance();
 
1226
                                        protein_identification.setSearchParameters(sp);
 
1227
 
 
1228
                                        Size peptide_identification_size = peptide_identifications.size();
 
1229
                                        for ( vector< pair < String, vector< DoubleReal > > >::iterator filenames_and_pvalues_it = filenames_and_pvalues.begin(); filenames_and_pvalues_it != filenames_and_pvalues.end(); ++filenames_and_pvalues_it )
 
1230
                                        {
 
1231
                                                try
 
1232
                                                {
 
1233
                                                        sequest_outfile.load(filenames_and_pvalues_it->first, peptide_identifications, protein_identification, p_value, filenames_and_pvalues_it->second, database);
 
1234
                                                }
 
1235
                                                catch( Exception::ParseError pe )
 
1236
                                                {
 
1237
                                                        writeLog_(pe.getMessage());
 
1238
                                                        exit_code = INPUT_FILE_CORRUPT;
 
1239
                                                        break;
 
1240
                                                }
 
1241
 
 
1242
                                                // save the retention times if peptides have been identified to the p-level
 
1243
                                                if ( peptide_identification_size != peptide_identifications.size() )
 
1244
                                                {
 
1245
                                                        peptide_identification_size = peptide_identifications.size();
 
1246
                                                        string_buffer = File::basename(filenames_and_pvalues_it->first);
 
1247
                                                        if ( outfile_names_and_precursor_retention_times.find(string_buffer) != outfile_names_and_precursor_retention_times.end() ) peptide_identifications.back().setMetaValue("RT",  outfile_names_and_precursor_retention_times[string_buffer]);
 
1248
                                                        else peptide_identifications.back().setMetaValue("RT", 0);
 
1249
                                                }
 
1250
                                        }
 
1251
                                }
 
1252
                                if ( exit_code == EXECUTION_OK )
 
1253
                                {
 
1254
                                        pis.push_back(protein_identification);
 
1255
 
 
1256
                                        IdXMLFile().store(output_filename, pis, peptide_identifications);
 
1257
 
 
1258
                                        // remove all outs
 
1259
                                        if ( !keep_out_files )
 
1260
                                        {
 
1261
                                                writeLog_("removing out files");
 
1262
                                                for ( StringList::const_iterator out_files_it = out_files.begin(); out_files_it != out_files.end(); ++out_files_it )
 
1263
                                                {
 
1264
                                                        if ( !File::remove(out_directory + *out_files_it) )
 
1265
                                                        {
 
1266
                                                                writeLog_(String("'") + out_directory + *out_files_it + "' could not be removed!");
 
1267
                                                        }
 
1268
                                                }
 
1269
                                        }
 
1270
                                }
 
1271
                        }
 
1272
 
 
1273
                        if ( exit_code == EXTERNAL_PROGRAM_ERROR )
 
1274
                        {
 
1275
                                writeLog_("Sequest problem. Aborting! (Details can be seen in the logfile: \"" + logfile + "\")");
 
1276
                                files[logfile] = readable;
 
1277
                        }
 
1278
 
 
1279
                        // deleting all temporary files
 
1280
                        writeLog_("removing temporary files");
 
1281
                        for ( map< String, Size >::const_iterator files_it = files.begin(); files_it != files.end(); ++files_it )
 
1282
                        {
 
1283
                                if ( files_it->second & delete_afterwards ) remove(files_it->first.c_str());
 
1284
                        }
 
1285
                        // remove all dtas
 
1286
                        if ( !keep_dta_files )
 
1287
                        {
 
1288
                                writeLog_("removing dta files");
 
1289
                                for ( vector< String >::const_iterator dta_names_it = dta_filenames.begin(); dta_names_it != dta_filenames.end(); ++dta_names_it )
 
1290
                                {
 
1291
                                        if ( !File::remove(*dta_names_it) ) writeLog_("'" + string_buffer + "' could not be removed!");
 
1292
                                }
 
1293
                        }
 
1294
 
 
1295
                        return exit_code;
 
1296
                }
 
1297
};
 
1298
 
 
1299
//@endcond
 
1300
 
 
1301
 
 
1302
 
 
1303
int main( int argc, const char** argv )
 
1304
{
 
1305
        TOPPSequestAdapter tool;
 
1306
 
 
1307
        return tool.main(argc,argv);
 
1308
}