4
* @author Yu Peng (ypeng@cs.hku.hk)
17
#include "misc/options_description.h"
18
#include "misc/utils.h"
19
#include "sequence/sequence.h"
20
#include "sequence/sequence_io.h"
21
#include "sequence/short_sequence.h"
25
// Bound to the "paired" command-line option in main
// ("if the reads are paired-end in one file").
bool is_paired = false;
26
// Bound to the "merge" command-line option in main
// ("if the reads are paired-end in two files").
bool is_merged = false;
27
// Bound to the "filter" command-line option in main
// ("filter out reads containing 'N'").
bool is_filtered = false;
29
// Global read storage: filled by ReadSequence() in main and indexed by
// Compare(), which is why it lives at file scope rather than inside main.
deque<ShortSequence> reads;
32
bool Compare(int x, int y)
34
if (reads[x] != reads[y])
35
return reads[x] < reads[y];
37
return reads[x+1] < reads[y+1];
40
// Entry point of fq2fa. The fragments visible here register the command-line
// options, print a usage message on error, and (in the portion shown) read
// sequences from argv[1], sort index entries with Compare, and write read
// pairs to argv[2] through a FastaWriter.
// NOTE(review): this listing is fragmentary -- braces, the declarations of
// min_length / aux / last / id / index, the try/catch and other branches are
// not visible -- so the control flow between the fragments below is presumed.
int main(int argc, char *argv[])
42
// Register the supported options; each one is bound to a variable that
// Parse() is presumably responsible for filling in.
OptionsDescription desc;
43
desc.AddOption("paired", "", is_paired, "if the reads are paired-end in one file");
44
desc.AddOption("merge", "", is_merged, "if the reads are paired-end in two files");
45
desc.AddOption("filter", "", is_filtered, "filter out reads containing 'N'");
46
// min_length is declared outside the visible lines.
desc.AddOption("min_length", "", min_length, "minimum length ");
50
desc.Parse(argc, argv);
53
// Raised when too few arguments are supplied (the guarding condition is not
// visible in this listing).
throw logic_error("not enough parameters");
58
// Error path: report the exception text followed by the usage banner.
// Presumably inside a catch block for the exception `e` -- not visible here.
cerr << e.what() << endl;
59
cerr << "fq2fa - Convert Fastq sequences to Fasta sequences." << endl;
60
cerr << "Usage: fq2fa tmp.fq tmp.fa [...] " << endl;
61
cerr << " fq2fa --paired tmp.fq tmp.fa" << endl;
62
cerr << " fq2fa --merge tmp_1.fq tmp_2.fq tmp.fa" << endl;
63
cerr << "Allowed Options: " << endl;
68
// Load the input file named by argv[1] into the global `reads`.
ReadSequence(argv[1], reads);
70
// Progress marker on stdout.
cout << "read" << endl;
71
// Walk the reads two at a time, treating entries i and i+1 as one pair.
// NOTE(review): reads[i+1] is accessed whenever i < reads.size(); if the
// input holds an odd number of reads the last iteration indexes one past the
// end -- confirm the input is guaranteed to be paired or tighten the bound.
for (int i = 0; i < (int)reads.size(); i += 2)
73
// Put the smaller read of the pair first, so the same two reads compare
// equal regardless of the order they appeared in.
if (reads[i+1] < reads[i])
74
swap(reads[i], reads[i+1]);
76
// Length filter: both reads of the pair must reach min_length. The guarded
// statement is not visible; presumably it records i in `aux` -- TODO confirm.
if ((int)reads[i].size() >= min_length && (int)reads[i+1].size() >= min_length)
80
// Order the entries of `aux` with Compare, which interprets each entry as the
// index of the first read of a pair in `reads`.
sort(aux.begin(), aux.end(), Compare);
81
// Progress marker on stdout.
cout << "sort" << endl;
85
// Output goes to the file named by argv[2].
FastaWriter writer(argv[2]);
86
for (int i = 0; i < (int)aux.size(); ++i)
89
// Emit a pair only when it differs from the pair at `last` (or when no pair
// has been recorded yet, last == -1). With sorted input this appears to drop
// adjacent duplicate pairs; the updates of `id`, `last` and `index` are not
// visible in this listing.
if (last == -1 || reads[id] != reads[last] || reads[id+1] != reads[last+1])
91
Sequence seq1(reads[id]);
92
Sequence seq2(reads[id+1]);
93
// Both reads of a pair share the same number and are tagged /1 and /2.
writer.Write(seq1, FormatString("reads_%d/1", index));
94
writer.Write(seq2, FormatString("reads_%d/2", index));
101
// Summary on stdout: the value of `index` and the total number of reads
// loaded. No trailing newline is written by this statement.
cout << index << " " << reads.size();