2
Copyright (c) 2008-2010 Gordon Gremme <gremme@zbh.uni-hamburg.de>
3
Copyright (c) 2008 Center for Bioinformatics, University of Hamburg
5
Permission to use, copy, modify, and distribute this software for any
6
purpose with or without fee is hereby granted, provided that the above
7
copyright notice and this permission notice appear in all copies.
9
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19
#include "core/bioseq_iterator.h"
20
#include "core/fasta.h"
22
#include "core/outputfile.h"
23
#include "core/option_api.h"
24
#include "core/undef_api.h"
25
#include "tools/gt_seqfilter.h"
28
/* NOTE(review): only part of the argument struct is visible in this extract;
   the struct opener, the remaining fields and the closing typedef name are
   missing.  Elsewhere in this file `minlength' is the storage target of the
   "minlength" option and `ofi' is created in gt_seqfilter_arguments_new(),
   handed to gt_outputfile_register_options() and released in
   gt_seqfilter_arguments_delete(). */
unsigned long minlength,
32
GtOutputFileInfo *ofi;
36
/* Create the argument object of the seqfilter tool: one SeqFilterArguments is
   obtained from gt_calloc() and a new GtOutputFileInfo is stored in its `ofi'
   member.
   NOTE(review): the braces and the return statement are missing from this
   extract; presumably the new object is returned to the caller -- confirm
   against the complete file. */
static void* gt_seqfilter_arguments_new(void)
38
SeqFilterArguments *arguments = gt_calloc(1, sizeof *arguments);
39
arguments->ofi = gt_outputfileinfo_new();
43
/* Release the members of a seqfilter argument object.
   <tool_arguments> is treated as a SeqFilterArguments pointer; a NULL pointer
   is accepted and ignored. */
static void gt_seqfilter_arguments_delete(void *tool_arguments)
45
SeqFilterArguments *arguments = tool_arguments;
46
/* nothing to release */
if (!arguments) return;
47
/* output file registered via gt_outputfile_register_options() */
gt_file_delete(arguments->outfp);
48
/* counterpart of gt_outputfileinfo_new() in gt_seqfilter_arguments_new() */
gt_outputfileinfo_delete(arguments->ofi);
52
/* Build the option parser of the seqfilter tool.  Each option writes its
   value into the SeqFilterArguments object passed as <tool_arguments>.
   Registered here: "minlength", "maxlength", "maxseqnum", the width option
   and the output file options.
   NOTE(review): the local declarations of `op' and `option', the trailing
   argument of the "minlength" and "maxlength" options and the return
   statement are missing from this extract. */
static GtOptionParser* gt_seqfilter_option_parser_new(void *tool_arguments)
54
SeqFilterArguments *arguments = tool_arguments;
59
/* usage line and one-line description of the tool */
op = gt_option_parser_new("[option ...] [sequence_file ...]",
60
"Filter the given sequence_file(s) and show the "
61
"results on stdout.");
64
/* lower length bound; stored in arguments->minlength */
option = gt_option_new_ulong("minlength",
65
"set minimum length a sequence must "
66
"have to pass the filter", &arguments->minlength,
68
gt_option_parser_add_option(op, option);
71
/* upper length bound; stored in arguments->maxlength */
option = gt_option_new_ulong("maxlength", "set maximum length a sequence can "
72
"have to pass the filter", &arguments->maxlength,
74
gt_option_parser_add_option(op, option);
77
/* limit on the number of passing sequences; GT_UNDEF_ULONG is passed as the
   final argument (presumably the default value -- confirm against the
   gt_option_new_ulong() documentation) */
option = gt_option_new_ulong("maxseqnum", "set the maximum number of "
78
"sequences which can pass the filter",
79
&arguments->maxseqnum, GT_UNDEF_ULONG);
80
gt_option_parser_add_option(op, option);
83
/* width option; stored in arguments->width and later handed to
   gt_fasta_show_entry() by the runner */
option = gt_option_new_width(&arguments->width);
84
gt_option_parser_add_option(op, option);
86
/* output file options; they fill arguments->outfp */
gt_outputfile_register_options(op, &arguments->outfp, arguments->ofi);
91
/* Run the seqfilter tool.
   The arguments from index <parsed_args> onwards in <argv> are given to a
   GtBioseqIterator.  For every sequence of every GtBioseq it delivers, the
   sequence is written with gt_fasta_show_entry() to arguments->outfp if it
   satisfies all filters that are set; a filter whose value equals
   GT_UNDEF_ULONG is not applied.  Afterwards a statistics line is printed to
   stderr.
   <tool_arguments> must not be NULL and is used as SeqFilterArguments.
   <err> is passed on to gt_bioseq_iterator_next().
   NOTE(review): several lines are missing from this extract (declarations of
   `bioseq', `i' and `had_err', the updates of the counters, closing braces
   and the return statement); presumably `had_err' is returned -- confirm
   against the complete file. */
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
92
void *tool_arguments, GtError *err)
94
SeqFilterArguments *arguments = tool_arguments;
95
GtBioseqIterator *bsi;
98
unsigned long long passed = 0, filtered = 0, num_of_sequences = 0;
102
gt_assert(tool_arguments);
104
/* iterate over the arguments left after option parsing */
bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);
106
/* loop ends when the iterator reports an error or stores NULL in `bioseq' */
while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) && bioseq) {
107
for (i = 0; i < gt_bioseq_number_of_sequences(bioseq); i++) {
108
/* pass if: length >= minlength, length <= maxlength and accepting this
   sequence does not exceed maxseqnum (each test only if the value is set) */
if ((arguments->minlength == GT_UNDEF_ULONG ||
109
gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
110
(arguments->maxlength == GT_UNDEF_ULONG ||
111
gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength) &&
112
(arguments->maxseqnum == GT_UNDEF_ULONG ||
113
passed + 1 <= arguments->maxseqnum)) {
114
gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i),
115
gt_bioseq_get_sequence(bioseq, i),
116
gt_bioseq_get_sequence_length(bioseq, i),
117
arguments->width, arguments->outfp);
124
/* each GtBioseq delivered by the iterator is deleted here after use */
gt_bioseq_delete(bioseq);
127
/* show statistics */
129
gt_assert(passed + filtered == num_of_sequences);
130
/* NOTE(review): the percentage divides by num_of_sequences; if no sequence
   was read this is 0/0 in floating point (NaN on IEEE 754 platforms) unless
   a guard exists on a line missing from this extract -- confirm */
fprintf(stderr, "# %llu out of %llu sequences have been removed (%.3f%%)\n",
131
filtered, num_of_sequences,
132
((double) filtered / num_of_sequences) * 100.0);
135
gt_bioseq_iterator_delete(bsi);
140
/* Return the seqfilter tool: a GtTool created by gt_tool_new() from the
   argument constructor, argument destructor, option parser constructor and
   runner defined in this file.
   NOTE(review): one argument line of the gt_tool_new() call (between the
   option parser constructor and the runner) is missing from this extract. */
GtTool* gt_seqfilter(void)
142
return gt_tool_new(gt_seqfilter_arguments_new,
143
gt_seqfilter_arguments_delete,
144
gt_seqfilter_option_parser_new,
146
gt_seqfilter_runner);