5
fastm9_to_table - turn FASTA -m 9 output into NCBI -m 9 tabular output
9
fastm9_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ...
14
-e/--evalue evalue -- filter by evalue
15
-b/--bitscore bitscore -- filter by bitscore
16
--header -- boolean flag to print column header
17
-o/--out -- optional outputfile to write data,
18
otherwise will write to STDOUT
19
-h/--help -- show this documentation
21
Not technically a SearchIO script as this doesn't use any Bioperl
22
components but is a useful and fast. The output is tabular output
23
with the standard NCBI -m9 columns.
31
query start (if on rev-strand start > end)
33
hit start (if on rev-strand start > end)
38
Additionally 3 more columns are provided
47
=head1 AUTHOR - Jason Stajich
49
Jason Stajich jason_at_bioperl-dot-org
58
my ($evalue,$bitscore,$header,$outfile) = ( 10, 0);
60
'b|bitscore|bits:f' => \$bitscore,
61
'e|evalue:f' => \$evalue,
63
'o|out|outfile:s' => \$outfile,
64
'h|help' => sub { exec('perldoc',$0); exit; }
69
open($outfh, ">$outfile") || die("$outfile: $!");
79
my @fields = qw(qname hname percid alen mmcount gapcount
80
qstart qend hstart hend evalue score bits fs sw-score
81
percsim qlen hlen qgap hgap);
82
print $outfh "#",uc(join("", map{ sprintf("%-10s",$_) } @fields)), "\n" if $header;
86
if( /^\s*\d+>>>(\S+).+/ ) {
88
if( /\-\s+(\d+)\s+(aa|nt)\s+$/ ){
91
} elsif( $hitsection && /^>>>\Q$data{'qname'}/ ) {
93
} elsif( /^The best scores are:/ ) {
96
} elsif( $hitsection ) {
97
if( s/^(\S+)\s+(.+)\(\s*(\d+)\)\s+// ) {
98
my ($hit, $desc,$hitlen) = ($1,$2,$3);
99
my ($dir) = ( s/^\[(r|f)\]\s+// );
100
my @line = split(/\s+/,$_);
101
$data{'hname'} = $hit;
102
$data{'hlen'} = $hitlen;
103
$data{'score'} = shift @line;
104
$data{'bits'} = shift @line;
105
$data{'evalue'} = shift @line;
106
$data{'percid'} = shift @line;
108
$data{'percsim'} = shift @line;
109
$data{'sw-score'} = shift @line;
110
$data{'alen'} = shift @line;
111
$data{'qstart'} = shift @line;
112
$data{'qend'} = shift @line;
113
$data{'pn0'} = shift @line; # pn0
114
$data{'px0'} = shift @line; # px0
115
$data{'hstart'} = shift @line; # an1
116
$data{'hend'} = shift @line; # ax1
117
$data{'pn1'} = shift @line; # pn1
118
$data{'px1'} = shift @line; # px1
120
$data{'qgap'} = shift @line;
121
$data{'hgap'} = shift @line;
122
$data{'gapcount'} = $data{'qgap'} + $data{'hgap'};
123
$data{'fs'} = shift @line;
125
$data{'mmcount'} = $data{'alen'} - ( int($data{'percid'} * $data{'alen'}) + $data{'gapcount'});
126
#next if( $data{'evalue'} > $evalue ||
127
# $data{'bits'} < $bitscore );
129
for ( $data{'percid'}, $data{'percsim'} ) {
130
$_ = sprintf("%.2f",$_*100);
132
print $outfh join( "\t",map { $data{$_} } @fields),"\n";
134
# print STDERR "unrecognized line \n$linestr";
137
# warn("skipping a line like this: $_");