8
bp_parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with
9
different output options
13
bp_parse_hmmsearch [--po] [--ps] -s hmmsearch_file
15
bp_parse_hmmsearch [--po] [--ps] -m index_file
19
=head2 Mandatory Options:
21
-s HMMSEARCH file to parse.
22
-m INDEX file that contains a list of HMMSEARCH files for multiple
25
=head2 Special Options:
27
--po Print only the hits that have positive scores.
28
--ps Print the total number of positive domain scores found.
29
--help Show this documentation.
35
User feedback is an integral part of the evolution of this and other
36
Bioperl modules. Send your comments and suggestions preferably to the
37
Bioperl mailing list. Your participation is much appreciated.
39
bioperl-l@bioperl.org - General discussion
40
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44
Report bugs to the Bioperl bug tracking system to help us keep track
45
of the bugs and their resolution. Bug reports can be submitted via the
48
https://redmine.open-bio.org/projects/bioperl/
52
Mauricio Herrera Cuadra <mauricio at open-bio.org>
56
# Modules, pragmas and variables to use
59
use vars qw($opt_s $opt_m $opt_po $opt_ps $opt_help);
61
# Gets options from the command line
62
GetOptions qw(-s:s -m:s --po --ps --help);
64
# Print documentation if help switch was given
65
if ($opt_help) {
    # exec() replaces this process with perldoc and only returns when it
    # could not be started; report that instead of silently continuing
    # into the normal option checks.
    exec('perldoc', $0) or die "Unable to run perldoc on $0: $!\n";
}
67
# If no mandatory options are given prints an error and exits
68
if (!$opt_s && !$opt_m) {
69
print "ERROR: No HMMSEARCH or INDEX file has been specified.\n Use
70
'--help' switch for documentation.\n" and exit();
71
} elsif ($opt_s && $opt_m) {
72
print "ERROR: You must select only one option (-s or -m) for input.\n
73
Use '--help' switch for documentation.\n" and exit();
76
# Initializes a counter for the domain positive scores if the option
78
# Always declare and zero the counter: "my ... if COND" has undefined
# behaviour in Perl, and the value is only printed when --ps was given.
my $pos_scores = 0;
80
# If single file mode was selected
82
parse_hmmsearch($opt_s);
84
# Prints the total domain positive scores if the option was given
86
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
88
print "Total domain positive scores: $pos_scores\n";
91
# If multiple files mode was selected
94
# Opens the INDEX file sent as input
95
open(FH, "<", $opt_m) or die("Unable to open INDEX file: $opt_m ($!)");
97
# Loop that reads one line per iteration until reaching the
99
while (my $line = <FH>) {
101
# Deletes the newline characters from the line
104
# Parses the result file in turn
105
parse_hmmsearch($line);
106
print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
110
# Prints the total domain positive scores if the option was given
111
print "Total domain positive scores: $pos_scores\n" if $opt_ps;
120
# Subroutine that parses a HMMSEARCH results file
121
sub parse_hmmsearch {
123
# Gets the parameters sent to the function
126
# Creates a new Bio::SearchIO object
127
my $in = new Bio::SearchIO(
132
# Loops through the results file
133
while (my $result = $in->next_result()) {
135
# Prints program name and version (these are values from
136
# Bio::Search::Result::GenericResult methods)
137
print $result->algorithm(), " ", $result->algorithm_version(), "\n";
138
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141
# Prints HMM file and sequence database (these are values from
142
# Bio::Search::Result::HMMERResult methods)
143
print "HMM file:\t\t\t", $result->hmm_name(), "\n";
144
print "Sequence database:\t\t", $result->sequence_file(), "\n";
145
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
148
# Prints some values from Bio::Search::Result::GenericResult
150
print "Query HMM:\t\t\t", $result->query_name(), "\n";
151
print "Accession:\t\t\t", $result->query_accession(), "\n";
152
print "Description:\t\t\t", $result->query_description(), "\n";
153
print "Total hits:\t\t\t", $result->num_hits(), "\n";
155
# Loops through each hit (sequence) in turn
156
while (my $hit = $result->next_hit()) {
158
# If only positive scores option was given and the score
159
# in turn is greater than or equal to zero
161
printHits($hit) if ($hit->score() >= 0);
163
# Prints all hits otherwise
171
# Subroutine that prints the values from a Bio::Search::Hit::HitI
175
# Gets the parameters sent to the function
178
# Prints some values from Bio::Search::Hit::HitI methods
179
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
180
print "Hit ", $hit->rank(), "\n";
181
print "Sequence:\t\t\t", $hit->name(), "\n";
182
print "Description:\t\t\t", $hit->description(), "\n";
183
print "Score:\t\t\t\t", $hit->score(), "\n";
184
print "E-value:\t\t\t", $hit->significance(), "\n";
185
print "Number of domains:\t\t", $hit->num_hsps(), "\n";
187
# Loops through each domain (HSP) in turn
188
while (my $hsp = $hit->next_hsp()) {
190
# Prints some values from Bio::Search::HSP::HSPI methods
191
print " - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
192
print " Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n";
193
print " seq-f:\t\t\t", $hsp->start('hit'), "\n";
194
print " seq-t:\t\t\t", $hsp->end('hit'), "\n";
195
print " hmm-f:\t\t\t", $hsp->start(), "\n";
196
print " hmm-t:\t\t\t", $hsp->end(), "\n";
197
print " score:\t\t\t", $hsp->score(), "\n";
198
$pos_scores++ if ($hsp->score() >= 0) && $opt_ps;
199
print " E-value:\t\t\t", $hsp->evalue(), "\n";
200
my $hmm_string = $hsp->query_string();
201
$hmm_string =~ s/<-\*$//;
202
print " hmm string:\t\t\t", $hmm_string, "\n";
203
print " homology string:\t\t", $hsp->homology_string(), "\n";
204
print " hit string:\t\t\t", $hsp->hit_string(), "\n";