7
parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with
8
different output options
12
parse_hmmsearch [--po] [--ps] -s hmmsearch_file
14
parse_hmmsearch [--po] [--ps] -m index_file
18
=head2 Mandatory Options:
20
-s HMMSEARCH file to parse.
21
-m INDEX file that contains a list of HMMSEARCH files to parse (multiple files mode).
24
=head2 Special Options:
26
--po Print only the hits that have positive scores.
27
--ps Print the total of positive scores found.
28
--help Show this documentation.
34
User feedback is an integral part of the evolution of this and other
35
Bioperl modules. Send your comments and suggestions preferably to the
36
Bioperl mailing list. Your participation is much appreciated.
38
bioperl-l@bioperl.org - General discussion
39
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
43
Report bugs to the Bioperl bug tracking system to help us keep track
44
of the bugs and their resolution. Bug reports can be submitted via the web:
47
https://redmine.open-bio.org/projects/bioperl/
51
Mauricio Herrera Cuadra <mauricio at open-bio.org>
55
# Modules, pragmas and variables to use
58
use vars qw($opt_s $opt_m $opt_po $opt_ps $opt_help);
60
# Gets options from the command line
61
GetOptions qw(-s:s -m:s --po --ps --help);
63
# Print documentation if help switch was given
64
# exec() replaces this process with perldoc and only returns if perldoc could
# not be started, in which case it returns false. Report that failure instead
# of silently falling through to the option checks further down.
if ($opt_help) {
    exec('perldoc', $0) or die "Unable to run perldoc on $0: $!\n";
}
66
# If no mandatory options are given prints an error and exits
67
if (!$opt_s && !$opt_m) {
68
print "ERROR: No HMMSEARCH or INDEX file has been specified.\n Use
69
'--help' switch for documentation.\n" and exit();
70
} elsif ($opt_s && $opt_m) {
71
print "ERROR: You must select only one option (-s or -m) for input.\n
72
Use '--help' switch for documentation.\n" and exit();
75
# Initializes a counter for the domain positive scores (used by the --ps option)
77
# Always start the counter at zero: a "my" declaration combined with a
# statement modifier ("my $x = 0 if COND;") has undefined behaviour in Perl.
# The counter is only incremented and printed when --ps was given.
my $pos_scores = 0;
79
# If single file mode was selected
81
parse_hmmsearch($opt_s);
83
# Prints the total domain positive scores if the option was given
85
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
87
print "Total domain positive scores: $pos_scores\n";
90
# If multiple files mode was selected
93
# Opens the INDEX file sent as input
94
open(FH, "<", $opt_m) or die("Unable to open INDEX file: $opt_m ($!)");
96
# Loop that reads one line from the INDEX file per iteration until the end of the file
98
while (my $line = <FH>) {
100
# Deletes the new line characters from the line
103
# Parses the result file in turn
104
parse_hmmsearch($line);
105
print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
109
# Prints the total domain positive scores if the option was given
110
print "Total domain positive scores: $pos_scores\n" if $opt_ps;
119
# Subroutine that parses a HMMSEARCH results file
120
sub parse_hmmsearch {
122
# Gets the parameters sent to the function
125
# Creates a new Bio::SearchIO object
126
my $in = new Bio::SearchIO(
131
# Loops through the results file
132
while (my $result = $in->next_result()) {
134
# Prints program name and version (these are values from
135
# Bio::Search::Result::GenericResult methods)
136
print $result->algorithm(), " ", $result->algorithm_version(), "\n";
137
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
140
# Prints HMM file and sequence database (these are values from
141
# Bio::Search::Result::HMMERResult methods)
142
print "HMM file:\t\t\t", $result->hmm_name(), "\n";
143
print "Sequence database:\t\t", $result->sequence_file(), "\n";
144
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
147
# Prints some values from Bio::Search::Result::GenericResult
149
print "Query HMM:\t\t\t", $result->query_name(), "\n";
150
print "Accession:\t\t\t", $result->query_accession(), "\n";
151
print "Description:\t\t\t", $result->query_description(), "\n";
152
print "Total hits:\t\t\t", $result->num_hits(), "\n";
154
# Loops through the sequence in turn
155
while (my $hit = $result->next_hit()) {
157
# If only positive scores option was given and the score
158
# in turn is zero or greater (the check uses >= 0)
160
printHits($hit) if ($hit->score() >= 0);
162
# Prints all hits otherwise
170
# Subroutine that prints the values from a Bio::Search::Hit::HitI
174
# Gets the parameters sent to the function
177
# Prints some values from Bio::Search::Hit::HitI methods
178
print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
179
print "Hit ", $hit->rank(), "\n";
180
print "Sequence:\t\t\t", $hit->name(), "\n";
181
print "Description:\t\t\t", $hit->description(), "\n";
182
print "Score:\t\t\t\t", $hit->score(), "\n";
183
print "E-value:\t\t\t", $hit->significance(), "\n";
184
print "Number of domains:\t\t", $hit->num_hsps(), "\n";
186
# Loops through the domain in turn
187
while (my $hsp = $hit->next_hsp()) {
189
# Prints some values from Bio::Search::HSP::HSPI methods
190
print " - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
191
print " Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n";
192
print " seq-f:\t\t\t", $hsp->start('hit'), "\n";
193
print " seq-t:\t\t\t", $hsp->end('hit'), "\n";
194
print " hmm-f:\t\t\t", $hsp->start(), "\n";
195
print " hmm-t:\t\t\t", $hsp->end(), "\n";
196
print " score:\t\t\t", $hsp->score(), "\n";
197
$pos_scores++ if ($hsp->score() >= 0) && $opt_ps;
198
print " E-value:\t\t\t", $hsp->evalue(), "\n";
199
my $hmm_string = $hsp->query_string();
200
$hmm_string =~ s/<-\*$//;
201
print " hmm string:\t\t\t", $hmm_string, "\n";
202
print " homology string:\t\t", $hsp->homology_string(), "\n";
203
print " hit string:\t\t\t", $hsp->hit_string(), "\n";