3
#---------------------------------------------------------------------------
4
# PROGRAM : parse_blast.pl
5
# PURPOSE : To demonstrate parsing features of the Bio::Tools::Blast.pm module.
6
# AUTHOR : Steve Chervitz (sac@bioperl.org)
8
# REVISION: $Id: parse_blast.pl,v 1.9 2002/01/11 08:05:38 sac Exp $
9
# WEBSITE : http://bio.perl.org/Projects/Blast/
10
# USAGE : parse_blast.pl -h
11
# EXAMPLES: parse_blast.pl -eg
14
# Set the require ".../blast_config.pl" to point to the proper location
15
# of the blast_config.pl file. See blast_config.pl for additional steps.
19
# Sample BLAST output files can be found in examples/blast/out/ of the
20
# distribution. This script can process Blast report files specified
21
# on the command line or supplied via a STDIN stream.
23
# This demo script does not exercise all of the functionality of the Blast
24
# object. See the parse_blast2.pl and parse_positions.pl scripts for some other
25
# manipulations and the documentation in the Bio::Tools::Blast.pm,
26
# accessible from the above website or by running Blast.pm through pod2html.
29
# * Create an example that shows how to parse HTML-formatted
30
# reports. The new Blast.pm module no longer parses such reports
34
# sac, 11 Mar 1999: Merged parse_stream.pl with parse.pl to create
35
# parse_blast.pl. Replaces parse_stream.pl and parse.pl.
36
# sac, 4 Sep 1998: Added example of using -filt_func option.
37
# sac, 16 Jun 1998: Added installation comment, require statement comments.
38
# Minor alteration of seq_inds() calls.
39
# sac, 15 Jul 1998: Segregated code into parse2.pl which was formerly in
40
# parse.pl but commented out.
41
#---------------------------------------------------------------------------
43
# Load the shared configuration/helper file. With a bare file name like this,
# require looks the file up through the directories listed in @INC.
# Using blast_config.pl in the examples/blast distribution directory:
44
require "blast_config.pl";
45
# Absolute path to use instead, once blast_config.pl is installed on your
# system (edit to match your installation):
46
#require "/share/www-data/html/perlOOP/bioperl/bin/blast/blast_config.pl";
48
# Using vars from blast_config to prevent warning messages under -w.
# Only $ID and $DESC are assigned in this section.
# NOTE(review): the remaining names are presumably populated by
# blast_config.pl (option parsing etc.) -- confirm against that file.
49
use vars qw($ID $VERSION $DESC $MONITOR %blastParam @objects
50
$opt_in $opt_table $opt_compress $opt_filt_func);
52
# Script name; interpolated into the usage examples as ./$ID.
$ID = 'parse_blast.pl';
54
# One-line description of this script.
$DESC = "Demonstrates parsing Blast reports using Bio::Tools::Blast.pm";
59
# $hit->frac_aligned_hit >= 0.8; };
67
&blast_general_params;
73
# THESE NEED TO BE UPDATED TO INCLUDE MORE STREAM PARSING EXAMPLES
75
(Run these in the examples/blast/ directory of the distribution.)
78
./$ID out/blastp.2.gz -signif 1e-15 -table 1
79
./$ID out/blastp.2.gz -signif 1e-15 -table 1 -exponent -desc
80
./$ID out/blastp.2.gz -signif 1e-15 -table 2
81
./$ID out/blastp.2.wu -check_all -filt_func '\$hit->gaps == 0' -table 2
82
./$ID out/blastp.205.gz -signif 1e-1 -nostats
83
./$ID out/blastp.2.gz -noaligns -signif 1e-5
84
./$ID -signif 1e-5 -table 1 < out/tblastn.2 > parsed.out
85
./$ID out/blastx.2.email.gz -table 1 -signif 1e-4
86
./$ID out/blastn.2* -table 1 -best -nostats > parsed.out2
87
./$ID out/tblastn.206.out.gz -table 2 -signif 0.1
88
./$ID out/blastp.1.gz # should issue some warnings.
92
gzip -cd out/blastp.2* | ./$ID -signif 1e-5 -table 2 > blast.table2
93
cat ./out/blastx* | ./$ID -table 1 > blast.table1
94
print_blasts.pl ./out | ./$ID -best -noshare
96
The '-noshare' argument is necessary because the out/ directory
97
contains a mixed bag of Blast reports (version 1, 2, blastp, tblastn,
98
gapped, ungapped, etc.). Most of the time, -noshare is unnecessary
99
since all reports have the same program, version, gapping, etc.
101
The "print_blasts.pl dir" syntax or the parse_multi.pl script are
102
recommended when working with large numbers of Blast reports (thousands).
103
The Blasts reports located in "dir" can be compressed or not.
106
Parsing large numbers of Blast reports can lead to serious
107
memory usage problems. See documentation in parse_multi.pl and
108
Blast.pm for more information (including a workaround).
115
# Initialise, passing a reference to this script's parse_usage sub.
# NOTE(review): init_blast is presumably defined in blast_config.pl and
# presumably uses the callback to print usage -- confirm there.
&init_blast(\&parse_usage);
117
# If no files were named on the command line but $opt_in is set, treat its
# value as the single input file so the file-parsing path handles it.
if(!@ARGV and $opt_in) { push @ARGV, $opt_in; }
125
# Building object(s) from files specified on command line.
126
# Each file should contain one report.
127
# Note that we don't really need to capture the $blast_obj
128
# returned by create_blast() since we can always access it via
129
# the global $blastObj defined in blast_config.pl.
130
# However, doing so makes things more obvious.
131
# Announce the start of file parsing on STDERR, but only when $MONITOR is true.
$MONITOR && print STDERR "\nParsing Blast report file(s).\n";
134
# Load the file into the Blast parameters.
# Both file tests default to $_: skip anything that is not a plain file (-f)
# or that has zero size (-s).
135
next unless -f and -s;
136
$blastParam{-file} = $_;
139
# Create the Blast object with the specified parameters.
140
# Using functions provided by blast_config.pl
141
# which also supplies $blastObj.
# Called as "&name;" (sigil, no parentheses), so create_blast sees the
# caller's current @_ rather than an empty argument list.
142
$blast_obj = &create_blast;
143
# Tabular output when $opt_table is set, otherwise the show_results report.
$opt_table ? &print_table($blast_obj) : &show_results($blast_obj);
145
# Only invoked when $opt_compress is set.
# NOTE(review): compress_file presumably compresses the report file on
# disk -- confirm in Blast.pm.
$opt_compress && $blast_obj->compress_file;
146
$blast_obj->destroy(); # important when crunching lots of reports.
150
# Error record: the offending file name followed by the message left in $@.
# NOTE(review): the enclosing eval / $@ check and the push onto @errs are
# not visible in this view -- confirm in the full file.
my $er = "\nFILE: $blastParam{-file}\n$@\n";
153
# Progress indicator: one dot per report, plus a newline whenever $count is
# a multiple of 50 (% binds tighter than ?:).
print STDERR ".", $count % 50 ? '' : "\n";
156
# @ARGV is empty. Build Blast objects from STDIN stream.
157
# May contain one or more reports.
158
print STDERR "\nParsing Blast stream from STDIN.\n";
160
# Process each Blast as you go.
# Stores a code reference to print_table under the -exec_func parameter.
161
$blastParam{-exec_func} = \&print_table;
162
# Alternatively, try this (no parentheses: \&name() would call the sub and
# take a reference to its return value instead of a code reference):
163
#? $blastParam{-exec_func} = \&display_hit_info;
165
# Save all the Blast objects.
# Stores a reference to the global @objects under the -save_array parameter.
166
$blastParam{-save_array} = \@objects;
169
# Run the stream parser inside eval so a fatal error is captured in $@
# instead of terminating the script immediately.
eval { &parse_stream; };
172
# NOTE(review): no "if($@)" guard is visible around this die in this view;
# confirm that it is conditional on $@ in the full file.
die "\n*** TROUBLE:\n$@\n";
180
# Summarise failures: the number of entries in @errs, then each entry.
printf STDERR "\n*** %d Blast reports produced fatal errors:\n", scalar(@errs);
181
foreach(@errs) { print STDERR $_; }