2
# Author: Jason Stajich <jason-at-bioperl-dot-org>
3
# Purpose: Bioperl implementation of Sean Eddy's sreformat
4
# We're not as clever as Sean's squid library though so
5
# you have to specify the input format rather than letting
6
# the application guess.
14
my $USAGE = "bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
16
-h/--help Print this help
17
-if/--informat Specify the input format
18
-of/--outformat Specify the output format
19
-i/--input Specify the input file name
20
(to pass in data on STDIN use minus sign as filename)
21
-o/--output Specify the output file name
22
(to pass data out on STDOUT use minus sign as filename)
23
--msa Specify this is multiple sequence alignment data
24
--special=specialparams Specify special params supported by some formats
25
Comma or space separated please.
27
nointerleaved -- for phylip,non-interleaved format
28
idlinebreak -- for phylip, makes it molphy format
29
percentages -- for clustalw, show % id per line
30
flat -- don't show start-end in seqid
31
linelength -- line length for clustalw
32
mrbayes -- for MrBayes proper NEXUS output
36
my ($input,$output,$informat,$outformat,$msa,$special);
39
'h|help' => sub { print STDERR ($USAGE); exit(0) },
40
'i|input:s' => \$input,
41
'o|output:s' => \$output,
42
'if|informat:s' => \$informat,
43
'of|outformat:s' => \$outformat,
45
's|special:s' => \$special,
48
unless( defined $informat && defined $outformat ) {
49
die(sprintf("Cannot proceed without a defined input and output you gave (%s,%s)\n",
50
defined $informat ? $informat : "''" ,
51
defined $outformat ? $outformat : "''"));
57
# Translate the --special tokens into constructor arguments for the
# output stream.  Tokens may be separated by any mix of commas and
# whitespace; empty fields (e.g. from "a, b") are dropped so they do
# not turn into a bogus "-" => 1 argument.  An undefined $special is
# treated as an empty list of tokens.
@extra = map {
    my @rc;
    if( /nointerleaved/ ) {
        # phylip: request the non-interleaved layout
        @rc = ('-interleaved' => '0');
    } elsif( /mrbayes/ ) {
        # NEXUS output for MrBayes: switch off symbols and the end block
        @rc = ('-show_symbols'  => 0,
               '-show_endblock' => 0);
    } elsif( /(\S+)\=(\S+)/ ) {
        # key=value tokens are passed through as -key => value
        @rc = ("-$1" => $2);
    } else {
        # any other bare token becomes a boolean switch: -token => 1
        @rc = ("-$_" => 1);
    }
    @rc;    # explicit result of the map block
} grep { length } split(/[\s,]+/, defined $special ? $special : '');
68
# guess we're talking about MSA if any of the standard MSA names are used
69
if( $informat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ||
70
$outformat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ) {
76
if( defined $input ) {
77
$in = new Bio::AlignIO(-format => $informat, -file => $input);
79
$in = new Bio::AlignIO(-format => $informat, -fh => \*ARGV);
83
die("Unknown MSA format to bioperl $informat\n");
87
$out = new Bio::AlignIO(-format => $outformat,
88
-file => ">$output", @extra);
90
# default to STDOUT for output
91
$out = new Bio::AlignIO(-format => $outformat,@extra);
95
die("Unknown MSA format to bioperl $outformat\n");
97
# Copy every alignment from the input stream to the output stream.
# The "flat" special parameter is tested once, before the loop; $special
# may be undefined when --special was not given, so the match is guarded.
my $flat_names = ( defined $special && $special =~ /flat/ ) ? 1 : 0;
while( my $aln = $in->next_aln ) {
    # flat: don't show start-end in seqid
    $aln->set_displayname_flat(1) if $flat_names;
    $out->write_aln($aln);
}
103
if( defined $input ) {
104
$in = new Bio::SeqIO(-format => $informat, -file => $input);
106
$in = new Bio::SeqIO(-format => $informat, -fh => \*ARGV);
110
if( $@ =~ /Could not open/ ) {
111
die("Could not open input file: $input\n");
113
die("Unknown sequence format to bioperl $informat\n");
118
$out = new Bio::SeqIO(-format => $outformat,
119
-file => ">$output");
121
# default to STDOUT for output
122
$out = new Bio::SeqIO(-format => $outformat);
126
if( $@ =~ /Could not open/ ) {
127
die("Could not open output file: $output\n");
129
die("Unknown sequence format to bioperl $outformat: $@\n");
132
while( my $seq = $in->next_seq ) {
133
$out->write_seq($seq);
139
bp_sreformat - convert sequence formats
143
This script uses the SeqIO and AlignIO systems to convert between file
144
formats for either sequence data or multiple sequence alignment data. The
145
name comes from the fact that Sean Eddy's program sreformat (part of
146
the HMMER pkg) already does this. Sean's program tries to guess the
147
input formats while in our code we currently require you to specify what
148
the input and output formats are and if the data is from a multiple
149
sequence alignment or from straight sequence files.
153
bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
155
-h/--help Print this help
157
-if/--informat Specify the input format
159
-of/--outformat Specify the output format
161
-i/--input Specify the input file name
162
(to pass in data on STDIN use minus sign as filename)
163
-o/--output Specify the output file name
164
(to pass data out on STDOUT use minus sign as filename)
166
--msa Specify this is multiple sequence alignment data
168
--special Will pass on special parameters to the AlignIO/SeqIO
169
object -- most of these are for Bio::AlignIO objects
170
Comma or space separated list of the following
171
nointerleaved -- for phylip,non-interleaved format
172
idlinebreak -- for phylip, makes it molphy format
173
percentages -- for clustalw, show % id per line