2
# Author: Jason Stajich <jason-at-bioperl-dot-org>
3
# Purpose: Bioperl implementation of Sean Eddy's sreformat
4
# We're not as clever as Sean's squid library though so
5
# you have to specify the input format rather than letting
6
# the application guess.
13
my $USAGE = "bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
15
-h/--help Print this help
16
-if/--informat Specify the input format
17
-of/--outformat Specify the output format
18
-i/--input Specify the input file name
19
(to pass in data on STDIN use minus sign as filename)
20
-o/--output Specify the output file name
21
(to pass data out on STDOUT use minus sign as filename)
22
--msa Specify this is multiple sequence alignment data
23
--special=specialparams Specify special params supported by some formats
24
Comma or space separated please.
26
nointerleaved -- for phylip,non-interleaved format
27
idlinebreak -- for phylip, makes it molphy format
28
percentages -- for clustalw, show % id per line
29
flat -- don't show start-end in seqid
30
linelength -- line length for clustalw
31
mrbayes -- for MrBayes proper NEXUS output
35
my ($input,$output,$informat,$outformat,$msa,$special);
38
'h|help' => sub { print STDERR ($USAGE); exit(0) },
39
'i|input:s' => \$input,
40
'o|output:s' => \$output,
41
'if|informat:s' => \$informat,
42
'of|outformat:s' => \$outformat,
44
's|special:s' => \$special,
47
unless( defined $informat && defined $outformat ) {
48
die(sprintf("Cannot proceed without a defined input and output you gave (%s,%s)\n",
49
defined $informat ? $informat : "''" ,
50
defined $outformat ? $outformat : "''"));
56
@extra = map { my @rc;
57
if( /nointerleaved/) {
58
@rc = ('-interleaved' => '0');
59
} elsif( /mrbayes/ ) {
60
@rc = ('-show_symbols' => 0,
61
'-show_endblock' => 0);
62
} elsif( /(\S+)\=(\S+)/ ) { @rc = ( "-$1" => $2) }
63
else{ @rc = ("-$_" => 1) }
65
} split(/[\s,]/,$special);
67
# guess we're talking about MSA if any of the standard MSA names are used
68
if( $informat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ||
69
$outformat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ) {
75
if( defined $input ) {
76
# Read alignments from the file named by -i/--input.  The Class->new form
# is used instead of the ambiguous indirect-object form "new Class(...)".
$in = Bio::AlignIO->new(-format => $informat, -file => $input);
78
# Read alignments from the ARGV filehandle instead of a named file.  The
# Class->new form replaces the ambiguous indirect-object "new Class(...)".
$in = Bio::AlignIO->new(-format => $informat, -fh => \*ARGV);
82
die("Unknown MSA format to bioperl $informat\n");
86
# Write alignments to the file named by -o/--output (the ">" prefix is part
# of the -file value), passing along the format-specific options in @extra.
# Class->new replaces the ambiguous indirect-object form "new Class(...)".
$out = Bio::AlignIO->new(-format => $outformat,
                         -file   => ">$output",
                         @extra);
89
# default to STDOUT for output
90
# No -file argument is passed here: only the output format and the
# format-specific options in @extra.  Class->new replaces the ambiguous
# indirect-object form "new Class(...)".
$out = Bio::AlignIO->new(-format => $outformat, @extra);
94
die("Unknown MSA format to bioperl $outformat\n");
96
# Copy every alignment from the reader to the writer.  The "flat" special
# option is tested once, and only when --special was actually supplied, so
# an undefined $special is never used in a pattern match.
my $flat_names = defined $special && $special =~ /flat/;
while( my $aln = $in->next_aln ) {
    # "flat" -- don't show start-end in the sequence ids
    $aln->set_displayname_flat(1) if $flat_names;
    $out->write_aln($aln);
}
102
if( defined $input ) {
103
# Read sequences from the file named by -i/--input.  The Class->new form
# is used instead of the ambiguous indirect-object form "new Class(...)".
$in = Bio::SeqIO->new(-format => $informat, -file => $input);
105
# Read sequences from the ARGV filehandle instead of a named file.  The
# Class->new form replaces the ambiguous indirect-object "new Class(...)".
$in = Bio::SeqIO->new(-format => $informat, -fh => \*ARGV);
109
if( $@ =~ /Could not open/ ) {
110
die("Could not open input file: $input\n");
112
die("Unknown sequence format to bioperl $informat\n");
117
# Write sequences to the file named by -o/--output (the ">" prefix is part
# of the -file value).  Class->new replaces the ambiguous indirect-object
# form "new Class(...)".
$out = Bio::SeqIO->new(-format => $outformat,
                       -file   => ">$output");
120
# default to STDOUT for output
121
# No -file argument is passed here: only the output format.  Class->new
# replaces the ambiguous indirect-object form "new Class(...)".
$out = Bio::SeqIO->new(-format => $outformat);
125
if( $@ =~ /Could not open/ ) {
126
die("Could not open output file: $output\n");
128
die("Unknown sequence format to bioperl $outformat: $@\n");
131
while( my $seq = $in->next_seq ) {
132
$out->write_seq($seq);
138
bp_sreformat - convert sequence formats
142
This script uses the SeqIO system that allows conversion of sequence
143
formats for either sequence data or multiple sequence alignment data. The
144
name comes from the fact that Sean Eddy's program sreformat (part of
145
the HMMER pkg) already does this. Sean's program tries to guess the
146
input formats while in our code we currently require you to specify what
147
the input and output formats are and if the data is from a multiple
148
sequence alignment or from straight sequence files.
152
bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
154
-h/--help Print this help
156
-if/--informat Specify the input format
158
-of/--outformat Specify the output format
160
-i/--input Specify the input file name
161
(to pass in data on STDIN use minus sign as filename)
162
-o/--output Specify the output file name
163
(to pass data out on STDOUT use minus sign as filename)
165
--msa Specify this is multiple sequence alignment data
167
--special Will pass on special parameters to the AlignIO/SeqIO
168
object -- most of these are for Bio::AlignIO objects
169
Comma or space separated list of the following
170
nointerleaved -- for phylip,non-interleaved format
171
idlinebreak -- for phylip, makes it molphy format
172
percentages -- for clustalw, show % id per line