2
## Bioperl Test Harness Script for Modules
3
## $Id: SimpleAlign.t,v 1.43.2.1 2006/10/02 23:10:40 sendu Exp $
5
use constant NUMTESTS => 75;
7
$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
10
eval { require Test::More; };
16
plan tests => NUMTESTS;
19
use_ok('Bio::SimpleAlign');
20
use_ok('Bio::AlignIO');
21
use_ok('Bio::Root::IO');
23
my ($str, $aln, @seqs, $seq);
25
$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
26
"t","data","testaln.pfam"));
27
isa_ok($str,'Bio::AlignIO');
28
$aln = $str->next_aln();
29
is $aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246',
32
my $aln1 = $aln->remove_columns(['mismatch']);
33
is($aln1->match_line, '::*::::*:**:*:*:***:**.***::*.*::**::**:***..**:'.
34
'*:*.::::*:.:*.*.**:***.**:*.:.**::**.*:***********:::*:.:*:**.*::*:'.
35
'.*.:*:**:****************::', 'match_line');
37
my $aln2 = $aln->select(1,3);
38
isa_ok($aln2, 'Bio::Align::AlignI');
39
is($aln2->no_sequences, 3, 'no_sequences');
41
# test select non continous
42
$aln2 = $aln->select_noncont(8,2,7);
43
is($aln2->no_sequences, 3, 'no_sequences');
44
is($aln2->get_seq_by_pos(2)->id, $aln->get_seq_by_pos(7)->id, 'get+seq_by_pos');
46
@seqs = $aln->each_seq();
47
is scalar @seqs, 16, 'each_seq';
48
is $seqs[0]->get_nse, '1433_LYCES/9-246', 'get_nse';
49
is $seqs[0]->id, '1433_LYCES', 'id';
50
is $seqs[0]->no_gaps, 3, 'no_gaps';
51
@seqs = $aln->each_alphabetically();
52
is scalar @seqs, 16, 'each_alphabetically';
54
is $aln->column_from_residue_number('1433_LYCES', 10), 2, 'column_from_residue_number';
55
is $aln->displayname('1433_LYCES/9-246', 'my_seq'), 'my_seq', 'display_name get/set';
56
is $aln->displayname('1433_LYCES/9-246'), 'my_seq', 'display_name get';
57
is substr ($aln->consensus_string(50), 0, 60),
58
"RE??VY?AKLAEQAERYEEMV??MK?VAE??????ELSVEERNLLSVAYKNVIGARRASW", 'consensus_string';
59
is substr ($aln->consensus_string(100), 0, 60),
60
"?????????L????E????M???M????????????L??E?RNL?SV?YKN??G??R??W", 'consensus_string';
61
is substr ($aln->consensus_string(0), 0, 60),
62
"REDLVYLAKLAEQAERYEEMVEFMKKVAELGAPAEELSVEERNLLSVAYKNVIGARRASW", 'consensus_string';
64
ok(@seqs = $aln->each_seq_with_id('143T_HUMAN'));
65
is scalar @seqs, 1, 'each_seq_with_id';
67
is $aln->is_flush, 1,'is_flush';
68
ok($aln->id('x') && $aln->id eq 'x','id get/set');
70
is $aln->length, 242, 'length';
71
is $aln->no_residues, 3769, 'no_residues';
72
is $aln->no_sequences, 16, 'no_sequences';
73
is (sprintf("%.2f",$aln->overall_percentage_identity()), 33.06, 'overall_percentage_identity');
74
is (sprintf("%.2f",$aln->average_percentage_identity()), 66.91, 'overall_percentage_identity');
76
ok $aln->set_displayname_count;
77
is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES_1', 'set_displayname_count';
78
ok $aln->set_displayname_flat;
79
is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES', 'set_displayname_flat';
80
ok $aln->set_displayname_normal;
81
is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES/9-246', 'set_displayname_normal';
83
ok $aln->map_chars('\.','-');
84
@seqs = $aln->each_seq_with_id('143T_HUMAN');
85
is substr($seqs[0]->seq, 0, 60),
86
'KTELIQKAKLAEQAERYDDMATCMKAVTEQGA---ELSNEERNLLSVAYKNVVGGRRSAW', 'uppercase, map_chars';
88
is($aln->match_line, ' ::*::::* : * *: *: *:***:**.***::*.'.
89
' *::**::**:*** . . ** :* :* . :: :: *: . :* .*. **:'.
90
'***.** :*. : .* * : : **.*:***********:::* : .: * :** .'.
91
'*::*: .*. : *: **:****************:: ', 'match_line');
92
ok $aln->remove_seq($seqs[0]),'remove_seqs';
93
is $aln->no_sequences, 15, 'remove_seqs';
94
ok $aln->add_seq($seqs[0]), 'add_seq';
95
is $aln->no_sequences, 16, 'add_seq';
96
ok $seq = $aln->get_seq_by_pos(1), 'get_seq_by_pos';
97
is( $seq->id, '1433_LYCES', 'get_seq_by_pos');
98
ok (($aln->missing_char(), 'P') and ($aln->missing_char('X'), 'X')) ;
99
ok (($aln->match_char(), '.') and ($aln->match_char('-'), '-')) ;
100
ok (($aln->gap_char(), '-') and ($aln->gap_char('.'), '.')) ;
102
is $aln->purge(0.7), 12, 'purge';
103
is $aln->no_sequences, 4, 'purge';
106
eval { require IO::String };
107
skip("IO::String not installed. Skipping tests.\n", 24) if $@;
110
my $out = IO::String->new($string);
112
my $s1 = new Bio::LocatableSeq (-id => 'AAA',
113
-seq => 'aawtat-tn-',
118
my $s2 = new Bio::LocatableSeq (-id => 'BBB',
119
-seq => '-aaaat-tt-',
124
$a = new Bio::SimpleAlign;
128
is ($a->consensus_iupac, "aAWWAT-TN-", 'IO::String consensus_iupac');
129
$s1->seq('aaaaattttt');
130
$s1->alphabet('dna');
132
$s2->seq('-aaaatttt-');
134
$a = new Bio::SimpleAlign;
138
my $strout = Bio::AlignIO->new(-fh => $out, -format => 'pfam');
139
$strout->write_aln($a);
141
"AAA/1-10 aaaaattttt\n".
142
"BBB/1-8 -aaaatttt-\n",
143
'IO::String write_aln normal');
147
my $b = $a->slice(2,9);
148
$strout->write_aln($b);
150
"AAA/2-9 aaaatttt\n".
151
"BBB/1-8 aaaatttt\n",
152
'IO::String write_aln slice';
156
$b = $a->slice(9,10);
157
$strout->write_aln($b);
161
'IO::String write_aln slice';
167
$strout->write_aln($b);
171
'IO::String write_aln slice';
173
# not sure what coordinates this should return...
177
$b = $a->slice(1,1,1);
178
$strout->write_aln($b);
182
'IO::String write_aln slice';
188
$strout->write_aln($b);
192
'IO::String write_aln slice';
195
$b = $a->slice(11,13);
200
# remove_columns by position
203
$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
204
"t","data","mini-align.aln"));
205
$aln1 = $str->next_aln;
206
$aln2 = $aln1->remove_columns([0,0]);
207
$strout->write_aln($aln2);
209
"P84139/1-33 NEGEHQIKLDELFEKLLRARLIFKNKDVLRRC\n".
210
"P814153/1-33 NEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC\n".
211
"BAB68554/1-14 ------------------AMLIFKDKQLLQQC\n".
212
"gb|443893|124775/1-32 MRFRFQIKVPPAVEGARPALLIFKSRPELGGC\n",
213
'remove_columns by position';
215
# and when arguments are entered in "wrong order"?
218
my $aln3 = $aln1->remove_columns([1,1],[30,30],[5,6]);
219
$strout->write_aln($aln3);
221
"P84139/1-33 MEGEIKLDELFEKLLRARLIFKNKDVLRC\n".
222
"P814153/1-33 MEGMIKLDVLFEKLLRARLIFKNKDVLRC\n".
223
"BAB68554/1-14 ----------------AMLIFKDKQLLQC\n".
224
"gb|443893|124775/1-32 -RFRIKVPPAVEGARPALLIFKSRPELGC\n",
225
'remove_columns by position (wrong order)';
227
my %cigars = $aln1->cigar_line;
228
is $cigars{'gb|443893|124775/1-32'},'19,19:21,24:29,29:32,32','cigar_line';
229
is $cigars{'P814153/1-33'},'20,20:22,25:30,30:33,33','cigar_line';
230
is $cigars{'BAB68554/1-14'},'1,1:3,6:11,11:14,14','cigar_line';
231
is $cigars{'P84139/1-33'},'20,20:22,25:30,30:33,33','cigar_line';
234
# sort_alphabetically
235
my $s3 = new Bio::LocatableSeq (-id => 'ABB',
236
-seq => '-attat-tt-',
243
is $a->get_seq_by_pos(2)->id,"BBB", 'sort_alphabetically - before';
244
ok $a->sort_alphabetically;
245
is $a->get_seq_by_pos(2)->id,"ABB", 'sort_alphabetically - after';
247
$b = $a->remove_gaps();
248
is $b->consensus_string, "aaaattt", 'remove_gaps';
250
$s1->seq('aaaaattt--');
252
$b = $a->remove_gaps(undef, 'all_gaps_only');
253
is $b->consensus_string, "aaaaatttt",'remove_gaps all_gaps_only';
255
# test set_new_reference:
256
$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
257
"t","data","testaln.aln"));
258
$aln=$str->next_aln();
259
my $new_aln=$aln->set_new_reference(3);
260
$a=$new_aln->get_seq_by_pos(1)->display_id;
261
$new_aln=$aln->set_new_reference('P851414');
262
$b=$new_aln->get_seq_by_pos(1)->display_id;
263
is $a, 'P851414','set_new_reference';
264
is $b, 'P851414','set_new_reference';
267
$str = Bio::AlignIO->new(-verbose => $DEBUG,
268
-file=> Bio::Root::IO->catfile(
269
"t","data","testaln2.fasta"));
270
$aln=$str->next_aln();
271
$new_aln=$aln->uniq_seq();
272
$a=$new_aln->no_sequences;
273
is $a, 11,'uniq_seq';
275
# check if slice works well with a LocateableSeq in its negative strand
276
my $seq1 = Bio::LocatableSeq->new(
277
-SEQ => "ATGCTG-ATG",
284
my $seq2 = Bio::LocatableSeq->new(
285
-SEQ => "A-GCTGCATG",
293
my $aln_negative = Bio::SimpleAlign->new();
294
$aln_negative->add_seq($seq1);
295
$aln_negative->add_seq($seq2);
297
$aln_negative->column_from_residue_number($aln_negative->get_seq_by_pos(1)->display_id,2);
299
$aln_negative->column_from_residue_number($aln_negative->get_seq_by_pos(1)->display_id,5);
300
$aln_negative = $aln_negative->slice($end_column,$start_column);
301
my $seq_negative = $aln_negative->get_seq_by_pos(1);
302
is($seq_negative->start,2,"bug 2099");
303
is($seq_negative->end,5,"bug 2099");
b'\\ No newline at end of file'