1
# BioPerl module for Bio::Graphics::Pictogram
3
# Cared for by Shawn Hoon <shawnh@fugu-sg.org>
7
# You may distribute this module under the same terms as perl itself
9
# POD documentation - main docs before the code
13
Bio::Graphics::Pictogram - generate SVG output of Pictogram display for consensus motifs
17
use Bio::Graphics::Pictogram;
20
my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
22
while(my $seq = $sio->next_seq){
26
my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
40
my $svg = $picto->make_svg(\@seq);
42
print $svg->xmlify."\n";
44
#Support for Bio::Matrix::PSM::SiteMatrix now included
46
use Bio::Matrix::PSM::IO;
48
my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
62
my $psm = $psmIO->next_psm;
63
my $svg = $picto->make_svg($psm);
69
A module for generating SVG output of Pictogram display for consensus
70
motifs. This method of representation was describe by Burge and
71
colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
72
525-560, CSHL press, 1999)
74
This is a simple module that takes in an array of sequences (assuming
75
equal lengths) and calculates relative base frequencies where the
76
height of each letter reflects the frequency of each nucleotide at a
77
given position. It can also plot the information content at each
78
position scaled by the background frequencies of each nucleotide.
80
It requires the SVG-2.26 or later module by Ronan Oger available at
83
Recommended viewing of the SVG is the plugin available at Adobe:
84
http://www.adobe.com/svg
91
User feedback is an integral part of the evolution of this and other
92
Bioperl modules. Send your comments and suggestions preferably to one
93
of the Bioperl mailing lists. Your participation is much appreciated.
95
bioperl-l@bioperl.org - General discussion
96
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
100
Report bugs to the Bioperl bug tracking system to help us keep track
101
the bugs and their resolution. Bug reports can be submitted via the
104
http://bugzilla.open-bio.org/
106
=head1 AUTHOR - Shawn Hoon
108
Email shawnh@fugu-sg.org
114
The rest of the documentation details each of the object
115
methods. Internal methods are usually preceded with a "_".
119
package Bio::Graphics::Pictogram;
123
use base qw(Bio::Root::Root);
125
use constant MAXBITS => 2;
130
Usage : my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
143
Function: Constructor for Pictogram Object
144
Returns : L<Bio::Graphics::Pictogram>
149
my ($caller,@args) = @_;
150
my $self = $caller->SUPER::new(@args);
151
my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
154
my $svg = SVG->new(width=>$width,height=>$height);
155
$self->svg_obj($svg);
157
$self->fontsize($fontsize) if $fontsize;
158
$color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
159
$self->color($color);
160
$background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
161
$self->background($background);
162
$self->plot_bits($bit) if $bit;
163
$self->normalize($normalize) if $normalize;
171
Usage : $picto->make_svg();
172
Function: make the SVG object
174
Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI>
179
my ($self,$input) = @_;
180
my $fontsize = $self->fontsize;
181
my $size = $fontsize * 0.75;
183
my $height= $size+40;
184
my $color = $self->color;
186
#starting x coordinate for pictogram
188
my $pos_y = $size * 2;
189
my $bit_y = $pos_y+40;
194
#input can be file or array ref of sequences
195
if(ref($input) eq 'ARRAY'){
196
@pwm = @{$self->_make_pwm($input)};
198
elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
199
@pwm = $self->_make_pwm_from_site_matrix($input);
202
my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
204
while (my $seq = $sio->next_seq){
207
@pwm = @{$self->_make_pwm(\@seq)};
211
my $svg = $self->svg_obj;
212
my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
215
#scale the svg if length greater than svg width
216
if($seq_length > $svg->{-document}->{'width'}){
217
my $ratio = $svg->{-document}->{'width'}/($seq_length);
218
$seq_grp = $svg->group(transform=>"scale($ratio,1)");
221
$seq_grp= $svg->group();
224
#do the drawing, each set is a base position
225
foreach my $set(@pwm){
226
my ($A,$C,$G,$T,$bits) = @$set;
228
push @array, ['a',($A)];
229
push @array, ['g',($G)];
230
push @array, ['c',($C)];
231
push @array, ['t',($T)];
232
@array = sort {$b->[1]<=>$a->[1]}@array;
234
my $pos_group = $seq_grp->group(id=>"bp $bp");
238
#draw each letter at each position
239
foreach my $letter(@array){
241
if($self->normalize){
242
$scale = $letter->[1];
244
$scale = $letter->[1] * ($bits / MAXBITS);
248
if($self->normalize){
251
$y_trans = (1 - ($bits / MAXBITS)) * $size;
255
$y_trans += $prev_size;
257
$pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
258
'width'=>$width,x=>$x,y=>$size,
259
'transform'=>"translate(0,$y_trans),scale(1,$scale)",
260
'style'=>{"font-size"=>$fontsize,
261
'text-anchor'=>'middle',
262
'font-family'=>'Verdana',
263
'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
265
$prev_size = $scale * $size;
268
#plot the bit if required
269
if($self->plot_bits){
270
$seq_grp->text('x'=>$x,
272
'style'=>{"font-size"=>'10',
273
'text-anchor'=>'middle',
274
'font-family'=>'Verdana',
275
'fill'=>'black'})->cdata($bits);
282
$seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
284
$seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
286
#plot the base positions
287
$x = 45+$size/2-int($width/2);
288
foreach my $nbr(1..($bp-1)){
289
$seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
294
# $seq_grp->transform("scale(2,2)");
296
return $self->svg_obj($svg);
299
sub _make_pwm_from_site_matrix{
300
my ($self,$matrix) = @_;
301
my $bgd = $self->background;
303
my $consensus = $matrix->consensus;
304
foreach my $i(1..length($consensus)){
305
my %base = $matrix->next_pos;
307
$bits+=($base{pA} * log2($base{pA}/$bgd->{'A'}));
308
$bits+=($base{pC} * log2($base{pC}/$bgd->{'C'}));
309
$bits+=($base{pG} * log2($base{pG}/$bgd->{'G'}));
310
$bits+=($base{pT} * log2($base{pT}/$bgd->{'T'}));
311
push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))];
317
my ($self,$input) = @_;
320
my $bgd = $self->background;
321
#sum up the frequencies at each base pair
322
foreach my $seq(@$input){
323
my $string = $seq->seq;
324
$string = uc $string;
325
my @motif = split('',$string);
327
foreach my $t(@motif){
334
#calculate relative freq
337
#decrement last count
339
foreach my $pos(sort{$a<=>$b} keys %hash){
341
push @array,($hash{$pos}{'A'}||0)/$count;
342
push @array,($hash{$pos}{'C'}||0)/$count;
343
push @array,($hash{$pos}{'G'}||0)/$count;
344
push @array,($hash{$pos}{'T'}||0)/$count;
347
# relative entropy (RelEnt) or Kullback-Liebler distance
348
# relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
349
# gk the background frequency of nucleotide k
352
$bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
353
$bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
354
$bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
355
$bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
356
push @array, abs(sprintf("%.3f",$bits));
360
return $self->pwm(\@pwm);
369
Usage : $picto->fontsize();
370
Function: get/set for fontsize
377
my ($self,$obj) = @_;
379
$self->{'_fontsize'} = $obj;
381
return $self->{'_fontsize'};
387
Usage : $picto->color();
388
Function: get/set for color
389
Returns : a hash reference
390
Arguments: a hash reference
395
my ($self,$obj) = @_;
397
$self->{'_color'} = $obj;
399
return $self->{'_color'};
405
Usage : $picto->svg_obj();
406
Function: get/set for svg_obj
413
my ($self,$obj) = @_;
415
$self->{'_svg_obj'} = $obj;
417
return $self->{'_svg_obj'};
423
Usage : $picto->plot_bits();
424
Function: get/set for plot_bits to indicate whether to plot
425
information content at each base position
432
my ($self,$obj) = @_;
434
$self->{'_plot_bits'} = $obj;
436
return $self->{'_plot_bits'};
442
Usage : $picto->normalize($newval)
443
Function: get/set to make all columns the same height.
444
default is to scale height with information
446
Returns : value of normalize (a scalar)
447
Args : on set, new value (a scalar or undef, optional)
455
return $self->{'normalize'} = shift if @_;
456
return $self->{'normalize'};
462
Usage : $picto->background();
463
Function: get/set for hash reference of nucleodtide bgd frequencies
464
Returns : hash reference
465
Arguments: hash reference
470
my ($self,$obj) = @_;
472
$self->{'_background'} = $obj;
474
return $self->{'_background'};
480
Usage : $picto->pwm();
481
Function: get/set for pwm
488
my ($self,$pwm) = @_;
490
$self->{'_pwm'} = $pwm;
492
return $self->{'_pwm'};
495
#utility method for returning log 2
499
return log($val)/log(2);