11
A CGI script using the Bio::Tools::SiRNA package to design RNAi reagents.
12
Retrieves sequences from NCBI and generates output in graphic and tabular form.
16
To use this script, place it in an appropriate cgi-bin directory on a web server.
17
The script needs to write its graphic maps to a temporary directory. Please update
18
$TMPDIR and $TMPURL to suit your local configuation.
22
Donald Jackson (donald.jackson@bms.com)
26
L<Bio::Tools::SIRNA>, L<Bio::Graphics::Panel>, L<Bio::DB::NCBIHelper>, L<CGI>
30
use Bio::Tools::SiRNA;
32
use Bio::Graphics::Panel;
33
use Bio::DB::NCBIHelper;
34
use Bio::Seq::RichSeq; # for hand-entry
35
use Bio::SeqFeature::Generic;
41
use CGI::Carp qw (fatalsToBrowser carpout);
47
# define a bunch of constants
48
my %COLORRANKS = ( 1 => 'red',
51
my $TMPDIR = '/var/www/htdocs/tmp/';
53
my $ATGPAD = 75; # how far from start do we wait?
56
my $log = $TMPDIR . 'RNAiFinder.log';
57
open (LOG, ">>$log") or die $!;
63
print $q->h1('RNAi Finder');
66
if ($q->param('Design')) {
67
if ($q->param('accession') and !$q->param('seq')) {
68
$target = get_target();
71
$target = make_target();
82
<P>Oligos are designed as described on the <A HREF="http://www.mpibpc.gwdg.de/abteilungen/100/105/sirna.html" TARGET="Tuschl">Tuschl lab web page</A> and are ranked as follows:
84
<LI><B>New:</B> Selecting 'Pol3-compatible targets' looks for oligos with the
85
pattern NAR(N17)YNN which can be synthesized or expressed from a Pol3 promoter.
86
<br>This selection <b>overrides</b> the 'Cutoff' rank.
87
<LI>Oligos with Rank = 1 (best) match the AAN(19)TT rule.
88
<LI>Oligos with Rank = 2 match the AAN(21) rule
89
<LI>Oligos with Rank = 3 match the NAN(21) rule.
91
<P>If percent GC and specificity are similar, Rank 1 oligos are better. All 3 prime overhangs are converted to TT; the rest of the sequence is transcribed into RNA</P>
93
<h3>Modifications to published rules:</h3>
96
Runs of 3 or more consecutive Gs on either strand are skipped - these can cause problems in synthesis.
97
<li>Users may choose to exclude oligos that overlap single nucleotide polymorphisms (ON by default). SNP data comes from the NCBI dbSNP database.
98
<li>'Low-complexity' regions (such as runs of a single nucleotide) are also excluded.
103
print $q->start_form;
104
print $q->h2('Enter your sequence and other parameters:'), "\n";
105
print $q->p('The values already here are DEFAULTS - you should change them to suit YOUR sequence');
106
print $q->start_table();
107
print $q->TR( $q->td({-align=> 'left'},
109
$q->textfield(-name => 'mingc', -default => '0.40'),
110
$q->textfield(-name => 'maxgc', -default => '0.60'),
113
$q->td({-align=> 'left'},
114
$q->popup_menu(-name => 'worstrank',
119
$q->checkbox(-name => 'pol3',
120
-label => 'Pol3 compatible',
125
print $q->TR( $q->th({-align=> 'left'}, 'Exclude oligos with SNPs?'),
126
$q->td($q->radio_group(-name => 'avoid_snps',
129
-labels => {1 => 'Yes', 0 => 'No'}
133
print $q->TR( $q->th({-align=> 'left'}, 'Sequence Name:'),
134
$q->td({-align=> 'left'},$q->textfield('accession')),
135
$q->td({-align=> 'left'},
136
$q->em( q(Enter an accession and you won't have to enter the <br>sequence or start/stop. Use accessions beginning with NM_ if possible.))),
139
print $q->TR( $q->th({-align=> 'left'}, ['Position of initiator ATG:',
140
'NT after start to exclude:',
141
'Position of Stop codon:' ]));
142
print $q->TR( $q->td({-align=> 'left'},
143
[$q->textfield(-name => 'cdstart', -default => 1),
144
$q->textfield(-name => 'atgpad', -default => $ATGPAD),
145
$q->textfield('cdend'), ]));
146
print $q->TR( $q->th({-align=> 'left'}, ['Minimum Fraction GC:',
147
'Maximum Fraction GC:',
150
print $q->TR($q->th({-align=> 'left', -colspan=>2},'cDNA Sequence in plain text or FASTA format'),
151
$q->td( $q->a({-href =>'Fasta_format.html', -target => 'Fasta_desc'}, 'What is FASTA format?')),
153
print $q->TR($q->td({-align => 'left', -colspan=>3},
154
$q->textarea( -name =>'seq',
159
print $q->TR( $q->th({-align => 'left', -colspan=>3},
160
'Output options: '));
161
print $q->TR( $q->td({-align=> 'left'},
162
[ $q->checkbox(-name => 'Graphic', -checked => 'checked'),
163
$q->checkbox(-name => 'Table', -checked => 'checked'),
165
print $q->TR($q->td({-align=> 'left', -colspan=>3}, $q->submit('Design')));
166
print $q->end_table();
172
# design and output RNAi reagents
175
my $factory = Bio::Tools::SiRNA->new( -target => $gene,
177
-cutoff => $q->param('worstrank') || 2,
178
-avoid_snps => $q->param('avoid_snps') || 1,
179
-min_gc => $q->param('min_gc') || 0.40,
180
-max_gc => $q->param('max_gc') || 0.60,
181
-pol3 => $q->param('pol3') || 0,
184
print $q->p('Designing Pol3-compatible oligos') if ($q->param('pol3'));
186
my @pairs = $factory->design;
188
draw_gene($gene) if ($q->param('Graphic'));
189
print_table($gene->accession, \@pairs) if ($q->param('Table'));
190
print_text($gene->accession, \@pairs) if ($q->param('Text'));
194
my ($acc) = $q->param('accession');
195
my $gb = Bio::DB::NCBIHelper->new();
196
my $seq = $gb->get_Seq_by_acc($acc);
202
print_error("Unable to retrieve sequence from GenBank using accession $acc");
209
# sanity chex - do we have the necessary info?
210
$q->param('seq') or print_error("Please supply a sequence", 1);
211
my $seq = $q->param('seq');
214
# is sequence in fasta format?
216
my ($head, $realseq) = split (/\n/, $seq, 2);
219
$realseq =~ s/[\n|\r|\s]//g;
222
elsif ($q->param('accession')) {
223
$name = $q->param('accession');
224
$seq =~ s/[\n|\r|\s]//g;
227
print_error('Please supply a sequence name!');
231
$cds_start = $q->param('cds_start') || 1;
232
$cds_end = $q->param('cds_end') || length($seq);
234
# create a new Bio::Seq::RichSeq object from parameters
235
my $seqobj = Bio::Seq::RichSeq->new( -seq => $seq,
236
-accession_number => $name,
240
my $cds = Bio::SeqFeature::Generic->new( -start => $cds_start,
243
$cds->primary_tag('CDS');
244
$seqobj->add_SeqFeature($cds);
249
# now draw a pretty picture
252
my $panel = Bio::Graphics::Panel->new( -segment => $gene,
258
-fontcolor => 'black',
259
-fontcolor2 => 'black',
260
-key_color => 'white',
262
-key_style => 'between',
263
#-gridcolor => 'lightgray',
266
my $genefeat = Bio::SeqFeature::Generic->new( -start => 1,
267
-end => $gene->length);
269
$panel->add_track( arrow => $genefeat,
277
foreach $feat($gene->top_SeqFeatures) {
278
$feature_classes{ $feat->primary_tag } ||= [];
280
push(@{ $feature_classes{ $feat->primary_tag } }, $feat);
283
# for some reason, Bio::Graphics insists on drawing subfeatures for SiRNA::Pair objects...
284
$cleanpairs = cleanup_feature($feature_classes{'SiRNA::Pair'});
287
$panel->add_track( transcript => $feature_classes{'gene'},
290
-fontcolor2 => 'black',
294
-label => \&feature_label,
298
$panel->add_track( transcript2 => $feature_classes{'CDS'},
300
-fontcolor2 => 'black',
305
-label => \&feature_label,
306
-description => \&feature_desc,
309
$panel->add_track( $feature_classes{'variation'},
312
-fontcolor2 => 'black',
316
-label => \&snp_label,
317
#-glyph => 'triangle',
319
-description => \&feature_desc,
322
$panel->add_track( generic => $feature_classes{'Excluded'},
323
-bgcolor => 'silver',
325
-fontcolor => 'black',
326
-fontcolor2 => 'black',
327
-key => 'Excluded Regions',
330
-label => \&feature_label,
331
-description => \&feature_desc,
335
generic => $cleanpairs,
336
-bgcolor => \&feature_color,
337
-fgcolor => \&feature_color,
338
-fontcolor => 'black',
339
-fontcolor2 => 'black',
340
-key => 'SiRNA Reagents',
343
-label => \&feature_label,
345
-description => \&feature_desc,
349
my $black = $gd->colorAllocate(0,0,0);
350
my $txt = GD::Text::Align->new($gd);
351
$txt->set( valign => 'center', align => 'center', color => $black);
352
#$txt->set_font(['/usr/share/fonts/truetype/VERDANA.TTF',gdGiantFont ], 10);
353
$txt->set_font(gdGiantFont);
354
$txt->set_text("RNAi Reagents for ".$gene->accession );
355
$txt->draw(200, 50, 0);
357
my $pngfile = $TMPDIR . $gene->accession . '.png';
358
my $pngurl = $TMPURL . $gene->accession . '.png';
359
open (IMG, ">$pngfile") or die $!;
364
# also get the imagemap boxes
365
my @pairboxes = extract_pairs($panel->boxes);
367
print $q->img({-src => $pngurl, -usemap=>"#MAP"});
368
print $q->p('Oligos are color coded: rank 1 in ',
369
$q->font({-color => $COLORRANKS{1}}, $COLORRANKS{1}),
371
$q->font({-color => $COLORRANKS{2}}, $COLORRANKS{2}),
373
$q->font({-color => $COLORRANKS{3}}, $COLORRANKS{3}),
374
'. Click on an oligo to bring it up in the table below');
376
print_imagemap(@pairboxes);
383
#$label = ucfirst($feature->primary_tag);
384
foreach (qw(note name product gene)) {
385
if ($feature->has_tag($_)) {
386
@notes = $feature->each_tag_value($_);
387
#$label .= ': ' . $notes[0];
388
push(@label, $notes[0]);
392
return join(': ', @label);
398
my ($rank) = $feature->each_tag_value('rank');
399
#print STDERR "Feature rank: $rank COLOR $COLORRANKS{$rank}\n";
400
return $COLORRANKS{$rank};
406
my ($accession, $pairs) = @_;
408
print $q->h2("RNAi Reagents for $accession");
409
print $q->start_table({-border => 1, -cellpadding => 2});
410
print $q->TR( $q->th(['Reagent #', 'Start', 'Stop', 'Rank', 'Fxn GC', 'Sense Oligo', 'Antisense Oligo', 'Target' ]) ), "\n";
415
foreach $pair ( sort { $a->start <=> $b->start } @$pairs ) {
416
my $sense = $pair->sense;
417
my $anti = $pair->antisense;
418
my $color = feature_color($pair);
420
# my $blasturl = "http://nunu.hpw.pri.bms.com/biocgi/versablast.pl?p=blastn&sequence=";
421
# $blasturl .= $pair->seq->seq;
422
# $blasturl .= "&action=Nucleotide Databases";
425
$q->TR( $q->td( [ $q->a({-name => 'RNAi' . $pair->start}) . $i,
428
$q->font({-color => $color},$pair->rank),
432
$q->tt($pair->seq->seq),
433
# $q->a({-href=>$blasturl,
434
# -target=>"blastn"},
435
# "BLAST this target"),
448
my ($accession, $pairs ) = @_;
451
print "RNAi reagents for $accession \n";
453
print join("\t", qw(Start Stop Rank Sense Antisense)), "\n";
454
foreach $pair (@$pairs ) {
455
my $sense = $pair->sense;
456
my $anti = $pair->antisense;
458
print join("\t", $pair->start, $pair->end, $pair->rank, $sense->seq, $anti->seq), "\n";
466
sub cleanup_feature {
469
my ($feat, @clean, $cfeat);
471
foreach $feat(@$flist) {
472
$cfeat = clone($feat);
473
# $cfeat = $feat->clone;
474
$cfeat->flush_sub_SeqFeature;
475
push (@clean, $cfeat); # will they
482
# get SiRNA::Pair features ONLY for imagemap
483
return ( grep {ref($_->[0]) eq "Bio::SeqFeature::SiRNA::Pair"} @_ );
489
print q(<MAP NAME="MAP">), "\n";
493
foreach $item (@items) {
494
my ($feature, $x1, $y1, $x2, $y2) = @$item;
495
my $fstart = $feature->start; # should be unique
496
my $text = 'RNAi #' . $i. ' Start=' . $feature->start . ' Rank='.$feature->rank;
497
print qq(<AREA SHAPE="RECT" COORDS="$x1,$y1,$x2,$y2" TITLE="$text" HREF="#RNAi$fstart">), "\n";
498
warn "Mouseover text: $text\n";
507
# print error messages in big red type. Provide more graceful die/warn to end user
508
my ($msg, $fatal) = @_;
509
print $q->h3($q->font({-color=>'RED'}, $msg));
523
foreach ($q->param) {
525
$q->ul($q->li([ $q->param($_) ]));
530
# special format for SNPs
534
if ( $feature->has_tag('db_xref') ) {
535
my @notes = $feature->each_tag_value('db_xref');
539
if ( $feature->has_tag('allele') ) {
540
my ($nt1, $nt2) = $feature->each_tag_value('allele');
541
$label .= $nt1 . '->' . $nt2;
548
my $desc = $feature->start;
549
$desc .= '-' . $feature->end unless ($feature->start == $feature->end);