3
# script to run genscan on all nucleotide sequences in a fasta file
4
# and save results as fasta, creates <file>.gs.pept and <file>.gs.cds
8
use Bio::Tools::Genscan;
12
# directory with GENSCAN matrices
13
my $genscanDir = "/dbs/genscan";
15
my $binDir = "/usr/local/bin";
17
my $matrix = "HumanIso.smat";
21
GetOptions( "f|file=s" => \$file );
22
usage() if ( !$file );
24
# create output files for predicted protein and CDS
25
open PEPT, ">>$file.gs.pept" or croak "Error opening $file.gs.pept: $!\n";
26
open DNA, ">>$file.gs.cds" or croak "Error opening $file.gs.cds: $!\n";
28
$in = Bio::SeqIO->new(-file => $file , -format => 'Fasta');
30
while ( my $seq = $in->next_seq() ) {
31
croak "Input sequence is protein\n" if ( $seq->moltype eq 'protein' );
34
$id = $1 if ( $id =~ /gi\|(\d+)/ );
35
# create temp file, input to GENSCAN
36
open OUT,">$id.temp.fa" or croak "Error opening $id.temp.fa: $!\n";
37
print OUT ">$id.fa\n$str\n\n";
39
# the contents of the *raw file will be parsed
40
system "genscan $genscanDir/$matrix $id.temp.fa -cds > $id.gs.raw";
42
my $genscan = Bio::Tools::Genscan->new( -file => "$id.gs.raw");
43
while ( my $gene = $genscan->next_prediction() ) {
44
my $prt = $gene->predicted_protein;
45
my $cds = $gene->predicted_cds;
48
$cds->display_id =~ /predicted_(CDS_\d+)/;
49
print DNA ">" . $id . "_" . $1 . " " . $cds->display_id . "\n"
53
$prt->display_id =~ /predicted_(peptide_\d+)/;
54
print PEPT ">" . $id . "_" . $1 . " " . $prt->display_id . "\n"
66
Function : run genscan on all nucleotide sequences in a multiple fasta file
67
Output : <file>.gs.pept and <file>.gs.cds