5
# random sequence generator #
6
# -c=1 option will cause prot sequences to be built
7
# using vertebrate aa frequencies,
8
# with option -a putting a 1st methionine residues on. Frequencies are
9
# calculated from the NCBI human RefSeq protein sequences
10
# -c and -a only affect protein sequences
11
# -a only works in conjunction with -c
12
# -n number of random sequences, default = 1
17
my ($length,$type,$filename,$comp,$met);
19
$USAGE = 'usage: generate_random_seq.pl --length=1000 --type=dna --filename=/tmp/test.seq --number=50';
21
my %alphabets = ( 'dna' => [qw(C A G T)],
22
'rna' => [qw(C A G U)],
23
'prot'=> [qw( A C D E F G H I K L M N P Q R S T V W Y)],
25
# make random num from 1-10000. numbers in this array reflect the frequency,
26
# e.g., a random number from 1.744 = A, 745-991 = C etc;
27
my @aa_frequencies = qw(744 991 1398 2017 2378 3104 3349 3726 4239 5273 5443
28
5749 6410 6848 7455 8263 8760 9340 9488 9713 10000);
33
'l|length:s' => \$length,
34
't|type|m|alphabet:s' => \$type,
35
'f|file|filename:s' => \$filename,
36
'c|composition:s' => \$comp,
37
'a|methionine:s' => \$met,
38
'n|number:s' => \$number
41
assert ( $type && defined ($alphabets{lc $type}),
43
assert ( $length && $length =~ /^\d+$/, $USAGE );
45
foreach my $num (1..$number) {
47
my $alphabet = $alphabets{lc $type};
48
my $sspace = scalar @$alphabet;
49
if (!$comp || $type ne 'prot') {
50
foreach ( 1..$length ) {
51
$sequence .= $alphabet->[ int rand($sspace) ];
53
}elsif ($type eq 'prot') {
54
$sequence = build_seq($length, \@aa_frequencies);
56
my $seq = Bio::PrimarySeq->new(-seq => $sequence,
57
-display_id => 'randomseq'.$num);
58
my %args = (-format => 'fasta');
59
if( $filename ) { $args{-file} = ">>$filename" }
60
my $seqio = Bio::SeqIO->new(%args);
61
$seqio->write_seq($seq);
64
sub assert { die $_[1] unless( $_[0] ); }
67
#takes seqlen and ref to frequency data as parameters
69
my $str = ($met)?'M':'';
72
my $aa = int(rand (10000)) ;
74
while ($pf->[$j] < $aa && $j <19) {
77
$str .= $alphabets{'prot'}[$j];
79
print "str is $str\n";
5
# random sequence generator #
6
# -c=1 option will cause prot sequences to be built
7
# using vertebrate aa frequencies,
8
# with option -a putting a 1st methionine residues on. Frequencies are
9
# calculated from the NCBI human RefSeq protein sequences
10
# -c and -a only affect protein sequences
11
# -a only works in conjunction with -c
12
# -n number of random sequences, default = 1
17
my ($length,$type,$filename,$comp,$met);
19
$USAGE = 'usage: generate_random_seq.pl --length=1000 --type=dna --filename=/tmp/test.seq --number=50';
21
my %alphabets = ( 'dna' => [qw(C A G T)],
22
'rna' => [qw(C A G U)],
23
'prot'=> [qw( A C D E F G H I K L M N P Q R S T V W Y)],
25
# make random num from 1-10000. numbers in this array reflect the frequency,
26
# e.g., a random number from 1.744 = A, 745-991 = C etc;
27
my @aa_frequencies = qw(744 991 1398 2017 2378 3104 3349 3726 4239 5273 5443
28
5749 6410 6848 7455 8263 8760 9340 9488 9713 10000);
33
'l|length:s' => \$length,
34
't|type|m|alphabet:s' => \$type,
35
'f|file|filename:s' => \$filename,
36
'c|composition:s' => \$comp,
37
'a|methionine:s' => \$met,
38
'n|number:s' => \$number
41
assert ( $type && defined ($alphabets{lc $type}),
43
assert ( $length && $length =~ /^\d+$/, $USAGE );
45
foreach my $num (1..$number) {
47
my $alphabet = $alphabets{lc $type};
48
my $sspace = scalar @$alphabet;
49
if (!$comp || $type ne 'prot') {
50
foreach ( 1..$length ) {
51
$sequence .= $alphabet->[ int rand($sspace) ];
53
}elsif ($type eq 'prot') {
54
$sequence = build_seq($length, \@aa_frequencies);
56
my $seq = Bio::PrimarySeq->new(-seq => $sequence,
57
-display_id => 'randomseq'.$num);
58
my %args = (-format => 'fasta');
59
if( $filename ) { $args{-file} = ">>$filename" }
60
my $seqio = Bio::SeqIO->new(%args);
61
$seqio->write_seq($seq);
64
sub assert { die $_[1] unless( $_[0] ); }
67
#takes seqlen and ref to frequency data as parameters
69
my $str = ($met)?'M':'';
72
my $aa = int(rand (10000)) ;
74
while ($pf->[$j] < $aa && $j <19) {
77
$str .= $alphabets{'prot'}[$j];
79
print "str is $str\n";