2
# Assume we already have two 10,000-base DNA sequences who are reverse
3
# complement to each other, generate random 100-base reads from these two
4
# sequences. The two sequences are expected to be in file "rando.seq" and
5
# "random_revcom.seq", as generated by the "gen_randome_sequence.pl" script.
6
# The created reads will be saved in file "random.fa" in the FASTA format.
7
# "random_revcom.seq" respectively.
11
my $sequence_length = 100000;
12
my $read_length = 100;
13
my $seq_file = "random.seq";
14
my $seq_revcom_file = "random_revcom.seq";
15
my $fasta_file = "random.fa";
25
# Check sequence_length argument
29
$sequence_length = $ARGV[0];
32
open SEQFILE, "$seq_file" or die $!;
34
print length($seq) . " bases are read from $seq_file.\n";
37
open SEQFILE, "$seq_revcom_file" or die $!;
38
$seq_revcom = <SEQFILE>;
39
print length($seq_revcom) . " bases are read from $seq_revcom_file.\n";
43
open FASTAFILE, ">$fasta_file" or die $!;
44
$num_of_reads = ($sequence_length / $read_length) * 10;
45
for($i = 0; $i < $num_of_reads; $i++) {
46
$random_number = int(rand($sequence_length - $read_length + 1));
47
$kmer = substr($seq, $random_number, $read_length);
48
print FASTAFILE ">$random_number\n$kmer\n";
51
for($i = 0; $i < $num_of_reads; $i++) {
52
$random_number = int(rand($sequence_length - $read_length + 1));
53
$kmer = substr($seq_revcom, $random_number, $read_length);
54
print FASTAFILE ">rc$random_number\n$kmer\n";
58
$num_of_reads = $num_of_reads * 2;
59
print "$num_of_reads random reads are generated successfully!\n";