1
# Round-trip test: encodes foobar.fas from the test data directory, decodes the
# resulting index, and diffs `last_stdout` (presumably the captured output of
# the preceding command) against the original input file.
# NOTE(review): the bare integer lines interleaved with the statements look
# like extraction residue (original line numbers) — confirm against upstream.
Name "gt encseq encode|decode simple"
2
Keywords "gt_encseq_encode encseq gt_encseq_decode"
4
run "#{$bin}gt encseq encode #{$testdata}foobar.fas"
5
run "#{$bin}gt encseq decode foobar.fas"
6
run "diff #{last_stdout} #{$testdata}foobar.fas"
9
# Negative test: after one successful encode of foobar.fas, the same file is
# passed twice to `gt encseq encode` with no index name. The call is expected
# to fail (:retval => 1) and `last_stderr` must match
# /if more than one input file is given/.
Name "gt encseq encode multiple files without indexname"
10
Keywords "encseq gt_encseq_encode"
12
run "#{$bin}gt encseq encode #{$testdata}foobar.fas"
13
run_test "#{$bin}gt encseq encode #{$testdata}foobar.fas " + \
14
"#{$testdata}foobar.fas", :retval => 1
15
grep(last_stderr, /if more than one input file is given/)
18
# Negative test: foobar.fas is encoded without -lossless and then decoded with
# -lossless; `last_stderr` must match /cannot open file.*ois/.
# NOTE(review): the line continuing the `run_test` call (after the trailing
# backslash) is not visible in this fragment — presumably the expected return
# value; confirm against upstream.
Name "gt encseq decode lossless without ois"
19
Keywords "encseq gt_encseq_decode lossless"
21
run "#{$bin}gt encseq encode #{$testdata}foobar.fas"
22
run_test "#{$bin}gt encseq decode -lossless foobar.fas", \
24
grep(last_stderr, /cannot open file.*ois/)
27
# Read modes passed to `gt encseq decode -dir`. The driver loops in this file
# use STDREADMODES for the protein inputs (AATESTSEQS) and DNAREADMODES for the
# DNA inputs (DNATESTSEQS).
STDREADMODES = ["fwd", "rev"]
28
# DNA inputs additionally get "cpl" and "rcl" (presumably complement and
# reverse complement — confirm against the gt documentation).
DNAREADMODES = STDREADMODES + ["cpl", "rcl"]
29
# DNA test inputs, resolved relative to the $testdata directory.
DNATESTSEQS = ["#{$testdata}foobar.fas",
30
"#{$testdata}gt_bioseq_succ_3.fas",
31
"#{$testdata}at100K1"]
32
# Protein test inputs; the comparison helpers expect -mirrored to fail on these.
AATESTSEQS = ["#{$testdata}trembl-eqlen.faa"]
40
# Fragment of a helper whose `def` line is not visible here. Maps every
# nucleotide/IUPAC symbol of `seq` to its complement, preserving case:
# a<->t, c<->g, r<->y, m<->k, b<->v, d<->h; n, w and s map to themselves.
seq.tr("aAcCgGtTnNrRyYmMkKwWsSbBdDhHvV","tTgGcCaAnNyYrRkKmMwWsSvVhHdDbB")
44
# Fragment of a second helper (presumably `mapwildcards`, which is called
# elsewhere in this file — confirm). Lower-cases `seq`, then replaces each of
# the ambiguity codes r, y, m, k, w, s, b, d, h, v with "n". The upper-case
# entries of the mapping can never match after `downcase`.
seq.downcase.tr("rRyYmMkKwWsSbBdDhHvV", "nNnNnNnNnNnNnNnNnNnN")
47
# Builds, from the FASTA-style file `filename`, the reference string that the
# decode tests compare against.
#
# Visible steps: the file content is split on ">" into records; each record is
# split into lines, the first line is taken as the header and the remaining
# lines are joined into one sequence; the reverse complement of a sequence
# (es_revcomp) is collected in `rcseqs`; the sequences are joined with "|" and
# the reverse complements, in reversed order, are appended after another "|".
#
# filename - path of the sequence file to read
# mirrored - defaults to false; presumably guards the reverse-complement
#            append (the condition itself is not visible here — confirm)
# rm       - read mode, defaults to "fwd"
#
# Raises a RuntimeError "unknown readmode", presumably when `rm` is none of the
# handled modes (the dispatch on `rm` is not visible here — confirm).
#
# NOTE(review): several lines of this method (loop headers, conditionals, the
# read-mode handling, `end`s and the return value) are missing from this
# fragment.
def getseq(filename, mirrored = false, rm = "fwd")
50
File.open(filename) do |file|
51
seqs = file.read.split(">")
54
# First line of a record is its header; the rest is sequence data.
seqarr = seq.split(/\n/)
55
header = seqarr.shift.chomp
56
seq = seqarr.collect{|v| v.chomp}.join('')
58
rcseqs.push(es_revcomp(seq))
61
# "|" separates the individual sequences in the concatenated string.
ret = sequences.join("|")
63
ret += "|" + rcseqs.reverse.join("|")
75
raise "unknown readmode"
80
# Compares `gt encseq decode -output concat -range ...` output for random
# ranges against the same ranges cut out of the reference string from getseq.
#
# filename    - input sequence file; only its basename is passed to gt
# mirrored    - adds -mirrored to the command line when true
# lossless    - adds -lossless to the command line when true
# readmode    - value passed to -dir
# singlechars - adds -singlechars to the command line when true
# numsamples  - defaults to NUMSAMPLES (defined outside this fragment);
#               presumably the number of random ranges — confirm
#
# For protein inputs (AATESTSEQS) with `mirrored` set, the decode call is
# expected to fail with return value 1. Otherwise the expected substring is
# written to the file "seqout" and diffed against `last_stdout`; for DNA inputs
# decoded without -lossless the expectation is first passed through
# mapwildcards.
#
# NOTE(review): several lines of this method (loop headers, the computation of
# `stop`, `else`/`end` lines, the write to `f`) are missing from this fragment.
def run_encseq_comparison(filename, mirrored, lossless, readmode, singlechars,
81
numsamples = NUMSAMPLES)
82
seq = getseq(filename, mirrored, readmode)
85
# Random range: length below half the sequence length, start chosen so that
# start + len stays inside the sequence.
len = rand((seq.length)/2)
86
start = rand(seq.length-1-len);
88
ranges.push([start, stop])
92
line = "#{$bin}gt encseq decode -output concat " + \
93
"-range #{rng[0]} #{rng[1]} " + \
94
"#{"-lossless" if lossless} " + \
95
"-dir #{readmode} #{"-mirrored" if mirrored} " + \
96
"#{"-singlechars" if singlechars} #{filename.split('/').last}"
97
if mirrored and AATESTSEQS.include?(filename)
98
# -mirroring should fail on proteins
99
run_test(line, :retval => 1)
102
File.open("seqout", "w+") do |f|
103
# Inclusive slice: both range ends are part of the expected output.
outseq = seq[rng[0]..rng[1]]
104
if DNATESTSEQS.include?(filename) and !lossless then
105
outseq = mapwildcards(outseq)
110
run "diff seqout #{last_stdout}"
115
# Declares one test for input file `s` and read mode `readmode`, covering
# every combination of lossless, mirrored and singlechars (each false/true).
#
# s        - path of the input sequence file; its basename names the test and
#            is passed to `gt encseq decode`
# readmode - value passed to -dir
#
# For each lossless setting the file is encoded (with -lossless when set).
# For each mirrored/singlechars combination the random-range comparison
# (run_encseq_comparison) runs first, then a full decode is compared against
# the whole reference string from getseq. Protein inputs (AATESTSEQS) with
# `mirrored` set are expected to fail with return value 1; DNA inputs decoded
# without -lossless have the expectation passed through mapwildcards before it
# is diffed (file "seqout" vs. `last_stdout`).
#
# NOTE(review): several lines of this method (the tail of the encode command,
# `else`/`end` lines, the write to `f`) are missing from this fragment.
def testformirrored(s, readmode)
116
Name "gt encseq decode #{s.split('/').last} #{readmode}"
117
Keywords "encseq gt_encseq_decode mirroring lossless"
119
[false, true].each do |lossless|
121
run "#{$bin}gt encseq encode " + \
122
"#{"-lossless" if lossless} " + \
124
[false, true].each do |mirrored|
125
[false, true].each do |singlechars|
126
run_encseq_comparison(s, mirrored, lossless, readmode, singlechars)
127
seq = getseq(s, mirrored, readmode)
128
line = "#{$bin}gt encseq decode -output concat -dir #{readmode} " + \
129
"#{"-mirrored" if mirrored} " + \
130
"#{"-lossless" if lossless} " + \
131
"#{"-singlechars" if singlechars} ./#{s.split('/').last}"
132
if mirrored and AATESTSEQS.include?(s)
133
# -mirroring should fail on proteins
134
run_test(line, :retval => 1)
137
File.open("seqout", "w+") do |f|
138
if DNATESTSEQS.include?(s) and !lossless then
139
seq = mapwildcards(seq)
144
run "diff seqout #{last_stdout}"
152
# Registers the decode tests: every DNA input is tested with all four DNA read
# modes, every protein input with the two standard read modes only.
# NOTE(review): the closing `end` lines of these loops are not visible in this
# fragment.
DNATESTSEQS.each do |s|
153
DNAREADMODES.each do |readmode|
154
testformirrored(s, readmode)
158
AATESTSEQS.each do |s|
159
STDREADMODES.each do |readmode|
160
testformirrored(s, readmode)
165
# FASTA inputs for the sequence-format tests.
# NOTE(review): the gaps in the interleaved numbers suggest further list
# entries that are not visible in this fragment.
fastafiles = ["Atinsert.fna",
174
"trna_glutamine.fna",
177
"Arabidopsis-C99826.fna"]
178
# GenBank and EMBL file names are derived from the FASTA names by replacing
# every ".fna" with ".gbk" / ".embl".
genbankfiles = fastafiles.collect{ |f| f.gsub(".fna",".gbk") }
179
emblfiles = fastafiles.collect{ |f| f.gsub(".fna",".embl") }
181
[genbankfiles, emblfiles].each do |formatfiles|
182
formatfiles.each do |formatfile|
183
Name "gt sequence formats (#{formatfile})"
184
Keywords "gt_encseq formats"
186
fasta = formatfile.gsub(/\.[a-z]+$/, ".fna")
187
if File.exists?("#{$testdata}#{fasta}") then
188
run "cp #{$testdata}#{formatfile} infile"
189
run_test "#{$bin}gt encseq encode -v -indexname sfx infile"
190
run_test "#{$bin}gt encseq decode -output concat sfx > sfx.seq"
191
run_test "#{$bin}gt encseq info sfx > sfx.info"
192
run_test "#{$bin}gt encseq check sfx"
193
run "cp #{$testdata}#{fasta} infile"
194
run_test "#{$bin}gt encseq encode -v -indexname sfx infile"
195
run_test "#{$bin}gt encseq decode -output concat sfx > sfx2.seq"
196
run_test "#{$bin}gt encseq info sfx > sfx2.info"
197
run "diff sfx.seq sfx2.seq"
198
run "diff sfx.info sfx2.info"
204
# Checks that the "range" lines reported by `gt encseq info -mirrored` for
# wildcardatend.fna equal those reported (without -mirrored) for
# wildcardatend_rev.fna. Both inputs are copied to "infile" before encoding so
# the index name is the same in both runs.
Name "gt encseq mirrored trailing wildcard"
205
Keywords "encseq gt_encseq_encode wildcards mirror"
207
run "cp #{$testdata}wildcardatend.fna infile"
208
run_test "#{$bin}gt encseq encode infile"
209
run_test "#{$bin}gt encseq info -mirrored infile | grep range > mirr.info"
210
run "cp #{$testdata}wildcardatend_rev.fna infile"
211
run_test "#{$bin}gt encseq encode infile"
212
run_test "#{$bin}gt encseq info infile | grep range > rev.info"
213
run "diff mirr.info rev.info"
216
# Same comparison for the input pair nowildcardatend.fna /
# nowildcardatend_rev.fna.
Name "gt encseq mirrored no trailing wildcard"
217
Keywords "encseq gt_encseq_encode wildcards mirror"
219
run "cp #{$testdata}nowildcardatend.fna infile"
220
run_test "#{$bin}gt encseq encode infile"
221
run_test "#{$bin}gt encseq info -mirrored infile | grep range > mirr.info"
222
run "cp #{$testdata}nowildcardatend_rev.fna infile"
223
run_test "#{$bin}gt encseq encode infile"
224
run_test "#{$bin}gt encseq info infile | grep range > rev.info"
225
run "diff mirr.info rev.info"
228
# Decodes a single sequence (-seq 3) from an index "foo" built from
# Atinsert.fna and diffs the output against Atinsert_single_3.fna.
Name "gt encseq decode single sequence"
229
Keywords "encseq gt_encseq_decode single"
231
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
232
run_test "#{$bin}gt encseq decode -seq 3 foo"
233
run "diff #{last_stdout} #{$testdata}Atinsert_single_3.fna"
236
# Reverse direction: -dir rev -seq 17 is diffed against
# Atinsert_single_3_rev.fna.
Name "gt encseq decode single sequence (reverse)"
237
Keywords "encseq gt_encseq_decode single"
239
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
240
run_test "#{$bin}gt encseq decode -dir rev -seq 17 foo"
241
run "diff #{last_stdout} #{$testdata}Atinsert_single_3_rev.fna"
244
# Negative test: -seq 36 is expected to fail (:retval => 1) with stderr
# matching /exceeds/.
Name "gt encseq decode single sequence (invalid seqnumber)"
245
Keywords "encseq gt_encseq_decode single"
247
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
248
run_test "#{$bin}gt encseq decode -seq 36 foo", :retval => 1
249
grep last_stderr, /exceeds/
252
# Negative test: combining -seq with -output concat is expected to fail
# (:retval => 1) with stderr matching /can only be used with the/.
Name "gt encseq decode single sequence (with -output concat)"
253
Keywords "encseq gt_encseq_decode single"
255
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
256
run_test "#{$bin}gt encseq decode -output concat -seq 36 foo", :retval => 1
257
grep last_stderr, /can only be used with the/
260
# Decodes sequences 3 to 7 (-seqrange 3 7) from an index "foo" built from
# Atinsert.fna and diffs the output against Atinsert_seqrange_3-7.fna.
Name "gt encseq decode sequence range"
261
Keywords "encseq gt_encseq_decode seqrange"
263
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
264
run_test "#{$bin}gt encseq decode -seqrange 3 7 foo"
265
run "diff #{last_stdout} #{$testdata}Atinsert_seqrange_3-7.fna"
268
# Reverse direction: -dir rev -seqrange 13 17 is diffed against
# Atinsert_seqrange_13-17_rev.fna.
Name "gt encseq decode sequence range (reverse)"
269
Keywords "encseq gt_encseq_decode seqrange"
271
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
272
run_test "#{$bin}gt encseq decode -dir rev -seqrange 13 17 foo"
273
run "diff #{last_stdout} #{$testdata}Atinsert_seqrange_13-17_rev.fna"
276
# Negative test: a range starting at 37 is expected to fail (:retval => 1)
# with stderr matching /exceeding/.
Name "gt encseq decode sequence range (invalid range start)"
277
Keywords "encseq gt_encseq_decode seqrange"
279
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
280
run_test "#{$bin}gt encseq decode -seqrange 37 49 foo", :retval => 1
281
grep last_stderr, /exceeding/
284
# Negative test: a range ending at 49 is expected to fail (:retval => 1)
# with stderr matching /exceeding/.
Name "gt encseq decode sequence range (invalid range end)"
285
Keywords "encseq gt_encseq_decode seqrange"
287
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
288
run_test "#{$bin}gt encseq decode -seqrange 3 49 foo", :retval => 1
289
grep last_stderr, /exceeding/
292
# Negative test: combining -seqrange with -output concat; stderr must match
# /can only be used with the/.
# NOTE(review): the line continuing the `run_test` call (after the trailing
# backslash) is not visible in this fragment — presumably the expected return
# value; confirm against upstream.
Name "gt encseq decode sequence range (with -output concat)"
293
Keywords "encseq gt_encseq_decode seqrange"
295
run "#{$bin}gt encseq encode -indexname foo #{$testdata}Atinsert.fna"
296
run_test "#{$bin}gt encseq decode -output concat -seqrange 3 49 foo", \
298
grep last_stderr, /can only be used with the/
301
# Runs the Lua script gtscripts/encseq.lua through the gt binary, passing the
# test data directory as its argument; the script is expected to succeed.
Name "gt encseq Lua bindings"
302
Keywords "encseq gt_scripts "
304
run_test "#{$bin}gt #{$testdata}gtscripts/encseq.lua #{$testdata}"
307
# Header compatibility tests for prebuilt indexes foo.<bit> in the test data
# directory. `is64` holds the result of running `gt -64bit` via Kernel.system.
# NOTE(review): `bit` is assigned on lines not visible in this fragment,
# presumably derived from `is64` — confirm against upstream.
#
# Success case: `gt encseq info -noindexname` output, with and without -nomap,
# is diffed against the stored foo.<bit>.info_map / foo.<bit>.info_nomap.
Name "gt encseq 64bit/32bit header (success)"
308
Keywords "encseq encseq_file_format"
310
is64 = Kernel.system("#{$bin}gt -64bit")
316
run_test "#{$bin}gt encseq info -noindexname #{$testdata}foo.#{bit}"
317
run "diff #{last_stdout} #{$testdata}foo.#{bit}.info_map"
318
run_test "#{$bin}gt encseq info -noindexname -nomap #{$testdata}foo.#{bit}"
319
run "diff #{last_stdout} #{$testdata}foo.#{bit}.info_nomap"
322
# Failure case: stderr must match /please use correct index for this platform/
# both with and without -nomap.
# NOTE(review): the lines continuing the two `run_test` calls (after the
# trailing backslashes) are not visible here — presumably the expected return
# value; confirm against upstream.
Name "gt encseq 64bit/32bit header (failure)"
323
Keywords "encseq encseq_file_format"
325
is64 = Kernel.system("#{$bin}gt -64bit")
331
run_test "#{$bin}gt encseq info -noindexname #{$testdata}foo.#{bit}", \
333
grep last_stderr, /please use correct index for this platform/
334
run_test "#{$bin}gt encseq info -noindexname -nomap #{$testdata}foo.#{bit}", \
336
grep last_stderr, /please use correct index for this platform/
339
# Incompatible format version: for foo.<bit>.ver0, stderr must match
# /is format version 0/ both with and without -nomap. The continuation lines
# of the `run_test` calls are likewise not visible here.
Name "gt encseq incompatible file format version"
340
Keywords "encseq encseq_file_format"
342
is64 = Kernel.system("#{$bin}gt -64bit")
348
run_test "#{$bin}gt encseq info -noindexname #{$testdata}foo.#{bit}.ver0", \
350
grep last_stderr, /is format version 0/
351
run_test "#{$bin}gt encseq info -noindexname -nomap #{$testdata}foo.#{bit}.ver0", \
353
grep last_stderr, /is format version 0/