5
use enum qw(:u_ refmethod refsource refgroup refseq refstart refstop refscore refstrand refphase qrystart qrystop sizes starts);
6
use enum qw(:v_ refmethod refsource refgroup refseq refstrand refscore refphase txstart txstop cdsstart cdsstop exonstarts exonstops);
8
use enum qw(:all_bacends__ x matches misMatches repMatches nCount qNumInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
9
use enum qw(:all_est__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
10
use enum qw(:all_mrna__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
11
use enum qw(:all_sts_primer__ matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
12
use enum qw(:all_sts_seq__ matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
13
use enum qw(:bacEndPairs__ bin chrom chromStart chromEnd name score strand pslTable lfCount lfStarts lfSizes lfNames);
14
use enum qw(:blatFish__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
15
use enum qw(:gap__ bin chrom chromStart chromEnd ix n size type bridge);
16
use enum qw(:gl__ bin frag start end strand);
17
use enum qw(:gold__ bin chrom chromStart chromEnd ix type frag fragStart fragEnd strand);
18
use enum qw(:intronEst__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
19
use enum qw(:mrna__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
20
use enum qw(:rmsk__ bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEnd repLeft id);
21
use enum qw(:clonePos__ name seqSize phase chrom chromStart chromEnd stage faFile);
22
use enum qw(:ctgPos__ contig size chrom chromStart chromEnd);
23
use enum qw(:cytoBand__ chrom chromStart chromEnd name gieStain);
24
use enum qw(:fishClones__ chrom chromStart chromEnd name score placeCount bandStarts bandEnds labs placeType accCount accNames stsCount stsNames beCount beNames);
25
use enum qw(:gcPercent__ chrom chromStart chromEnd name gcPpt);
26
use enum qw(:genscan__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
27
use enum qw(:genscanSubopt__ bin chrom chromStart chromEnd name score strand);
28
use enum qw(:jaxOrtholog__ humanSymbol humanBand mgiId mouseSymbol mouseChr mouseCm mouseBand);
29
use enum qw(:refGene__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
30
use enum qw(:refLink__ name product mrnaAcc protAcc geneName prodName locusLinkID omimId);
31
use enum qw(:refSeqAli__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
32
use enum qw(:simpleRepeat__ bin chrom chromStart chromEnd name period copyNum consensusSize perMatch perIndel score A C G T entropy sequence);
33
use enum qw(:stsAlias__ alias identNo trueName);
34
use enum qw(:stsInfo__ identNo name gbCount genbank gdbCount gdb nameCount otherNames dbSTSid otherDbstsCount otherDbSTS leftPrimer rightPrimer distance organism sequence otherUCSCcount otherUCSC mergeUCSCcount mergeUCSC genethonName genethonChr genethonPos genethonLOD marshfieldName marshfieldChr marshfieldPos marshfieldLOD wiyacName wiyacChr wiyacPos wiyacLOD wirhName wirhChr wirhPos wirhLOD gm99gb4Name gm99gb4Chr gm99gb4Pos gm99gb4LOD gm99g3Name gm99g3Chr gm99g3Pos gm99g3LOD tngName tngChr tngPos tngLOD);
35
use enum qw(:stsMap__ chrom chromStart chromEnd name score identNo ctgAcc otherAcc genethonChrom genethonPos marshfieldChrom marshfieldPos gm99Gb4Chrom gm99Gb4Pos shgcTngChrom shgcTngPos shgcG3Chrom shgcG3Pos wiYacChrom wiYacPos wiRhChrom wiRhPos fishChrom beginBand endBand lab);
36
use enum qw(:uniGene_2__ bin chrom chromStart chromEnd name score strand txStart txEnd reserved exonCount exonStarts exonEnds);
37
###############################################
39
###############################################
42
my %nolandmark = map {$_=>1} qw(gap cpgIsland recombRate_decode recombRate_marshfield recombRate_genethon
43
humMusL zoom1_humMusL zoom50_humMusL zoom2500_humMusL
44
genscanSubopt simpleRepeat snpNih snpTsc
47
foreach my $filename (@ARGV){
48
my $newfilename = $filename;
49
$newfilename =~ s/txt\.gz/gff/;
50
open(my $fhi, "zcat $filename |");
51
open(my $fho, ">$newfilename");
53
while(my $line = <$fhi>){
55
#these three should work the same way as unigene, but the fields are different order
56
# $filename =~ /affyRatio/ ? toGFF($line,$fho,['affyRatio', '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
57
# $filename =~ /nci60/ ? toGFF($line,$fho,['nci60', '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
58
# $filename =~ /rnaCluster/ ? toGFF($line,$fho,['rnaCluster', '', 4, 1, 2, 3, 5, 6,-1, 7, 8,11,12]) :
60
#these two are not yet handled
61
# $filename =~ /cpgIsland/ ? toGFF($line,$fho,['cpgIsland', '', 3, 0, 1, 2,-1,-1,-1]) :
62
# $filename =~ /estOrientInfo/ ? toGFF($line,$fho,['estOrientInfo', '',]) :
65
$filename =~ /uniGene_2/ ? toGFF($line,$fho,['uniGene_2', '', 4, 1, 2, 3, 5, 6,-1,-1,-1,11,12]) :
67
$filename =~ /all_bacends/ ? toGFF($line,$fho,['bacends', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
68
$filename =~ /all_est/ ? toGFF($line,$fho,['est', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
69
$filename =~ /all_mrna/ ? toGFF($line,$fho,['mrna', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
70
$filename =~ /all_sts_primer/ ? toGFF($line,$fho,['sts_primer', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
71
$filename =~ /all_sts_seq/ ? toGFF($line,$fho,['sts_seq', '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
72
$filename =~ /blastzBestMouse/ ? toGFF($line,$fho,['blastzBestMouse', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
73
$filename =~ /blastzMm2/ ? toGFF($line,$fho,['blastzMm2', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
74
$filename =~ /blastzTightMouse/ ? toGFF($line,$fho,['blastzTightMouse', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
75
$filename =~ /blatFish/ ? toGFF($line,$fho,['blatFish', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
76
$filename =~ /chimpBac/ ? toGFF($line,$fho,['chimpBac', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
77
$filename =~ /chimpBlat/ ? toGFF($line,$fho,['chimpBlat', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
78
$filename =~ /clonePos/ ? toGFF($line,$fho,['clonePos', '', 0, 3, 4, 5,-1,-1, 2]) :
79
$filename =~ /ctgPos/ ? toGFF($line,$fho,['ctgPos', '', 0, 2, 3, 4,-1,-1,-1]) :
80
$filename =~ /cytoBand/ ? toGFF($line,$fho,['cytoBand', '', 3, 0, 1, 2,-1,-1,-1]) :
81
$filename =~ /est/ ? toGFF($line,$fho,['est', '',10,14,16,17,-1,9,-1,12,13,19,21]) :
82
$filename =~ /fishClones/ ? toGFF($line,$fho,['fishClones', '', 3, 0, 1, 2, 4,-1,-1]) :
83
$filename =~ /gap/ ? toGFF($line,$fho,['gap', '', 7, 1, 2, 3,-1,-1,-1]) :
84
$filename =~ /gcPercent/ ? toGFF($line,$fho,['gcPercent', '', 3, 0, 1, 2, 4,-1,-1]) :
85
$filename =~ /genMapDb/ ? toGFF($line,$fho,['genMapDb', '', 3, 0, 1, 2, 4, 5,-1]) :
86
$filename =~ /genscanSubopt/ ? toGFF($line,$fho,['genscanSubopt', '', 4, 1, 2, 3, 5, 6,-1]) :
87
$filename =~ /gold/ ? toGFF($line,$fho,['gold', '', 6, 1, 2, 3,-1, 9,-1, 7, 8]) :
88
$filename =~ /intronEst/ ? toGFF($line,$fho,['intron_est', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
89
$filename =~ /recombRate/ ? eval { toGFF($line,$fho,['recombRate_decode', '', 3, 0, 1, 2, 4,-1,-1]);
90
toGFF($line,$fho,['recombRate_marshfield', '', 3, 0, 1, 2, 7,-1,-1]);
91
toGFF($line,$fho,['recombRate_genethon', '', 3, 0, 1, 2,10,-1,-1]); } :
93
$filename =~ /refSeqAli/ ? toGFF($line,$fho,['refSeqAli', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
94
$filename =~ /rmsk/ ? toGFF($line,$fho,['rmsk', '',10, 5, 6, 7,-1, 9,-1,13,14]) :
95
$filename =~ /simpleRepeat/ ? toGFF($line,$fho,['simpleRepeat', '', 4, 1, 2, 3,10,-1,-1]) :
96
$filename =~ /snpNih/ ? toGFF($line,$fho,['snpNih', '', 4, 1, 2, 3]) :
97
$filename =~ /snpTsc/ ? toGFF($line,$fho,['snpTsc', '', 4, 1, 2, 3]) :
98
$filename =~ /stsMap/ ? toGFF($line,$fho,['stsMap', '', 3, 0, 1, 2, 4,-1,-1]) :
99
$filename =~ /xenoEst/ ? toGFF($line,$fho,['xenoEst', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
100
$filename =~ /xenoMrna/ ? toGFF($line,$fho,['xenoMrna', '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
101
$filename =~ /zoom1_humMusL/ ? toGFF($line,$fho,['zoom1_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
102
$filename =~ /zoom2500_humMusL/ ? toGFF($line,$fho,['zoom2500_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
103
$filename =~ /zoom50_humMusL/ ? toGFF($line,$fho,['zoom50_humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
104
$filename =~ /humMusL/ ? toGFF($line,$fho,['humMusL', '', 4, 1, 2, 3, 5, 6,-1]) :
106
$filename =~ /refGene/ ? toGFF2($line,$fho,['refGene', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
107
$filename =~ /genscan/ ? toGFF2($line,$fho,['genscan', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
108
$filename =~ /acembly/ ? toGFF2($line,$fho,['acembly', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
109
$filename =~ /ensGene/ ? toGFF2($line,$fho,['ensGene', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
110
$filename =~ /refFlat/ ? toGFF2($line,$fho,['refFlat', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
111
$filename =~ /sanger22pseudo/ ? toGFF2($line,$fho,['sanger22pseudo', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
112
$filename =~ /sanger22/ ? toGFF2($line,$fho,['sanger22', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
113
$filename =~ /softberryGene/ ? toGFF2($line,$fho,['softberryGene', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
114
$filename =~ /twinscan/ ? toGFF2($line,$fho,['twinscan', '', 0, 1, 2,-1,-1, 3, 4, 5, 6, 8, 9]) :
123
###############################################
124
# begin filetype-specific subroutines
125
###############################################
128
my($line,$fho,$maps) = @_;
130
chomp $line; my @fields = split /\t/, $line;
132
if(!$nolandmark{render($maps->[v_refmethod],\@fields)}){
133
print join "\t", map {render($maps->[$_],\@fields)} (v_refseq,
142
print "Sequence " . render($maps->[v_refgroup],\@fields);
146
if(!$nolandmark{render($maps->[v_refmethod],\@fields)}){
147
print join "\t", map {render($maps->[$_],\@fields)} (v_refseq,
156
print "CDS " . render($maps->[v_refgroup],\@fields);
160
if(defined($maps->[v_exonstarts]) and defined($maps->[v_exonstops])){
161
my @starts = split /,/, render($maps->[v_exonstarts],\@fields);
162
my @stops = split /,/, render($maps->[v_exonstops],\@fields);
164
while(my $start = shift @starts){
165
my $stop = shift @stops;
166
print join "\t", (render($maps->[v_refseq],\@fields),
167
render($maps->[v_refsource],\@fields),
168
render($maps->[v_refmethod],\@fields),
171
render($maps->[v_refscore],\@fields),
172
render($maps->[v_refstrand],\@fields),
173
render($maps->[v_refphase],\@fields),
174
render($maps->[v_refmethod],\@fields) . " " . render($maps->[v_refgroup],\@fields)
182
my($line,$fho,$maps) = @_;
184
chomp $line; my @fields = split /\t/, $line;
186
if(!$maps->[u_qrystart] and !$nolandmark{render($maps->[u_refmethod],\@fields)}){
187
print join "\t", map {render($maps->[$_],\@fields)} (u_refseq,
196
print "Sequence " . render($maps->[u_refgroup],\@fields);
199
print join "\t", map {render($maps->[$_],\@fields)} (u_refseq,
208
if($maps->[u_qrystart] >= 0){
209
print "Target:" . render($maps->[u_refmethod],\@fields) . " ";
210
print render($maps->[u_refgroup],\@fields) . " " .
211
render($maps->[u_qrystart],\@fields) . " " .
212
render($maps->[u_qrystop], \@fields);
214
print "Sequence " . render($maps->[u_refgroup],\@fields) . " ";
218
if(defined($maps->[u_starts]) and defined($maps->[u_sizes])){
219
my @starts = split /,/, render($maps->[u_starts],\@fields);
220
my @sizes = split /,/, render($maps->[u_sizes],\@fields);
223
while(defined($start = shift @starts)){
224
my $size = shift @sizes;
226
if($maps->[u_qrystart] < 1 and $maps->[u_qrystop] < 1){
227
print join "\t", (render($maps->[u_refseq],\@fields),
228
render($maps->[u_refsource],\@fields),
229
render($maps->[u_refmethod],\@fields),
230
render($maps->[u_refstart],\@fields) + $start,
231
render($maps->[u_refstart],\@fields) + $start + $size,
232
render($maps->[u_refscore],\@fields),
233
render($maps->[u_refstrand],\@fields),
234
render($maps->[u_refphase],\@fields),
235
render($maps->[u_refmethod],\@fields) . " " . render($maps->[u_refgroup],\@fields)
238
print join "\t", (render($maps->[u_refseq],\@fields),
239
render($maps->[u_refsource],\@fields),
240
render($maps->[u_refmethod],\@fields),
243
render($maps->[u_refscore],\@fields),
244
render($maps->[u_refstrand],\@fields),
245
render($maps->[u_refphase],\@fields),
246
render($maps->[u_refmethod],\@fields) . " " . render($maps->[u_refgroup],\@fields)
254
my($index,$fields) = @_;
255
return '.' if $index == -1;
256
return $index unless $index =~ /^\d+$/;
257
return $fields->[$index];