2
Accessing UCSC Table Browser.
4
__docformat__ = 'javadoc en'
10
from interPopula import Config
12
# Remote location settings for the UCSC downloads.
# ftp_site: host name of the download server (presumably passed to the FTP
#           client; the connection code is not visible in this listing).
# ftp_root: top-level remote directory; loadFile() builds the remote path as
#           ftp_root + '/' + db + '/' + dir + '/' + file.
ftp_site = 'hgdownload.cse.ucsc.edu'
13
ftp_root = 'goldenPath'
15
# Prepares the local SQLite store for one UCSC database and opens it.
#
# Several lines of this function are absent from this listing, so only the
# visible behaviour is described:
#   - the store is the file '<db>.db' inside Config.dataDir/TableBrowser;
#   - the tables known_gene and gene_exons are created, together with the
#     indexes kg_asc_id, kg_prot_id (on known_gene) and ge_asc_id
#     (on gene_exons);
#   - sqlite3.OperationalError is caught after the schema statements.
#
# @param db UCSC database name; concatenated into the .db file name.
#
# NOTE(review): presumably returns dbConn (the visible caller, __init__,
#   stores the result as self.dbConn) - the return statement is not visible.
# NOTE(review): the body of the OperationalError handler is not visible;
#   presumably it ignores "table already exists" - confirm.
# NOTE(review): the statement guarded by the "dir already exists" pass is not
#   visible; presumably a directory creation - confirm.
# NOTE(review): paths are built by concatenating os.sep; os.path.join is the
#   conventional form.
def prepareDataDir(db):
16
tableDir = Config.dataDir + os.sep + 'TableBrowser'
20
pass #Probably OK, dir already exists
21
dbConn = sqlite3.connect(tableDir + os.sep + db + '.db')
23
dbConn.execute('''CREATE TABLE known_gene (
25
chromosome VARCHAR(2),
33
dbConn.execute('CREATE INDEX kg_asc_id ON known_gene(asc_id)')
34
dbConn.execute('CREATE INDEX kg_prot_id ON known_gene(prot_id)')
35
dbConn.execute('''CREATE TABLE gene_exons (
40
dbConn.execute('CREATE INDEX ge_asc_id ON gene_exons(asc_id)')
41
except sqlite3.OperationalError:
46
# Downloads one file from the UCSC server into a temporary local file.
#
# Several lines of this function are absent from this listing (creation of
# the `ftp` object, the branch that chooses between the two download paths,
# the decompression copy loop, cleanup and the return statement), so only the
# visible behaviour is described.
#
# @param db    UCSC database name; second component of the remote path.
# @param dir   Remote sub-directory below the database directory.
# @param file  Remote file name.
# @param unzip Defaults to False. Presumably selects the download-then-gunzip
#              path - the test on this flag is not visible; confirm.
#
# NOTE(review): presumably returns the temporary file path (the visible
#   caller binds the result to a variable named fName) - confirm.
# NOTE(review): `dir` and `file` shadow builtins; they are part of the
#   callable's keyword interface, so renaming them would affect callers.
def loadFile(db, dir, file, unzip = False):
47
tableDir = Config.dataDir + os.sep + 'TableBrowser'
48
tempTBFile = tableDir + os.sep + 'tmp_tb'
49
tempTBFileGz = tempTBFile + '.gz'
52
# The remote path is joined with '/', not with the local os.sep.
ftpDir = ftp_root + '/' + db + '/' + dir
53
fname = ftpDir + '/' + file
56
# NOTE(review): the file object opened for the write callback is never
# closed explicitly in the visible code.
ftp.retrbinary('RETR ' + fname, open(tempTBFileGz, 'wb').write)
57
gz = gzip.open(tempTBFileGz, 'rb')
58
# NOTE(review): output is opened in text mode ('w') while the gzip stream is
# read in binary mode ('rb') - confirm this is intended.
uncomp = open(tempTBFile, 'w')
66
# Second download path: the remote file is written straight to the
# uncompressed temporary name. Same unclosed-file remark as above.
ftp.retrbinary('RETR ' + fname, open(tempTBFile, 'wb').write)
70
# Runs a row count query against one table.
#
# Only the query itself is visible in this listing; the creation of the
# cursor `c` and the retrieval of the result are absent.
#
# @param dbConn Database connection (presumably the cursor is obtained from
#               it - that line is not visible).
# @param table  Table name, concatenated directly into the SQL text.
#
# NOTE(review): presumably returns the count - the return statement is not
#   visible; confirm.
# NOTE(review): identifiers cannot be bound as SQL parameters, so `table`
#   must only ever come from trusted, hard-coded names.
def countLines(dbConn, table):
72
c.execute('SELECT count(*) FROM ' + table)
78
'''Wrapper class for KnownGene table
80
# Constructor. The visible code opens the local store through
# prepareDataDir(db) and keeps the result as self.dbConn.
# NOTE(review): the docstring states that the database is loaded if it is not
#   loaded yet; the code that does this is not visible in this listing.
# NOTE(review): other visible code reads self.db; presumably it is assigned
#   here - that line is not visible, confirm.
def __init__(self, db):
81
'''Inits the object. NOTE: will load the database, if not loaded!
83
@param db UCSC table, like hg18 or bosTau1
85
self.dbConn = prepareDataDir(db)
89
'''Loads the database (also does cleanup).
92
fName = loadFile(self.db, 'database', 'knownGene.txt.gz' , True)
95
c = self.dbConn.cursor()
96
c.execute('DELETE FROM known_gene')
97
c.execute('DELETE FROM gene_exons')
99
toks = l.rstrip().split('\t')
103
txStart = int(toks[3])
105
cdsStart = int(toks[5])
106
cdsEnd = int(toks[6])
107
numExons = int(toks[7])
108
exonsStart = toks[8].split(',')
109
exonsEnd = toks[9].split(',')
112
INSERT INTO known_gene (
113
asc_id, chromosome, strand, tx_start, tx_end,
114
cds_start, cds_end, prot_id)
115
VALUES (?,?,?,?,?,?,?,?)''',
116
(ascId, chr, strand, txStart, txEnd,
117
cdsStart, cdsEnd, protId))
118
for i in range(len(exonsStart)):
119
exonStart = exonsStart[i]
120
if exonStart == '': continue
121
exonEnd = exonsEnd[i]
122
print 'es', exonStart
124
INSERT INTO gene_exons (asc_id, start, finish)
126
''', (ascId, exonStart, exonEnd))
132
"""Cleans the database.
134
Deletes all data from gene_exons and known_gene.
136
self.dbConn.execute('DELETE FROM gene_exons')
137
self.dbConn.execute('DELETE FROM known_gene')
140
'''Closes the database.
145
# Looks up ascIds by protein id.
# The visible code opens a cursor on self.dbConn, runs a query filtered with
# "prot_id = ?" (protId is bound as a parameter, not concatenated) and appends
# the first column of each row to asc_list.
# NOTE(review): the SELECT clause, the creation of asc_list, the loop header
#   and the return statement are not visible in this listing.
def getAscIdsFromProtId(self, protId):
146
'''Returns a list of ascIds for a certain protId.
148
@param protId Protein Id, like P53_HUMAN.
150
@return List of ascIds.
152
c = self.dbConn.cursor()
156
WHERE prot_id = ?''', (protId,))
159
asc_list.append(asc[0])
162
def getAscId(self, ascId):
163
'''Gets all info (except exons) for a certain ascId.
166
@return a tuple with all data.
168
c = self.dbConn.cursor()
172
WHERE asc_id = ?''', (ascId,))