~ubuntu-branches/ubuntu/utopic/r-bioc-cummerbund/utopic

« back to all changes in this revision

Viewing changes to R/methods-CuffSet.R

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2013-12-28 17:17:25 UTC
  • mfrom: (1.1.2)
  • Revision ID: package-import@ubuntu.com-20131228171725-polmzo8go4m371c6
Tags: 2.4.1-1
* New upstream version
* debian/rules: Remove useless creation of ${R-Depends}
* debian/control: Versioned Build-Depends: r-base-dev (>= 3.0)
* debian/README.test: add hint how to test the package

Show diffs side-by-side

added added

removed removed

Lines of Context:
11
11
setMethod("initialize","CuffSet",
12
12
                function(.Object,
13
13
                                DB,
 
14
                                runInfo=data.frame(),
 
15
                                phenoData=data.frame(),
14
16
                                conditions=data.frame(),
15
17
                                genes,
16
18
                                isoforms,
22
24
                                ...){
23
25
                        .Object<-callNextMethod(.Object,
24
26
                                        DB = DB,
 
27
                                        runInfo=runInfo,
 
28
                                        phenoData=phenoData,
25
29
                                        conditions = conditions,
26
30
                                        genes = genes,
27
31
                                        isoforms = isoforms,
88
92
 
89
93
setMethod("samples",signature(object="CuffSet"),.samples)
90
94
 
 
95
.replicates<-function(object){
 
96
        replicateQuery<-"SELECT * FROM replicates r"
 
97
        dbGetQuery(object@DB,replicateQuery)
 
98
}
 
99
 
 
100
setMethod("replicates",signature(object="CuffSet"),.replicates)
 
101
 
91
102
setMethod("DB","CuffSet",function(object){
92
103
                return(object@DB)
93
104
                })
94
105
 
 
106
.runInfo<-function(object){
 
107
        runInfoQuery<-"SELECT * FROM runInfo"
 
108
        dbGetQuery(object@DB,runInfoQuery)
 
109
}
 
110
 
 
111
setMethod("runInfo","CuffSet",.runInfo)
 
112
 
 
113
#setMethod("phenoData","CuffSet",function(object){
 
114
#                       return(object@phenoData)
 
115
#               })
 
116
 
95
117
setMethod("conditions","CuffSet",function(object){
96
118
                return(object@conditions)
97
119
                })
124
146
                return(object@relCDS)
125
147
                })
126
148
 
 
149
.getGenome<-function(object){
 
150
        genomeQuery<-"SELECT value FROM runInfo WHERE param='genome'"
 
151
        genome<-dbGetQuery(object@DB,genomeQuery)
 
152
        genome<-unique(genome[,1])
 
153
        genome
 
154
}
127
155
 
128
156
#make CuffGene objects from a gene_ids
129
157
.getGene<-function(object,geneId,sampleIdList=NULL){
130
158
        
 
159
        #Get gene_id from geneId (can use any identifier now to select gene)
 
160
        geneId<-getGeneId(object,geneId)
 
161
        
 
162
        if(length(geneId)>1){
 
163
                stop("More than one gene_id found for given query. Please use getGenes() instead.")
 
164
        }
 
165
        
131
166
        #Sample subsetting
132
167
        if(!is.null(sampleIdList)){
133
168
                if(.checkSamples(object@DB,sampleIdList)){
150
185
        whereString = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",sep="")
151
186
        whereStringFPKM = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_name IN ',sampleString,')',sep="")
152
187
        whereStringDiff = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
 
188
        whereStringRep = paste("JOIN replicates r ON y.rep_name=r.rep_name WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (r.sample_name IN ',sampleString,')',sep="")
153
189
        
154
190
        #dbQueries
155
 
        geneAnnotationQuery<-paste("SELECT * from genes x ",whereString,sep="")
 
191
        geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING(gene_id) ",whereString,sep="")
156
192
        geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
157
193
        #print(geneFPKMQuery)
158
194
        geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ",whereStringDiff,sep="")
159
195
        #print(geneDiffQuery)
 
196
        geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y ON x.gene_id=y.gene_id ",whereStringRep,sep="")
 
197
        geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
 
198
        geneFeatureQuery<-paste("SELECT y.* FROM features y JOIN genes x on y.gene_id = x.gene_id ",whereString,sep="")
160
199
        
161
200
        isoformAnnotationQuery<-paste("SELECT * from isoforms i JOIN genes x ON i.gene_id = x.gene_id ",whereString,sep="")
162
201
        isoformFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
163
202
        isoformDiffQuery<-paste("SELECT y.* from isoforms i JOIN isoformExpDiffData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringDiff,sep="")
 
203
        isoformRepFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformReplicateData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringRep,sep="")
 
204
        isoformCountQuery<-paste("SELECT y.* from isoforms i JOIN isoformCount y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
164
205
        
165
206
        TSSAnnotationQuery<-paste("SELECT * from TSS t JOIN genes x ON t.gene_id = x.gene_id ",whereString,sep="")
166
207
        TSSFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
167
208
        TSSDiffQuery<-paste("SELECT y.* from TSS t JOIN TSSExpDiffData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringDiff,sep="")
 
209
        TSSRepFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSReplicateData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringRep,sep="")
 
210
        TSSCountQuery<-paste("SELECT y.* from TSS t JOIN TSSCount y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
 
211
        
168
212
        
169
213
        CDSAnnotationQuery<-paste("SELECT * from CDS c JOIN genes x ON c.gene_id = x.gene_id ",whereString,sep="")
170
214
        CDSFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
171
215
        CDSDiffQuery<-paste("SELECT y.* from CDS c JOIN CDSExpDiffData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringDiff,sep="")
 
216
        CDSRepFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSReplicateData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringRep,sep="")
 
217
        CDSCountQuery<-paste("SELECT y.* from CDS c JOIN CDSCount y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
172
218
        
173
219
        begin<-dbSendQuery(object@DB,"BEGIN;")
174
220
        
179
225
        genes.diff<-dbGetQuery(object@DB,geneDiffQuery)
180
226
        genes.diff$sample_1<-factor(genes.diff$sample_1,levels=myLevels)
181
227
        genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
 
228
        genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
 
229
        genes.count<-dbGetQuery(object@DB,geneCountQuery)
 
230
        genes.annotation<-dbGetQuery(object@DB,geneAnnotationQuery)
 
231
        genes.features<-dbGetQuery(object@DB,geneFeatureQuery)
182
232
        
183
233
        #isoforms
184
234
        isoform.fpkm<-dbGetQuery(object@DB,isoformFPKMQuery)
186
236
        isoform.diff<-dbGetQuery(object@DB,isoformDiffQuery)
187
237
        isoform.diff$sample_1<-factor(isoform.diff$sample_1,levels=myLevels)
188
238
        isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
 
239
        isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
 
240
        isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
 
241
        isoform.annotation<-dbGetQuery(object@DB,isoformAnnotationQuery)
189
242
        
190
243
        #CDS
191
244
        CDS.fpkm<-dbGetQuery(object@DB,CDSFPKMQuery)
193
246
        CDS.diff<-dbGetQuery(object@DB,CDSDiffQuery)
194
247
        CDS.diff$sample_1<-factor(CDS.diff$sample_1,levels=myLevels)
195
248
        CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
 
249
        CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
 
250
        CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
 
251
        CDS.annotation<-dbGetQuery(object@DB,CDSAnnotationQuery)
196
252
        
197
253
        #TSS
198
254
        TSS.fpkm<-dbGetQuery(object@DB,TSSFPKMQuery)
200
256
        TSS.diff<-dbGetQuery(object@DB,TSSDiffQuery)
201
257
        TSS.diff$sample_1<-factor(TSS.diff$sample_1,levels=myLevels)
202
258
        TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
203
 
        
 
259
        TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
 
260
        TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
 
261
        TSS.annotation<-dbGetQuery(object@DB,TSSAnnotationQuery)
 
262
        
 
263
        end<-dbSendQuery(object@DB,"END;")
 
264
        
 
265
        #write(.getGenome(object),stderr())
204
266
        res<-new("CuffGene",
205
267
                        id=geneId,
206
 
                        annotation=dbGetQuery(object@DB,geneAnnotationQuery),
 
268
                        features=genes.features,
 
269
                        annotation=genes.annotation,
207
270
                        fpkm=genes.fpkm,
208
271
                        diff=genes.diff,
 
272
                        repFpkm=genes.repFpkm,
 
273
                        count=genes.count,
 
274
                        genome=.getGenome(object),
209
275
                        isoforms=new("CuffFeature",
210
 
                                        annotation=dbGetQuery(object@DB,isoformAnnotationQuery),
 
276
                                        annotation=isoform.annotation,
211
277
                                        fpkm=isoform.fpkm,
212
 
                                        diff=isoform.diff
 
278
                                        diff=isoform.diff,
 
279
                                        repFpkm=isoform.repFpkm,
 
280
                                        count=isoform.count
213
281
                                        ),
214
282
                        TSS=new("CuffFeature",
215
 
                                        annotation=dbGetQuery(object@DB,TSSAnnotationQuery),
 
283
                                        annotation=TSS.annotation,
216
284
                                        fpkm=TSS.fpkm,
217
 
                                        diff=TSS.diff
218
 
                        ),
 
285
                                        diff=TSS.diff,
 
286
                                        repFpkm=TSS.repFpkm,
 
287
                                        count=TSS.count
 
288
                                        ),
219
289
                        CDS=new("CuffFeature",
220
 
                                        annotation=dbGetQuery(object@DB,CDSAnnotationQuery),
 
290
                                        annotation=CDS.annotation,
221
291
                                        fpkm=CDS.fpkm,
222
 
                                        diff=CDS.diff
223
 
                        )
224
 
 
 
292
                                        diff=CDS.diff,
 
293
                                        repFpkm=CDS.repFpkm,
 
294
                                        count=CDS.count
 
295
                                        )
225
296
                        
226
 
                )
227
 
        end<-dbSendQuery(object@DB,"END;")
228
 
                
 
297
                        )
229
298
                res
230
299
}
231
300
 
233
302
        
234
303
.getGenes<-function(object,geneIdList,sampleIdList=NULL){
235
304
        
 
305
        #Determine gene_id from geneIdList
 
306
        #This is useful so that people can pass, for example, isoform_id to geneIdList and getGenes will return full genes
 
307
        geneIdList<-getGeneId(object=object,geneIdList)
 
308
        
236
309
        #Sample subsetting
237
310
        if(!is.null(sampleIdList)){
238
311
                if(.checkSamples(object@DB,sampleIdList)){
280
353
        #dbQueries
281
354
        idQuery<-paste("SELECT DISTINCT gene_id from genes x ",whereStringGene,sep="")
282
355
        
283
 
        geneAnnotationQuery<-paste("SELECT * from genes x ", whereStringGene,sep="")
 
356
        #geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf ON x.gene_id=xf.gene_id ", whereStringGene,sep="")
 
357
        geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING (gene_id) ", whereStringGene,sep="")
284
358
        geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
285
359
        geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ", whereStringGeneDiff,sep="")
 
360
        geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
 
361
        geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
286
362
        
287
363
        isoformAnnotationQuery<-paste("SELECT x.* from isoforms x LEFT JOIN isoformFeatures xf ON x.isoform_id=xf.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
288
364
        isoformFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
289
365
        isoformDiffQuery<-paste("SELECT y.* from isoforms x JOIN isoformExpDiffData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
 
366
        isoformRepFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformReplicateData y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
 
367
        isoformCountQuery<-paste("SELECT y.* from isoforms x JOIN isoformCount y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
290
368
        
291
369
        TSSAnnotationQuery<-paste("SELECT x.* from TSS x LEFT JOIN TSSFeatures xf ON x.TSS_group_id=xf.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
292
370
        TSSFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
293
371
        TSSDiffQuery<-paste("SELECT y.* from TSS x JOIN TSSExpDiffData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
 
372
        TSSRepFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSReplicateData y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
 
373
        TSSCountQuery<-paste("SELECT y.* from TSS x JOIN TSSCount y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
294
374
        
295
375
        CDSAnnotationQuery<-paste("SELECT x.* from CDS x LEFT JOIN CDSFeatures xf ON x.CDS_id=xf.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
296
376
        CDSFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
297
377
        CDSDiffQuery<-paste("SELECT y.* from CDS x JOIN CDSExpDiffData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
 
378
        CDSRepFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSReplicateData y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
 
379
        CDSCountQuery<-paste("SELECT y.* from CDS x JOIN CDSCount y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
298
380
        
299
381
        promotersDistQuery<-paste("SELECT x.* FROM promoterDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
300
382
        splicingDistQuery<-paste("SELECT x.* FROM splicingDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
314
396
        genes.diff$sample_2<-factor(genes.diff$sample_2,levels=myLevels)
315
397
        write("\tAnnotation Data",stderr())
316
398
        genes.annot<-dbGetQuery(object@DB,geneAnnotationQuery)
 
399
        write("\tReplicate FPKMs",stderr())
 
400
        genes.repFpkm<-dbGetQuery(object@DB,geneRepFPKMQuery)
 
401
        write("\tCounts",stderr())
 
402
        genes.count<-dbGetQuery(object@DB,geneCountQuery)
317
403
        
318
404
        #isoforms
319
405
        write("Getting isoforms information:",stderr())
326
412
        isoform.diff$sample_2<-factor(isoform.diff$sample_2,levels=myLevels)
327
413
        write("\tAnnotation Data",stderr())
328
414
        isoform.annot<-dbGetQuery(object@DB,isoformAnnotationQuery)
 
415
        write("\tReplicate FPKMs",stderr())
 
416
        isoform.repFpkm<-dbGetQuery(object@DB,isoformRepFPKMQuery)
 
417
        write("\tCounts",stderr())
 
418
        isoform.count<-dbGetQuery(object@DB,isoformCountQuery)
329
419
        
330
420
        #CDS
331
421
        write("Getting CDS information:",stderr())
338
428
        CDS.diff$sample_2<-factor(CDS.diff$sample_2,levels=myLevels)
339
429
        write("\tAnnotation Data",stderr())
340
430
        CDS.annot<-dbGetQuery(object@DB,CDSAnnotationQuery)
341
 
        
 
431
        write("\tReplicate FPKMs",stderr())
 
432
        CDS.repFpkm<-dbGetQuery(object@DB,CDSRepFPKMQuery)
 
433
        write("\tCounts",stderr())
 
434
        CDS.count<-dbGetQuery(object@DB,CDSCountQuery)
342
435
        
343
436
        #TSS
344
437
        write("Getting TSS information:",stderr())
351
444
        TSS.diff$sample_2<-factor(TSS.diff$sample_2,levels=myLevels)
352
445
        write("\tAnnotation Data",stderr())
353
446
        TSS.annot<-dbGetQuery(object@DB,TSSAnnotationQuery)
 
447
        write("\tReplicate FPKMs",stderr())
 
448
        TSS.repFpkm<-dbGetQuery(object@DB,TSSRepFPKMQuery)
 
449
        write("\tCounts",stderr())
 
450
        TSS.count<-dbGetQuery(object@DB,TSSCountQuery)
 
451
        
 
452
        end<-dbSendQuery(object@DB,"END;")
354
453
        
355
454
        #Promoters
356
455
        write("Getting promoter information:", stderr())
372
471
        CDS.distData<-dbGetQuery(object@DB,CDSDistQuery)
373
472
        CDS.distData$sample_1<-factor(CDS.distData$sample_1,levels=myLevels)
374
473
        CDS.distData$sample_2<-factor(CDS.distData$sample_2,levels=myLevels)
375
 
 
376
474
        
377
475
        res<-new("CuffGeneSet",
378
476
                        #TODO: Fix ids so that it only displays those genes in CuffGeneSet
380
478
                        annotation=genes.annot,
381
479
                        fpkm=genes.fpkm,
382
480
                        diff=genes.diff,
 
481
                        repFpkm=genes.repFpkm,
 
482
                        count=genes.count,
 
483
                        genome=.getGenome(object),
383
484
                        isoforms=new("CuffFeatureSet",
384
485
                                        annotation=isoform.annot,
385
486
                                        fpkm=isoform.fpkm,
386
 
                                        diff=isoform.diff
 
487
                                        diff=isoform.diff,
 
488
                                        repFpkm=isoform.repFpkm,
 
489
                                        count=isoform.count
387
490
                                        ),
388
491
                        TSS=new("CuffFeatureSet",
389
492
                                        annotation=TSS.annot,
390
493
                                        fpkm=TSS.fpkm,
391
 
                                        diff=TSS.diff
 
494
                                        diff=TSS.diff,
 
495
                                        repFpkm=TSS.repFpkm,
 
496
                                        count=TSS.count
392
497
                                        ),
393
498
                        CDS=new("CuffFeatureSet",
394
499
                                        annotation=CDS.annot,
395
500
                                        fpkm=CDS.fpkm,
396
 
                                        diff=CDS.diff
 
501
                                        diff=CDS.diff,
 
502
                                        repFpkm=CDS.repFpkm,
 
503
                                        count=CDS.count
397
504
                                        ),
398
505
                        promoters=new("CuffFeatureSet",
399
506
                                        annotation=genes.annot,
410
517
                                        fpkm=genes.fpkm,
411
518
                                        diff=CDS.distData
412
519
                                        )
413
 
                        )
 
520
                        )       
 
521
        res
 
522
}
 
523
 
 
524
setMethod("getGenes",signature(object="CuffSet"),.getGenes)
 
525
 
 
526
.getGeneId<-function(object,idList){
 
527
        #Query that takes list of any identifier and retrieves gene_id values from db (does not report missing finds)
 
528
        searchString<-"("
 
529
        for(i in idList){
 
530
                searchString<-paste(searchString,"'",i,"',",sep="")
 
531
        }
 
532
        searchString<-substr(searchString,1,nchar(searchString)-1)
 
533
        searchString<-paste(searchString,")",sep="")
 
534
        
 
535
        geneIdQuery<-paste("SELECT DISTINCT g.gene_id FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id IN ",searchString," OR g.gene_short_name IN ",searchString," OR i.isoform_id IN ",searchString," OR t.tss_group_id IN ",searchString," OR c.CDS_id IN ",searchString,")",sep="")
 
536
        #print(geneIdQuery)
 
537
        res<-dbGetQuery(object@DB,geneIdQuery)
 
538
        as.vector(res[,1])
 
539
}
 
540
 
 
541
setMethod("getGeneId",signature(object="CuffSet"),.getGeneId)
 
542
 
 
543
.findGene<-function(object,query){
 
544
        #Utility to search for gene_id and gene_short_name given a single 'query' string (e.g. query='pink1' will return all genes with 'pink1' (case-insensitive) in the gene_short_name field.
 
545
        geneQuery<-paste("SELECT DISTINCT g.gene_id,g.gene_short_name FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id = '",query,"' OR g.gene_short_name = '",query,"' OR i.isoform_id = '",query,"' OR t.tss_group_id = '",query,"' OR c.CDS_id ='",query,"') OR g.gene_short_name LIKE '%",query,"%'",sep="")
 
546
        res<-dbGetQuery(object@DB,geneQuery)
 
547
        res
 
548
}
 
549
 
 
550
setMethod("findGene",signature(object="CuffSet"),.findGene)
 
551
 
 
552
 
 
553
.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
 
554
        #Sample subsetting
 
555
        if(!is.null(sampleIdList)){
 
556
                if(.checkSamples(object@DB,sampleIdList)){
 
557
                        myLevels<-sampleIdList
 
558
                }else{
 
559
                        stop("Sample does not exist!")
 
560
                }
 
561
        }else{
 
562
                myLevels<-getLevels(object)
 
563
        }
 
564
        
 
565
        sampleString<-'('
 
566
        for (i in myLevels){
 
567
                sampleString<-paste(sampleString,"'",i,"',",sep="")
 
568
        }
 
569
        sampleString<-substr(sampleString,1,nchar(sampleString)-1)
 
570
        sampleString<-paste(sampleString,")",sep="")
 
571
        
 
572
        #ID Search String (SQL)
 
573
        idString<-'('
 
574
        for (i in featureIdList){
 
575
                idString<-paste(idString,"'",i,"',",sep="")
 
576
        }
 
577
        idString<-substr(idString,1,nchar(idString)-1)
 
578
        idString<-paste(idString,")",sep="")
 
579
        
 
580
        whereString<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
 
581
        whereStringFPKM<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
 
582
        whereStringDiff<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
 
583
        
 
584
        if(!is.null(sampleIdList)){
 
585
                whereString<-whereString
 
586
                whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
 
587
                whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
 
588
        }
 
589
        
 
590
        
 
591
        AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField, whereString,sep="")
 
592
        FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
 
593
        DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringDiff,sep="")
 
594
        repFPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$replicateTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
 
595
        countQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$countTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
 
596
        
 
597
        #print(AnnotationQuery)
 
598
        #print(FPKMQuery)
 
599
        #print(DiffQuery)
 
600
        #print(repFPKMQuery)
 
601
        #print(countQuery)
 
602
        
 
603
        begin<-dbSendQuery(object@DB,"BEGIN;")  
 
604
        res<-new("CuffFeatureSet",
 
605
                        annotation=dbGetQuery(object@DB,AnnotationQuery),
 
606
                        fpkm=dbGetQuery(object@DB,FPKMQuery),
 
607
                        diff=dbGetQuery(object@DB,DiffQuery),
 
608
                        repFpkm=dbGetQuery(object@DB,repFPKMQuery),
 
609
                        count=dbGetQuery(object@DB,countQuery),
 
610
                        genome=.getGenome(object)
 
611
                )
414
612
        end<-dbSendQuery(object@DB,"END;")              
415
613
        res
 
614
        
416
615
}
417
616
 
418
 
setMethod("getGenes",signature(object="CuffSet"),.getGenes)
419
 
 
420
 
 
421
 
#.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
422
 
#       #Sample subsetting
423
 
#       if(!is.null(sampleIdList)){
424
 
#               if(.checkSamples(object@DB,sampleIdList)){
425
 
#                       myLevels<-sampleIdList
426
 
#               }else{
427
 
#                       stop("Sample does not exist!")
428
 
#               }
429
 
#       }else{
430
 
#               myLevels<-getLevels(object)
431
 
#       }
432
 
#       
433
 
#       sampleString<-'('
434
 
#       for (i in myLevels){
435
 
#               sampleString<-paste(sampleString,"'",i,"',",sep="")
436
 
#       }
437
 
#       sampleString<-substr(sampleString,1,nchar(sampleString)-1)
438
 
#       sampleString<-paste(sampleString,")",sep="")
439
 
#       
440
 
#       #ID Search String (SQL)
441
 
#       idString<-'('
442
 
#       for (i in featureIdList){
443
 
#               idString<-paste(idString,"'",i,"',",sep="")
444
 
#       }
445
 
#       idString<-substr(idString,1,nchar(idString)-1)
446
 
#       idString<-paste(idString,")",sep="")
447
 
#       
448
 
#       whereString<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
449
 
#       whereStringFPKM<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
450
 
#       whereStringDiff<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
451
 
#       
452
 
#       if(!is.null(sampleIdList)){
453
 
#               whereString<-whereString
454
 
#               whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
455
 
#               whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
456
 
#       }
457
 
#       
458
 
#       
459
 
#       AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
460
 
#       FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable,"  y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereStringFPKM,sep="")
461
 
#       DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable,"  y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereStringDiff,sep="")
462
 
#       
463
 
#       begin<-dbSendQuery(object@DB,"BEGIN;")  
464
 
#       res<-isoforms=new("CuffFeatureSet",
465
 
#                       annotation=dbGetQuery(object@DB,AnnotationQuery),
466
 
#                       fpkm=dbGetQuery(object@DB,FPKMQuery),
467
 
#                       diff=dbGetQuery(object@DB,DiffQuery)
468
 
#               )
469
 
#       end<-dbSendQuery(object@DB,"END;")              
470
 
#       res
471
 
#       
472
 
#}
473
 
#
474
 
#setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)
475
 
        
476
 
 
 
617
setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)
477
618
 
478
619
 
479
620
#getGeneIds from featureIds
627
768
 
628
769
setMethod("getSigTable",signature(object="CuffSet"),.getSigTable)
629
770
 
 
771
.sigMatrix<-function(object,alpha=0.05,level='genes',orderByDist=FALSE){
 
772
        if(level %in% c('promoters','splicing','relCDS')){
 
773
                diffTable<-slot(object,level)@table
 
774
        }else{
 
775
                diffTable<-slot(object,level)@tables$expDiffTable
 
776
        }
 
777
        
 
778
        sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
 
779
        sig<-dbGetQuery(object@DB,sql)
 
780
        sig$q_value<-p.adjust(sig$p_value,method='BH')
 
781
        
 
782
        sig<-sig[sig$q_value<=alpha,]
 
783
        
 
784
        fieldsNeeded<-c('sample_1','sample_2')
 
785
        sig<-sig[,fieldsNeeded]
 
786
        
 
787
        if(orderByDist){
 
788
                #This does not work yet...
 
789
                mySamples<-colnames(fpkmMatrix(slot(object,level)))
 
790
                sampleOrder<-mySamples[order.dendrogram(as.dendrogram(hclust(JSdist(makeprobs(log10(fpkmMatrix(slot(object,level))))))))]
 
791
        }
 
792
        else {
 
793
                sampleOrder<-rev(samples(object)$sample_name)
 
794
        }
 
795
        sig$sample_1<-factor(sig$sample_1,levels=sampleOrder)
 
796
        sig$sample_2<-factor(sig$sample_2,levels=sampleOrder)
 
797
        
 
798
        p<-ggplot(sig,aes(x=sample_1,y=sample_2))
 
799
        
 
800
        p<- p + stat_sum(aes(fill=..n..),color="black",size=0.3, geom="tile") + scale_fill_continuous(low="white",high="green") + expand_limits(fill=0)
 
801
        
 
802
        p<- p + stat_sum(aes(label=..n..),geom="text",size=6,legend=FALSE)
 
803
        
 
804
        #p <- p + geom_tile(aes(fill=..n..))
 
805
        
 
806
        p + theme_bw() + labs(title=paste("Significant ",slot(object,level)@tables$mainTable,"\n at alpha ",alpha,sep="")) +theme(axis.text.x=element_text(angle=-90, hjust=0)) + coord_equal(1)
 
807
        
 
808
}
 
809
 
 
810
setMethod("sigMatrix",signature(object="CuffSet"),.sigMatrix)
 
811
 
630
812
#Find similar genes
631
 
.findSimilar<-function(object,x,n){
 
813
.findSimilar<-function(object,x,n,distThresh,returnGeneSet=TRUE,...){
632
814
        #x can be either a gene_id, gene_short_name or a vector of FPKM values (fake gene expression profile)
 
815
        #TODO: make findSimilar work with all levels
 
816
        #TODO: Possibly add FPKM thresholding
 
817
        
633
818
        if(is.character(x)){
634
819
                myGene<-getGene(object,x)
635
 
                sig<-makeprobsvec(fpkmMatrix(myGene)[1,])
 
820
                sig<-makeprobsvec(fpkmMatrix(myGene,...)[1,])
636
821
        }else if(is.vector(x)){
637
822
                sig<-makeprobsvec(x)
638
823
        }
639
 
        allGenes<-fpkmMatrix(object@genes)
 
824
        allGenes<-fpkmMatrix(object@genes,...)
640
825
        allGenes<-t(makeprobs(t(allGenes)))
641
826
        compare<-function(q){
642
827
                JSdistVec(sig,q)
643
828
        }
644
829
        myDist<-apply(allGenes,MARGIN=1,compare)
645
 
        mySimilarIds<-names(sort(myDist))[1:n]
646
 
        mySimilarGenes<-getGenes(object,mySimilarIds)
 
830
        
 
831
        if(!missing(distThresh)){
 
832
                myDist<-myDist[myDist<=distThresh]
 
833
        }
 
834
        myDist<-sort(myDist)
 
835
        
 
836
        if(!missing(n)){
 
837
                myDist<-myDist[1:n]
 
838
        }
 
839
        
 
840
        mySimilarIds<-names(myDist)
 
841
        
 
842
        if(returnGeneSet){
 
843
                mySimilarGenes<-getGenes(object,mySimilarIds,...)
 
844
                return(mySimilarGenes)
 
845
        }else{
 
846
                res<-as.data.frame(myDist)
 
847
                colnames(res)<-c("distance")
 
848
                return(res)
 
849
        }
 
850
        
647
851
}
648
852
setMethod("findSimilar",signature(object="CuffSet"),.findSimilar)
649
853
 
663
867
 
664
868
setMethod("getLevels",signature(object="CuffSet"),.getLevels)
665
869
 
 
870
.getRepLevels<-function(object){
 
871
        levelsQuery<-'SELECT r.rep_name FROM replicates r LEFT JOIN samples s ON r.sample_name=s.sample_name ORDER BY s.sample_index ASC'
 
872
        levels<-dbGetQuery(object@DB,levelsQuery)$rep_name
 
873
        levels
 
874
}
 
875
 
 
876
setMethod("getRepLevels",signature(object="CuffSet"),.getRepLevels)
 
877
 
666
878
.checkSamples<-function(dbConn,sampleIdList){
667
879
        dbSamples<-dbReadTable(dbConn,"samples")
668
880
        if (all(sampleIdList %in% dbSamples$sample_name)){
672
884
        }
673
885
}
674
886
 
 
887
#####################
 
888
#GenomicRanges
 
889
#####################
 
890
#.makeGRanges<-function(dbConn,id,idField='transcript_id'){
 
891
#       txQuery<-paste("SELECT * FROM features WHERE ",idField,"='",id,"'",sep="")
 
892
#       print(txQuery)
 
893
#       features<-dbGetQuery(dbConn,txQuery)
 
894
#       #res<-GRanges(seqnames=features$seqnames,ranges=IRanges(start=features$start,end=features$end,names=features$exon_number),strand=features$strand)
 
895
#       res<-GRanges(features[,-1])
 
896
#       res
 
897
#}
 
898
 
 
899
.makeGRanges<-function(object,id,idField='transcript_id'){
 
900
        txQuery<-paste("SELECT * from features WHERE ",idField,"='",id,"'",sep="")
 
901
        myFeat<-dbGetQuery(object@DB,txQuery)
 
902
        #res<-GRanges(myFeat[,-1])
 
903
        res<-GRanges(seqnames=myFeat$seqnames,ranges=IRanges(start=myFeat$start,end=myFeat$end,names=myFeat$exon_number),strand=myFeat$strand)
 
904
        res
 
905
}
 
906
 
 
907
setMethod("makeGRanges",signature(object="CuffSet"),.makeGRanges)
 
908
 
 
909
#.makeGRangesList<-function(object,id,idField="gene_id"){
 
910
#       #use .makeGRanges for each sub-feature of id to create GRangesList
 
911
#}
 
912
#
 
913
#setMethod("makeGRangesList",signature(object="CuffSet"),.makeGRangesList)
675
914
 
676
915
#####################
677
916
#Add FeatureData    #
690
929
setMethod("addFeatures",signature(object="CuffSet"),.addFeatures)
691
930
 
692
931
#TODO: Add method to purge existing feature data table to allow 'refresh' of feature level data
 
932
 
 
933
##############
 
934
#Reporting
 
935
##############
 
936
#runReport<-function(){
 
937
#       if(!file.exists(".output")){
 
938
#               dir.create(".output")
 
939
#       }
 
940
#       file.copy(system.file("reports/runReport.Rnw", package="cummeRbund"),paste(".output/","runReport.Rnw",sep=""),overwrite=T)
 
941
#       myWD<-getwd()
 
942
#       setwd(".output")
 
943
#       Sweave("runReport.Rnw")
 
944
#       tools::texi2dvi("runReport.tex",pdf=TRUE)
 
945
#       setwd(myWD)
 
946
#}
 
947
 
 
948
###################
 
949
# Coersion methods
 
950
###################
 
951
#As ExpressionSet