150
185
whereString = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",sep="")
151
186
whereStringFPKM = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_name IN ',sampleString,')',sep="")
152
187
whereStringDiff = paste("WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
188
whereStringRep = paste("JOIN replicates r ON y.rep_name=r.rep_name WHERE (x.gene_id ='",geneId,"' OR x.gene_short_name = '",geneId,"')",' AND (r.sample_name IN ',sampleString,')',sep="")
155
geneAnnotationQuery<-paste("SELECT * from genes x ",whereString,sep="")
191
geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING(gene_id) ",whereString,sep="")
156
192
geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
157
193
#print(geneFPKMQuery)
158
194
geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ",whereStringDiff,sep="")
159
195
#print(geneDiffQuery)
196
geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y ON x.gene_id=y.gene_id ",whereStringRep,sep="")
197
geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y ON x.gene_id=y.gene_id ",whereStringFPKM,sep="")
198
geneFeatureQuery<-paste("SELECT y.* FROM features y JOIN genes x on y.gene_id = x.gene_id ",whereString,sep="")
161
200
isoformAnnotationQuery<-paste("SELECT * from isoforms i JOIN genes x ON i.gene_id = x.gene_id ",whereString,sep="")
162
201
isoformFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
163
202
isoformDiffQuery<-paste("SELECT y.* from isoforms i JOIN isoformExpDiffData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringDiff,sep="")
203
isoformRepFPKMQuery<-paste("SELECT y.* from isoforms i JOIN isoformReplicateData y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringRep,sep="")
204
isoformCountQuery<-paste("SELECT y.* from isoforms i JOIN isoformCount y ON i.isoform_id = y.isoform_id JOIN genes x ON i.gene_id = x.gene_id ",whereStringFPKM,sep="")
165
206
TSSAnnotationQuery<-paste("SELECT * from TSS t JOIN genes x ON t.gene_id = x.gene_id ",whereString,sep="")
166
207
TSSFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
167
208
TSSDiffQuery<-paste("SELECT y.* from TSS t JOIN TSSExpDiffData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringDiff,sep="")
209
TSSRepFPKMQuery<-paste("SELECT y.* from TSS t JOIN TSSReplicateData y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringRep,sep="")
210
TSSCountQuery<-paste("SELECT y.* from TSS t JOIN TSSCount y ON t.TSS_group_id=y.TSS_group_id JOIN genes x ON t.gene_id = x.gene_id ",whereStringFPKM,sep="")
169
213
CDSAnnotationQuery<-paste("SELECT * from CDS c JOIN genes x ON c.gene_id = x.gene_id ",whereString,sep="")
170
214
CDSFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
171
215
CDSDiffQuery<-paste("SELECT y.* from CDS c JOIN CDSExpDiffData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringDiff,sep="")
216
CDSRepFPKMQuery<-paste("SELECT y.* from CDS c JOIN CDSReplicateData y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringRep,sep="")
217
CDSCountQuery<-paste("SELECT y.* from CDS c JOIN CDSCount y ON c.CDS_id = y.CDS_id JOIN genes x ON c.gene_id = x.gene_id ",whereStringFPKM,sep="")
173
219
begin<-dbSendQuery(object@DB,"BEGIN;")
281
354
idQuery<-paste("SELECT DISTINCT gene_id from genes x ",whereStringGene,sep="")
283
geneAnnotationQuery<-paste("SELECT * from genes x ", whereStringGene,sep="")
356
#geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf ON x.gene_id=xf.gene_id ", whereStringGene,sep="")
357
geneAnnotationQuery<-paste("SELECT * from genes x LEFT JOIN geneFeatures xf USING (gene_id) ", whereStringGene,sep="")
284
358
geneFPKMQuery<-paste("SELECT y.* from genes x JOIN geneData y ON x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
285
359
geneDiffQuery<-paste("SELECT y.* from genes x JOIN geneExpDiffData y ON x.gene_id=y.gene_id ", whereStringGeneDiff,sep="")
360
geneRepFPKMQuery<-paste("SELECT y.* from genes x JOIN geneReplicateData y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
361
geneCountQuery<-paste("SELECT y.* from genes x JOIN geneCount y on x.gene_id=y.gene_id ", whereStringGeneFPKM,sep="")
287
363
isoformAnnotationQuery<-paste("SELECT x.* from isoforms x LEFT JOIN isoformFeatures xf ON x.isoform_id=xf.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
288
364
isoformFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
289
365
isoformDiffQuery<-paste("SELECT y.* from isoforms x JOIN isoformExpDiffData y ON x.isoform_id = y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
366
isoformRepFPKMQuery<-paste("SELECT y.* from isoforms x JOIN isoformReplicateData y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
367
isoformCountQuery<-paste("SELECT y.* from isoforms x JOIN isoformCount y on x.isoform_id=y.isoform_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
291
369
TSSAnnotationQuery<-paste("SELECT x.* from TSS x LEFT JOIN TSSFeatures xf ON x.TSS_group_id=xf.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
292
370
TSSFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
293
371
TSSDiffQuery<-paste("SELECT y.* from TSS x JOIN TSSExpDiffData y ON x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
372
TSSRepFPKMQuery<-paste("SELECT y.* from TSS x JOIN TSSReplicateData y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
373
TSSCountQuery<-paste("SELECT y.* from TSS x JOIN TSSCount y on x.TSS_group_id=y.TSS_group_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
295
375
CDSAnnotationQuery<-paste("SELECT x.* from CDS x LEFT JOIN CDSFeatures xf ON x.CDS_id=xf.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereString,sep="")
296
376
CDSFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
297
377
CDSDiffQuery<-paste("SELECT y.* from CDS x JOIN CDSExpDiffData y ON x.CDS_id = y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringDiff,sep="")
378
CDSRepFPKMQuery<-paste("SELECT y.* from CDS x JOIN CDSReplicateData y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
379
CDSCountQuery<-paste("SELECT y.* from CDS x JOIN CDSCount y on x.CDS_id=y.CDS_id JOIN genes g on x.gene_id=g.gene_id ", whereStringFPKM,sep="")
299
381
promotersDistQuery<-paste("SELECT x.* FROM promoterDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
300
382
splicingDistQuery<-paste("SELECT x.* FROM splicingDiffData x LEFT JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
411
518
diff=CDS.distData
524
setMethod("getGenes",signature(object="CuffSet"),.getGenes)
526
.getGeneId<-function(object,idList){
527
#Query that takes list of any identifier and retrieves gene_id values from db (does not report missing finds)
530
searchString<-paste(searchString,"'",i,"',",sep="")
532
searchString<-substr(searchString,1,nchar(searchString)-1)
533
searchString<-paste(searchString,")",sep="")
535
geneIdQuery<-paste("SELECT DISTINCT g.gene_id FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id IN ",searchString," OR g.gene_short_name IN ",searchString," OR i.isoform_id IN ",searchString," OR t.tss_group_id IN ",searchString," OR c.CDS_id IN ",searchString,")",sep="")
537
res<-dbGetQuery(object@DB,geneIdQuery)
541
setMethod("getGeneId",signature(object="CuffSet"),.getGeneId)
543
.findGene<-function(object,query){
544
#Utility to search for gene_id and gene_short_name given a single 'query' string (e.g. query='pink1' will return all genes with 'pink1' (case-insensitive) in the gene_short_name field.
545
geneQuery<-paste("SELECT DISTINCT g.gene_id,g.gene_short_name FROM genes g LEFT JOIN isoforms i on g.gene_id=i.gene_id LEFT JOIN TSS t on g.gene_id=t.gene_id LEFT JOIN CDS c ON g.gene_id=c.gene_id WHERE (g.gene_id = '",query,"' OR g.gene_short_name = '",query,"' OR i.isoform_id = '",query,"' OR t.tss_group_id = '",query,"' OR c.CDS_id ='",query,"') OR g.gene_short_name LIKE '%",query,"%'",sep="")
546
res<-dbGetQuery(object@DB,geneQuery)
550
setMethod("findGene",signature(object="CuffSet"),.findGene)
553
.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
555
if(!is.null(sampleIdList)){
556
if(.checkSamples(object@DB,sampleIdList)){
557
myLevels<-sampleIdList
559
stop("Sample does not exist!")
562
myLevels<-getLevels(object)
567
sampleString<-paste(sampleString,"'",i,"',",sep="")
569
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
570
sampleString<-paste(sampleString,")",sep="")
572
#ID Search String (SQL)
574
for (i in featureIdList){
575
idString<-paste(idString,"'",i,"',",sep="")
577
idString<-substr(idString,1,nchar(idString)-1)
578
idString<-paste(idString,")",sep="")
580
whereString<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
581
whereStringFPKM<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
582
whereStringDiff<-paste(' WHERE (x.',slot(object,level)@idField,' IN ',idString,')',sep="")
584
if(!is.null(sampleIdList)){
585
whereString<-whereString
586
whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
587
whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
591
AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField, whereString,sep="")
592
FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
593
DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringDiff,sep="")
594
repFPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$replicateTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
595
countQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$countTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField,whereStringFPKM,sep="")
597
#print(AnnotationQuery)
603
begin<-dbSendQuery(object@DB,"BEGIN;")
604
res<-new("CuffFeatureSet",
605
annotation=dbGetQuery(object@DB,AnnotationQuery),
606
fpkm=dbGetQuery(object@DB,FPKMQuery),
607
diff=dbGetQuery(object@DB,DiffQuery),
608
repFpkm=dbGetQuery(object@DB,repFPKMQuery),
609
count=dbGetQuery(object@DB,countQuery),
610
genome=.getGenome(object)
414
612
end<-dbSendQuery(object@DB,"END;")
418
setMethod("getGenes",signature(object="CuffSet"),.getGenes)
421
#.getFeatures<-function(object,featureIdList,sampleIdList=NULL,level='isoforms'){
423
# if(!is.null(sampleIdList)){
424
# if(.checkSamples(object@DB,sampleIdList)){
425
# myLevels<-sampleIdList
427
# stop("Sample does not exist!")
430
# myLevels<-getLevels(object)
434
# for (i in myLevels){
435
# sampleString<-paste(sampleString,"'",i,"',",sep="")
437
# sampleString<-substr(sampleString,1,nchar(sampleString)-1)
438
# sampleString<-paste(sampleString,")",sep="")
440
# #ID Search String (SQL)
442
# for (i in featureIdList){
443
# idString<-paste(idString,"'",i,"',",sep="")
445
# idString<-substr(idString,1,nchar(idString)-1)
446
# idString<-paste(idString,")",sep="")
448
# whereString<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
449
# whereStringFPKM<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
450
# whereStringDiff<-paste('WHERE (x.gene_id IN ',idString,' OR g.gene_short_name IN ',idString,')',sep="")
452
# if(!is.null(sampleIdList)){
453
# whereString<-whereString
454
# whereStringFPKM<-paste(whereStringFPKM, ' AND y.sample_name IN ',sampleString,sep="")
455
# whereStringDiff<-paste(whereStringDiff,' AND (y.sample_1 IN ',sampleString,' AND y.sample_2 IN ',sampleString,')',sep="")
459
# AnnotationQuery<-paste("SELECT x.* from ",slot(object,level)@tables$mainTable," x LEFT JOIN ",slot(object,level)@tables$featureTable," xf ON x.",slot(object,level)@idField,"=xf.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereString,sep="")
460
# FPKMQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$dataTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereStringFPKM,sep="")
461
# DiffQuery<-paste("SELECT y.* from ",slot(object,level)@tables$mainTable," x JOIN ",slot(object,level)@tables$expDiffTable," y ON x.",slot(object,level)@idField," = y.",slot(object,level)@idField," JOIN genes g ON x.gene_id=g.gene_id ", whereStringDiff,sep="")
463
# begin<-dbSendQuery(object@DB,"BEGIN;")
464
# res<-isoforms=new("CuffFeatureSet",
465
# annotation=dbGetQuery(object@DB,AnnotationQuery),
466
# fpkm=dbGetQuery(object@DB,FPKMQuery),
467
# diff=dbGetQuery(object@DB,DiffQuery)
469
# end<-dbSendQuery(object@DB,"END;")
474
#setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)
617
setMethod("getFeatures",signature(object="CuffSet"),.getFeatures)
479
620
#getGeneIds from featureIds
628
769
setMethod("getSigTable",signature(object="CuffSet"),.getSigTable)
771
.sigMatrix<-function(object,alpha=0.05,level='genes',orderByDist=FALSE){
772
if(level %in% c('promoters','splicing','relCDS')){
773
diffTable<-slot(object,level)@table
775
diffTable<-slot(object,level)@tables$expDiffTable
778
sql<-paste("SELECT ",slot(object,level)@idField,", sample_1, sample_2, p_value from ", diffTable," WHERE status='OK';")
779
sig<-dbGetQuery(object@DB,sql)
780
sig$q_value<-p.adjust(sig$p_value,method='BH')
782
sig<-sig[sig$q_value<=alpha,]
784
fieldsNeeded<-c('sample_1','sample_2')
785
sig<-sig[,fieldsNeeded]
788
#This does not work yet...
789
mySamples<-colnames(fpkmMatrix(slot(object,level)))
790
sampleOrder<-mySamples[order.dendrogram(as.dendrogram(hclust(JSdist(makeprobs(log10(fpkmMatrix(slot(object,level))))))))]
793
sampleOrder<-rev(samples(object)$sample_name)
795
sig$sample_1<-factor(sig$sample_1,levels=sampleOrder)
796
sig$sample_2<-factor(sig$sample_2,levels=sampleOrder)
798
p<-ggplot(sig,aes(x=sample_1,y=sample_2))
800
p<- p + stat_sum(aes(fill=..n..),color="black",size=0.3, geom="tile") + scale_fill_continuous(low="white",high="green") + expand_limits(fill=0)
802
p<- p + stat_sum(aes(label=..n..),geom="text",size=6,legend=FALSE)
804
#p <- p + geom_tile(aes(fill=..n..))
806
p + theme_bw() + labs(title=paste("Significant ",slot(object,level)@tables$mainTable,"\n at alpha ",alpha,sep="")) +theme(axis.text.x=element_text(angle=-90, hjust=0)) + coord_equal(1)
810
setMethod("sigMatrix",signature(object="CuffSet"),.sigMatrix)
630
812
#Find similar genes
631
.findSimilar<-function(object,x,n){
813
.findSimilar<-function(object,x,n,distThresh,returnGeneSet=TRUE,...){
632
814
#x can be either a gene_id, gene_short_name or a vector of FPKM values (fake gene expression profile)
815
#TODO: make findSimilar work with all levels
816
#TODO: Possibly add FPKM thresholding
633
818
if(is.character(x)){
634
819
myGene<-getGene(object,x)
635
sig<-makeprobsvec(fpkmMatrix(myGene)[1,])
820
sig<-makeprobsvec(fpkmMatrix(myGene,...)[1,])
636
821
}else if(is.vector(x)){
637
822
sig<-makeprobsvec(x)
639
allGenes<-fpkmMatrix(object@genes)
824
allGenes<-fpkmMatrix(object@genes,...)
640
825
allGenes<-t(makeprobs(t(allGenes)))
641
826
compare<-function(q){
644
829
myDist<-apply(allGenes,MARGIN=1,compare)
645
mySimilarIds<-names(sort(myDist))[1:n]
646
mySimilarGenes<-getGenes(object,mySimilarIds)
831
if(!missing(distThresh)){
832
myDist<-myDist[myDist<=distThresh]
840
mySimilarIds<-names(myDist)
843
mySimilarGenes<-getGenes(object,mySimilarIds,...)
844
return(mySimilarGenes)
846
res<-as.data.frame(myDist)
847
colnames(res)<-c("distance")
648
852
setMethod("findSimilar",signature(object="CuffSet"),.findSimilar)