95
95
setMethod("samples","CuffData",.samples)
97
.fpkm<-function(object,features=FALSE){
97
.replicates<-function(object){
98
res<-dbReadTable(object@DB,'replicates')
103
setMethod("replicates","CuffData",.replicates)
105
.fpkm<-function(object,features=FALSE,sampleIdList){
107
if(!missing(sampleIdList)){
108
if(.checkSamples(object@DB,sampleIdList)){
109
myLevels<-sampleIdList
111
stop("Sample does not exist!")
114
myLevels<-getLevels(object)
116
#Sample Search String (SQL)
119
sampleString<-paste(sampleString,"'",i,"',",sep="")
121
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
122
sampleString<-paste(sampleString,")",sep="")
99
FPKMQuery<-paste("SELECT * FROM",object@tables$dataTable)
125
FPKMQuery<-paste("SELECT * FROM ",object@tables$dataTable," WHERE sample_name IN ",sampleString,sep="")
101
FPKMQuery<-paste("SELECT xf.*,xm.*,x.sample_name,x.fpkm,x.conf_hi,x.conf_lo FROM ",object@tables$dataTable," x LEFT JOIN ",object@tables$featureTable," xf ON x.",object@idField,"=xf.",object@idField," LEFT JOIN ",object@tables$mainTable," xm ON x.",object@idField,"=xm.",object@idField,sep="")
127
FPKMQuery<-paste("SELECT xf.*,xm.*,x.sample_name,x.fpkm,x.conf_hi,x.conf_lo FROM ",object@tables$dataTable," x LEFT JOIN ",object@tables$featureTable," xf ON x.",object@idField,"=xf.",object@idField," LEFT JOIN ",object@tables$mainTable," xm ON x.",object@idField,"=xm.",object@idField," WHERE x.sample_name IN ",sampleString,sep="")
104
130
res<-dbGetQuery(object@DB,FPKMQuery)
105
131
res$sample_name<-factor(res$sample_name,levels=getLevels(object))
132
res$stdev<-(res$conf_hi-res$fpkm)/2
109
136
setMethod("fpkm","CuffData",.fpkm)
111
.fpkmMatrix<-function(object){
138
.repFpkm<-function(object,features=FALSE,repIdList){
140
if(!missing(repIdList)){
141
if(.checkReps(object@DB,repIdList)){
144
stop("Replicate does not exist!")
147
myLevels<-getRepLevels(object)
149
#Sample Search String (SQL)
152
sampleString<-paste(sampleString,"'",i,"',",sep="")
154
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
155
sampleString<-paste(sampleString,")",sep="")
158
FPKMQuery<-paste("SELECT * FROM ",object@tables$replicateTable," WHERE rep_name IN ",sampleString,sep="")
160
FPKMQuery<-paste("SELECT xf.*,xm.*,x.rep_name,x.raw_frags,x.internal_scaled_frags,x.external_scaled_frags,x.fpkm,x.effective_length,x.status FROM ",object@tables$replicateTable," x LEFT JOIN ",object@tables$featureTable," xf on x.",object@idField,"=xf.",object@idField," LEFT JOIN ",object@tables$mainTable," xm ON x.",object@idField,"=xm.",object@idField," WHERE x.rep_name IN ",sampleString,sep="")
163
res<-dbGetQuery(object@DB,FPKMQuery)
164
res$rep_name<-factor(res$rep_name,levels=getRepLevels(object))
165
#res$stdev<-(res$conf_hi-res$fpkm)/2 #Not really available yet since conf_hi and conf_lo are not
169
setMethod("repFpkm","CuffData",.repFpkm)
171
.count<-function(object,sampleIdList){
173
if(!missing(sampleIdList)){
174
if(.checkSamples(object@DB,sampleIdList)){
175
myLevels<-sampleIdList
177
stop("Sample does not exist!")
180
myLevels<-getLevels(object)
182
#Sample Search String (SQL)
185
sampleString<-paste(sampleString,"'",i,"',",sep="")
187
sampleString<-substr(sampleString,1,nchar(sampleString)-1)
188
sampleString<-paste(sampleString,")",sep="")
190
CountQuery<-FPKMQuery<-paste("SELECT * FROM ",object@tables$countTable," WHERE sample_name IN ",sampleString,sep="")
192
res<-dbGetQuery(object@DB,CountQuery)
193
res$sample_name<-factor(res$sample_name,levels=getLevels(object))
198
setMethod("count","CuffData",.count)
200
.fpkmMatrix<-function(object,fullnames=FALSE,sampleIdList){
201
#TODO: fix fullnames for CuffData::fpkmMatrix
203
if(!missing(sampleIdList)){
204
if(.checkSamples(object@DB,sampleIdList)){
205
myLevels<-sampleIdList
207
stop("Sample does not exist!")
210
myLevels<-getLevels(object)
112
213
samp<-samples(object)
113
214
FPKMMatQuery<-paste("select x.",object@idField,", ",sep="")
117
218
FPKMMatQuery<-substr(FPKMMatQuery, 1, nchar(FPKMMatQuery)-1)
118
219
FPKMMatQuery<-paste(FPKMMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$dataTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
119
220
res<-dbGetQuery(object@DB,FPKMMatQuery)
120
data.frame(res[,-1],row.names=res[,1])
221
res<-data.frame(res[,-1],row.names=res[,1])
222
if(!missing(sampleIdList)){
223
res<-data.frame(res[,sampleIdList],row.names=rownames(res))
224
colnames(res)<-sampleIdList
123
229
setMethod("fpkmMatrix","CuffData",.fpkmMatrix)
231
.repFpkmMatrix<-function(object,fullnames=FALSE,repIdList){
233
if(!missing(repIdList)){
234
if(.checkReps(object@DB,repIdList)){
237
stop("Replicate does not exist!")
240
myLevels<-getRepLevels(object)
243
samp<-replicates(object)
244
FPKMMatQuery<-paste("select x.",object@idField,", ",sep="")
246
FPKMMatQuery<-paste(FPKMMatQuery,"sum(case when xd.rep_name ='",i,"' then fpkm end) as ",i,",",sep="")
248
FPKMMatQuery<-substr(FPKMMatQuery, 1, nchar(FPKMMatQuery)-1)
249
FPKMMatQuery<-paste(FPKMMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$replicateTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
250
res<-dbGetQuery(object@DB,FPKMMatQuery)
251
res<-data.frame(res[,-1],row.names=res[,1])
252
if(!missing(repIdList)){
253
res<-data.frame(res[,repIdList],row.names=rownames(res))
254
colnames(res)<-repIdList
259
setMethod("repFpkmMatrix","CuffData",.repFpkmMatrix)
261
.countMatrix<-function(object,fullnames=FALSE,sampleIdList){
263
if(!missing(sampleIdList)){
264
if(.checkSamples(object@DB,sampleIdList)){
265
myLevels<-sampleIdList
267
stop("Sample does not exist!")
270
myLevels<-getLevels(object)
273
samp<-samples(object)
274
CountMatQuery<-paste("select x.",object@idField,", ",sep="")
276
CountMatQuery<-paste(CountMatQuery,"sum(case when xd.sample_name ='",i,"' then count end) as ",i,",",sep="")
278
CountMatQuery<-substr(CountMatQuery, 1, nchar(CountMatQuery)-1)
279
CountMatQuery<-paste(CountMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$countTable," xd on x.",object@idField," = xd.",object@idField," group by x.",object@idField,sep="")
280
res<-dbGetQuery(object@DB,CountMatQuery)
281
res<-data.frame(res[,-1],row.names=res[,1])
282
if(!missing(sampleIdList)){
283
res<-data.frame(res[,sampleIdList],row.names=rownames(res))
284
colnames(res)<-sampleIdList
289
setMethod("countMatrix","CuffData",.countMatrix)
291
.repCountMatrix<-function(object,fullnames=FALSE,repIdList){
293
if(!missing(repIdList)){
294
if(.checkReps(object@DB,repIdList)){
297
stop("Replicate does not exist!")
300
myLevels<-getRepLevels(object)
302
reps<-replicates(object)
303
repCountMatQuery<-paste("select x.",object@idField,", ",sep="")
305
repCountMatQuery<-paste(repCountMatQuery,"sum(case when xr.rep_name ='",i,"' then external_scaled_frags end) as ",i,",",sep="")
307
repCountMatQuery<-substr(repCountMatQuery, 1, nchar(repCountMatQuery)-1)
308
repCountMatQuery<-paste(repCountMatQuery," from ",object@tables$mainTable," x LEFT JOIN ",object@tables$replicateTable," xr on x.",object@idField," = xr.",object@idField," group by x.",object@idField,sep="")
309
res<-dbGetQuery(object@DB,repCountMatQuery)
310
res<-data.frame(res[,-1],row.names=res[,1])
311
if(!missing(repIdList)){
312
res<-data.frame(res[,repIdList],row.names=rownames(res))
313
colnames(res)<-repIdList
318
setMethod("repCountMatrix","CuffData",.repCountMatrix)
320
.statsMatrix<-function(object){
321
statsQuery<-paste("SELECT xd.*, xc.count, xc.variance as count_variance , xc.uncertainty as count_uncertainty, xc.dispersion as count_dispersion, (xd.conf_hi-xd.fpkm)/2 as fpkm_stdev,((xd.conf_hi-xd.fpkm)/2)/xd.fpkm AS 'CV' FROM ",object@tables$dataTable," xd LEFT JOIN ",object@tables$countTable," xc ON xd.",object@idField,"=xc.",object@idField," AND xd.sample_name=xc.sample_name",sep="")
322
res<-dbGetQuery(object@DB,statsQuery)
323
res$sample_name<-factor(res$sample_name,levels=samples(object))
126
327
#This needs a lot of work...
127
328
#TODO: Change this to remove lnFcCutoff but make sure that functions that rely on diffData have their own FC cutoff so that plotting doesn't suffer
139
340
diffQuery<-paste("SELECT x.",object@idField,", xed.* FROM ",object@tables$mainTable," x LEFT JOIN ",object@tables$expDiffTable," xed on x.",object@idField," = xed.",object@idField," WHERE ((sample_1 = '",x,"' AND sample_2 = '",y,"') OR (sample_1 = '",y,"' AND sample_2 = '",x,"'))",sep="")
141
diffQuery<-paste("SELECT x.",object@idField,", xed.*, xf.* FROM ",object@tables$mainTable," x LEFT JOIN ",object@tables$expDiffTable," xed on x.",object@idField," = xed.",object@idField," LEFT JOIN ",object@tables$featureTable," xf ON x.",object@idField,"=xf.",object@idField," WHERE ((sample_1 = '",x,"' AND sample_2 = '",y,"') OR (sample_1 = '",y,"' AND sample_2 = '",x,"'))",sep="")
342
diffQuery<-paste("SELECT xm.*, xed.*, xf.* FROM ",object@tables$mainTable," xm LEFT JOIN ",object@tables$expDiffTable," xed on xm.",object@idField," = xed.",object@idField," LEFT JOIN ",object@tables$featureTable," xf ON xm.",object@idField,"=xf.",object@idField," WHERE ((sample_1 = '",x,"' AND sample_2 = '",y,"') OR (sample_1 = '",y,"' AND sample_2 = '",x,"'))",sep="")
144
345
dat<-dbGetQuery(object@DB,diffQuery)
149
350
setMethod("diffData",signature(object="CuffData"),.diffData)
352
.diffTable<-function(object,logCutoffValue=99999){
353
measureVars<-c('status','value_1','value_2','log2_fold_change','test_stat','p_value','q_value','significant')
354
all.diff<-diffData(object,features=TRUE)
355
all.diff$log2_fold_change[all.diff$log2_fold_change>=logCutoffValue]<-Inf
356
all.diff$log2_fold_change[all.diff$log2_fold_change<=-logCutoffValue]<--Inf
357
all.diff.melt<-melt(all.diff,measure.vars=measureVars)
358
#all.diff.melt<-all.diff.melt[!grepl("^value_",all.diff.melt$variable),]
359
all.diff.cast<-dcast(all.diff.melt,formula=...~sample_2+sample_1+variable)
363
setMethod("diffTable",signature(object="CuffData"),.diffTable)
365
.QCTable<-function(object){
366
qcSQL<-paste("SELECT d.*, (d.conf_hi-d.fpkm)/2 as stdev, c.count, c.variance, c.uncertainty, c.dispersion, (c.variance/d.fpkm) AS 'IOD', ((d.conf_hi-d.fpkm)/2)/d.fpkm AS 'CV' FROM ",object@tables$dataTable," d LEFT JOIN ",object@tables$countTable," c ON d.gene_id=c.gene_id AND d.sample_name=c.sample_name",sep="")
367
res<-dbGetQuery(object@DB,qcSQL)
151
371
.getMA<-function(object,x,y,logMode=T,pseudocount=1){
152
372
if (missing(x) || missing(y)){
153
373
stop("You must supply both x and y.")
155
375
sql<-paste("SELECT x.",object@idField,", sum(case when x.sample_name = '",x,"' then x.fpkm end) AS 'x', sum(case when x.sample_name = '",y,"' then x.fpkm end) AS 'y' FROM ",object@tables$dataTable," x GROUP BY x.",object@idField,";",sep="")
157
dat<-dbGetQuery(object@DB,sql)
160
dat$x<-log10(dat$x+pseudocount)
161
dat$y<-log10(dat$y+pseudocount)
163
dat$A<-(dat$x+dat$y)/2
377
dat<-dbGetQuery(object@DB,sql)
380
dat$x<-log10(dat$x+pseudocount)
381
dat$y<-log10(dat$y+pseudocount)
383
dat$A<-(dat$x+dat$y)/2
390
.getCountMA<-function(object,x,y,logMode=T,pseudocount=1){
391
if (missing(x) || missing(y)){
392
stop("You must supply both x and y.")
394
sql<-paste("SELECT x.",object@idField,", sum(case when x.sample_name = '",x,"' then x.count end) AS 'x', sum(case when x.sample_name = '",y,"' then x.count end) AS 'y' FROM ",object@tables$countTable," x GROUP BY x.",object@idField,";",sep="")
395
dat<-dbGetQuery(object@DB,sql)
398
dat$x<-log10(dat$x+pseudocount)
399
dat$y<-log10(dat$y+pseudocount)
401
dat$A<-(dat$x+dat$y)/2
408
#.getRankOrder<-function(object,x,y,logMode=TRUE,pseudocount=1,ratio=TRUE){
409
# if (missing(x) || missing(y)){
410
# stop("You must supply both x and y.")
170
416
setMethod("DB","CuffData",function(object){
171
417
return(object@DB)
220
474
##################
222
.density<-function(object, logMode = TRUE, pseudocount=1.0, labels, features=FALSE, ...){
476
.density<-function(object, logMode = TRUE, pseudocount=0.0, labels, features=FALSE, replicates=FALSE,...){
223
477
if(is(object,'CuffData')) {
224
dat<-fpkm(object,features=features)
479
dat<-repFpkm(object,features=features)
480
colnames(dat)[colnames(dat)=="rep_name"]<-"condition"
482
dat<-fpkm(object,features=features)
483
colnames(dat)[colnames(dat)=="sample_name"]<-"condition"
226
486
stop('Un-supported class of object.')
228
488
if(logMode) dat$fpkm<-dat$fpkm+pseudocount
231
p<-p+geom_density(aes(x= log10(fpkm),group=sample_name,color=sample_name,fill=sample_name),alpha=I(1/3))
491
p<-p+geom_density(aes(x= log10(fpkm),group=condition,color=condition,fill=condition),alpha=I(1/3))
233
p<-p+geom_density(aes(x=fpkm,group=sample_name,color=sample_name,fill=sample_name),alpha=I(1/3))
493
p<-p+geom_density(aes(x=fpkm,group=condition,color=condition,fill=condition),alpha=I(1/3))
236
p<-p + opts(title=object@tables$mainTable)
496
p<-p + labs(title=object@tables$mainTable)
238
498
#Default cummeRbund colorscheme
239
499
p<-p + scale_fill_hue(l=50,h.start=200) + scale_color_hue(l=50,h.start=200)
301
561
#Add title & Return value
302
p<- p + opts(title=object@tables$mainTable)
562
p<- p + labs(title=object@tables$mainTable)
306
566
setMethod("csScatter",signature(object="CuffData"), .scatter)
308
.volcano<-function(object,x,y,features=FALSE,xlimits=c(-20,20),...){
568
.scatterMat<-function(object,replicates=FALSE,logMode=TRUE,pseudocount=1.0,hexbin=FALSE,useCounts=FALSE,...){
571
dat<-repCountMatrix(object)
573
dat<-repFpkmMatrix(object)
577
dat<-countMatrix(object)
579
dat<-fpkmMatrix(object)
588
myLab = "Normalized Counts"
593
myLab = paste("log10 ",myLab,sep="")
594
p <- .plotmatrix(log10(dat),hexbin=hexbin,...)
596
p <- .plotmatrix(dat,hexbin=hexbin,...)
599
p<- p + geom_abline(intercept=0,slope=1,linetype=2)
601
p <- p + theme_bw() + ylab(myLab) + xlab(myLab)
603
#p<- p + aes(alpha=0.01)
609
setMethod("csScatterMatrix",signature(object="CuffData"),.scatterMat)
611
.volcano<-function(object,x,y,alpha=0.05,showSignificant=TRUE,features=FALSE,xlimits=c(-20,20),...){
309
612
samp<-samples(object)
311
614
#check to make sure x and y are in samples
339
647
setMethod("csVolcano",signature(object="CuffData"), .volcano)
341
.boxplot<-function(object,logMode=TRUE,pseudocount=0.0001,...){
649
.volcanoMatrix<-function(object,alpha=0.05,xlimits=c(-20,20),mapping=aes(),...){
650
dat<-diffData(object)
651
part1<-dat[,c('sample_1','sample_2','value_1','value_2','test_stat','p_value','q_value')]
652
part2<-data.frame(sample_1=part1$sample_2,sample_2=part1$sample_1,value_1=part1$value_2,value_2=part1$value_1,test_stat=-part1$test_stat,p_value=part1$p_value,q_value=part1$q_value)
653
dat<-rbind(part1,part2)
655
myLevels<-union(dat$sample_1,dat$sample_2)
656
dat$sample_1<-factor(dat$sample_1,levels=myLevels)
657
dat$sample_2<-factor(dat$sample_2,levels=myLevels)
658
dat$log2_fold_change<-log2(dat$value_2/dat$value_1)
660
#Set significance value
661
dat$significant<-"no"
662
dat$significant[dat$q_value<=alpha]<-"yes"
664
#May need to expand filler for time-series data (when there aren't always all pairwise comparisons on which to facet
665
filler<-data.frame(sample_1=factor(myLevels,levels=myLevels),sample_2=factor(myLevels,levels=myLevels),label="")
666
filler$label<-as.character(filler$label)
667
mapping <- defaults(mapping, aes_string(x = "log2_fold_change", y = "-log10(p_value)", color="significant"))
668
class(mapping) <- "uneval"
670
p <-ggplot(dat) + geom_point(mapping,na.rm=TRUE,size=0.8) + scale_colour_manual(values = c("black","red")) + geom_text(aes(x=0,y=15,label=label),data=filler) + facet_grid(sample_1~sample_2)
672
p<- p + geom_vline(aes(x=0),linetype=2)
674
p <- p + theme_bw() + xlab(bquote(paste(log[2],"(fold change)",sep=""))) + ylab(bquote(paste(-log[10],"(p value)",sep="")))
680
setMethod("csVolcanoMatrix",signature(object="CuffData"),.volcanoMatrix)
682
.distheat<-function(object, replicates=F, samples.not.genes=T, logMode=T, pseudocount=1.0, heatscale=c(low='lightyellow',mid='orange',high='darkred'), heatMidpoint=NULL, ...) {
683
# get expression from a sample or gene perspective
685
obj.fpkm<-repFpkmMatrix(object,fullnames=T)
687
obj.fpkm<-fpkmMatrix(object,fullnames=T)
690
if(samples.not.genes) {
691
obj.fpkm.pos = obj.fpkm[rowSums(obj.fpkm)>0,]
693
obj.fpkm = t(obj.fpkm)
694
obj.fpkm.pos = obj.fpkm[,colSums(obj.fpkm)>0]
698
obj.fpkm.pos = log10(obj.fpkm.pos+pseudocount)
702
obj.dists = JSdist(makeprobs(obj.fpkm.pos))
705
obj.hc = hclust(obj.dists)
708
dist.df = melt(as.matrix(obj.dists),varnames=c("X1","X2"))
711
g = ggplot(dist.df, aes(x=X1, y=X2, fill=value))
714
labels = labels(obj.dists)
715
g = g + geom_tile(color="black") + scale_x_discrete("", limits=labels[obj.hc$order]) + scale_y_discrete("", limits=labels[obj.hc$order])
718
g = g + theme(axis.text.x=element_text(angle=-90, hjust=0), axis.text.y=element_text(angle=0, hjust=1))
720
# drop grey panel background and gridlines
721
g = g + theme(panel.grid.minor=element_line(colour=NA), panel.grid.major=element_line(colour=NA), panel.background=element_rect(fill=NA, colour=NA))
724
if (length(heatscale) == 2) {
725
g = g + scale_fill_gradient(low=heatscale[1], high=heatscale[3], name="JS Distance")
727
else if (length(heatscale) == 3) {
728
if (is.null(heatMidpoint)) {
729
heatMidpoint = max(obj.dists) / 2.0
731
g = g + scale_fill_gradient2(low=heatscale[1], mid=heatscale[2], high=heatscale[3], midpoint=heatMidpoint, name="JS Distance")
733
if(samples.not.genes){
734
g <- g + geom_text(aes(label=format(value,digits=3)))
740
setMethod("csDistHeat", signature("CuffData"), .distheat)
743
.boxplot<-function(object,logMode=TRUE,pseudocount=0.0001,replicates=FALSE,...){
746
colnames(dat)[colnames(dat)=="rep_name"]<-"condition"
749
colnames(dat)[colnames(dat)=="sample_name"]<-"condition"
344
752
dat$fpkm<-dat$fpkm+pseudocount
346
p<-p+geom_boxplot(aes(x=sample_name,y=log10(fpkm),fill=sample_name),size=0.3,alpha=I(1/3))
754
p<-p+geom_boxplot(aes(x=condition,y=log10(fpkm),fill=condition),size=0.3,alpha=I(1/3))
349
p<-p+geom_boxplot(aes(x=sample_name,y=fpkm,fill=sample_name),alpha=I(1/3),size=0.3)
757
p<-p+geom_boxplot(aes(x=condition,y=fpkm,fill=condition),alpha=I(1/3),size=0.3)
351
p<- p + opts(axis.text.x=theme_text(angle=-90, hjust=0))
759
p<- p + theme(axis.text.x=element_text(angle=-90, hjust=0))
353
761
#Default cummeRbund colorscheme
354
762
p<-p + scale_fill_hue(l=50,h.start=200)
369
781
#res<-as.dist(res)
370
782
res<-as.dendrogram(hclust(res))
371
plot(res,main=paste("All",deparse(substitute(object)),sep=" "))
783
plot(res,main=paste("All",deparse(substitute(object)),sep=" "),...)
375
787
setMethod("csDendro",signature(object="CuffData"),.dendro)
377
.MAplot<-function(object,x,y,logMode=T,pseudocount=1,smooth=F){
378
dat<-.getMA(object,x,y,logMode=logMode,pseudocount=pseudocount)
789
.MAplot<-function(object,x,y,logMode=T,pseudocount=1,smooth=FALSE,useCount=FALSE){
791
dat<-.getCountMA(object,x,y,logMode=logMode,pseudocount=pseudocount)
793
dat<-.getMA(object,x,y,logMode=logMode,pseudocount=pseudocount)
380
p<-p+geom_point(aes(x=A,y=log2(M)))
796
p<-p+geom_point(aes(x=A,y=log2(M)),size=0.8)
392
808
setMethod("MAplot",signature(object="CuffData"),.MAplot)
810
.dispersionPlot<-function(object){
813
p<-p+geom_point(aes(x=count,y=dispersion,color=sample_name)) + facet_wrap('sample_name') + scale_x_log10() + scale_y_log10()
817
setMethod("dispersionPlot",signature(object="CuffData"),.dispersionPlot)
819
.MDSplot<-function(object,replicates=FALSE,logMode=TRUE,pseudocount=1.0){
821
dat<-repFpkmMatrix(object)
823
dat<-fpkmMatrix(object)
827
dat<-log10(dat+pseudocount)
830
d<-JSdist(makeprobs(dat))
831
fit <- cmdscale(d,eig=TRUE, k=2)
832
res<-data.frame(names=rownames(fit$points),M1=fit$points[,1],M2=fit$points[,2])
834
p <- p + geom_point(aes(x=M1,y=M2,color=names)) + geom_text(aes(x=M1,y=M2,label=names,color=names)) + theme_bw()
838
setMethod("MDSplot",signature(object="CuffData"),.MDSplot)
840
#Not sure if I want to include this or not..
841
.PCAplot<-function(object,x="PC1", y="PC2",replicates=FALSE,pseudocount=1.0,scale=TRUE,...){
843
fpkms<-repFpkmMatrix(object)
845
fpkms<-fpkmMatrix(object)
847
fpkms<-log10(fpkms+pseudocount)
848
PC<-prcomp(fpkms,scale=scale,...)
849
dat <- data.frame(obsnames=row.names(PC$x), PC$x)
851
#dat$shoutout[matchpt(PC$rotation,PC$x)$index]<-rownames(pca$x[matchpt(pca$rotation,pca$x)$index,])
852
plot <- ggplot(dat, aes_string(x=x, y=y)) + geom_point(alpha=.4, size=0.8, aes(label=obsnames))
853
plot <- plot + geom_hline(aes(0), size=.2) + geom_vline(aes(0), size=.2) #+ geom_text(aes(label=shoutout),size=2,color="red")
854
datapc <- data.frame(varnames=rownames(PC$rotation), PC$rotation)
856
(max(dat[,y]) - min(dat[,y])/(max(datapc[,y])-min(datapc[,y]))),
857
(max(dat[,x]) - min(dat[,x])/(max(datapc[,x])-min(datapc[,x])))
859
datapc <- transform(datapc,
860
v1 = .7 * mult * (get(x)),
861
v2 = .7 * mult * (get(y))
865
geom_text(data=datapc, aes(x=v1, y=v2, label=varnames), size = 3, vjust=1, color="red")
866
plot <- plot + geom_segment(data=datapc, aes(x=0, y=0, xend=v1, yend=v2), arrow=arrow(length=unit(0.2,"cm")), alpha=0.75, color="red") + theme_bw()
870
setMethod('PCAplot',signature(object="CuffData"),.PCAplot)
872
#This is most definitely a work in progress
873
.confidencePlot<-function(object,percentCutoff=20){
874
res<-.statsMatrix(object)
875
#res$CV[is.na(res$CV)]<-0.0
876
res$CV<-as.numeric(res$CV)
878
p<-p + geom_point(aes(x=log10(fpkm),y=log10(CV)),size=1.5,alpha=0.3)
879
p<-p + geom_smooth(aes(x=log10(fpkm),y=log10(CV),color=sample_name)) +
880
#p<-p + geom_point(aes(x=log10(fpkm),y=log10(count),color=-log10(CV))) +
881
#p<-p + geom_point(aes(x=log10(fpkm),y=log2(fpkm/count),color=-log10(CV))) + geom_hline(aes(0),linetype=2) +
882
#facet_wrap('sample_name') +
883
geom_abline(intercept=0,slope=1,linetype=2,size=0.3) +
884
scale_color_gradient(name="%CV",low="darkblue",high="white",limits=c(0,percentCutoff), na.value = "white") +
885
labs(title=object@type)
889
.CVdensity<-function(object){
890
dat<-.statsMatrix(object)
892
p <- p + geom_density(aes(x=CV,fill=sample_name),alpha=0.3,position="dodge") + scale_x_log10()
896
.fpkmSCVPlot<-function(object,FPKMLowerBound=1){
898
colnames(dat)[1]<-"tracking_id"
899
dat<-dat[,c('tracking_id','sample_name','fpkm')]
900
dat<-dat[dat$fpkm>0,]
902
#Option 3 (tapply on log10(replicateFPKM) values)
903
dat.means<-tapply(dat$fpkm,dat[,c('tracking_id','sample_name')],function(x){mean(x,na.rm=T)})
904
dat.sd<-tapply(dat$fpkm,dat[,c('tracking_id','sample_name')],function(x){sd(x,na.rm=T)})
905
#write("Calculating replicate fpkm mean...",stderr())
906
dat.means<-melt(dat.means)
907
#write("Calculating replicate fpkm stdev...",stderr())
909
colnames(dat.means)[colnames(dat.means)=="value"]<-'fpkm'
910
colnames(dat.sd)[colnames(dat.sd)=="value"]<-'stdev'
911
dat<-cbind(dat.means,dat.sd$stdev)
912
colnames(dat)[colnames(dat)=="dat.sd$stdev"]<-'stdev'
913
dat<-dat[!is.na(dat$stdev) & !is.na(dat$fpkm),]
914
dat<-dat[dat$fpkm>0 & dat$stdev>0,]
915
dat$sample_name<-factor(dat$sample_name,levels=samples(object))
917
p <-ggplot(dat,aes(x=fpkm,y=(stdev/fpkm)^2),na.rm=T)
918
#p <-ggplot(dat,aes(x=log10(fpkm+1),y=log10(stdev)),na.rm=T)
919
p <- p + #geom_point(aes(color=sample_name),size=1,na.rm=T) +
920
stat_smooth(aes(color=sample_name,fill=sample_name),na.rm=T,method='auto',fullrange=T) +
922
scale_y_continuous(name=bquote(CV^2)) +
923
xlab(bquote(paste(log[10],"FPKM",sep=" "))) +
924
theme_bw() + xlim(c(log10(FPKMLowerBound),max(log10(dat$fpkm)))) + labs(title=object@type)
929
setMethod("fpkmSCVPlot",signature(object="CuffData"),.fpkmSCVPlot)
931
.sensitivityPlot<-function(object){
935
#TODO:Log2FC vs Test-statistic
937
#TODO:log2FPKM vs log2(stdev) (color by sample)
939
#TODO: Index of dispersion alpha (FPKM vs ?)
940
#SELECT gd.*, (gd.conf_hi-gd.fpkm)/2 as fpkm_stdev, gc.count, gc.variance as count_variance , gc.uncertainty, gc.dispersion, ((gd.conf_hi-gd.fpkm)/2)/gd.fpkm AS 'CV' FROM geneData gd LEFT JOIN geneCount gc ON gd.gene_id=gc.gene_id AND gd.sample_name=gc.sample_name;
415
965
setMethod("csSpecificity",signature(object="CuffData"),.specificity)
968
# GSEA helper methods
971
.makeRnk<-function(object,x,y,filename,...){
972
#Creates a log2-fold change .rnk file for all genes given an x/y comparison.
973
#While this method will work for any cuffData level, it really only makes sense for 'genes' as this is what is required for GSEA...
974
#Must provide 'x' and 'y' so as to provide a log2 fold change.
975
samp<-samples(object)
976
#check to make sure x and y are in samples
977
if (!all(c(x,y) %in% samp)){
978
stop("One or more values of 'x' or 'y' are not valid sample names!")
980
query<-paste("SELECT gd.gene_id,g.gene_short_name,(sum(CASE WHEN gd.sample_name='",x,"' THEN gd.fpkm+1 END))/(sum(CASE WHEN gd.sample_name='",y,"' THEN gd.fpkm+1 END)) as 'ratio' FROM genes g LEFT JOIN geneData gd ON g.gene_id=gd.gene_id GROUP BY g.gene_id ORDER BY ratio DESC",sep="")
981
res<-dbGetQuery(object@DB,query)
982
res$ratio<-log2(res$ratio)
983
#Remove gene_id field
985
#Remove rows with "NA" for gene_short_name
986
res<-res[!is.na(res$gene_short_name),]
988
write.table(res,file=filename,sep="\t",quote=F,...,row.names=F,col.names=F)
992
setMethod("makeRnk",signature(object="CuffData"),.makeRnk)
998
.checkSamples<-function(dbConn,sampleIdList){
999
dbSamples<-dbReadTable(dbConn,"samples")
1000
if (all(sampleIdList %in% dbSamples$sample_name)){
1007
.checkReps<-function(dbConn,repIdList){
1008
dbReps<-dbReadTable(dbConn,"replicates")
1009
if (all(repIdList %in% dbReps$rep_name)){
1025
#.volcanoMatrix <- function(data){
1026
# densities <- do.call('rbind',lapply(1:))
1027
# p <-ggplot(data) + facet_grid(sample1~sample2,scales="free") + geom_point(aes(x=log2_fold_change,y=-log10(p_value))) + stat_density(aes(x = log2_fold_change,
1028
# y = ..scaled.. * diff(range(log2_fold_change)) + min(log2_fold_change)), data = densities,
1029
# position = "identity", colour = "grey20", geom = "line")