1
### R code from vignette source 'summarizeOverlaps.Rnw'
3
###################################################
4
### code chunk number 1: style
5
###################################################
9
###################################################
10
### code chunk number 2: options
11
###################################################
13
options("showHeadLines" = 3)
14
options("showTailLines" = 3)
17
###################################################
18
### code chunk number 3: firstExample
19
###################################################
24
fls <- list.files(system.file("extdata",package="GenomicRanges"),
25
recursive=TRUE, pattern="*bam$", full=TRUE)
26
bfl <- BamFileList(fls, index=character())
29
seqnames = c(rep("chr2L", 4), rep("chr2R", 5), rep("chr3L", 2)),
30
ranges = IRanges(c(1000, 3000, 4000, 7000, 2000, 3000, 3600, 4000,
31
7500, 5000, 5400), width=c(rep(500, 3), 600, 900, 500, 300, 900,
33
group_id=c(rep("A", 4), rep("B", 5), rep("C", 2)))
35
olap <- summarizeOverlaps(features, bfl)
36
deseq <- newCountDataSet(assays(olap)$counts, rownames(colData(olap)))
37
edger <- DGEList(assays(olap)$counts, group=rownames(colData(olap)))
40
###################################################
41
### code chunk number 4: simple
42
###################################################
43
rd <- GAlignments("a", seqnames = Rle("chr1"), pos = as.integer(100),
44
cigar = "300M", strand = strand("+"))
46
gr1 <- GRanges("chr1", IRanges(start=50, width=150), strand="+")
47
gr2 <- GRanges("chr1", IRanges(start=350, width=150), strand="+")
50
###################################################
51
### code chunk number 5: simpleGRanges
52
###################################################
54
data.frame(union = assays(summarizeOverlaps(gr, rd))$counts,
55
intStrict = assays(summarizeOverlaps(gr, rd,
56
mode="IntersectionStrict"))$counts,
57
intNotEmpty = assays(summarizeOverlaps(gr, rd,
58
mode="IntersectionNotEmpty"))$counts)
61
###################################################
62
### code chunk number 6: simpleGRangesList
63
###################################################
64
grl <- GRangesList(c(gr1, gr2))
65
data.frame(union = assays(summarizeOverlaps(grl, rd))$counts,
66
intStrict = assays(summarizeOverlaps(grl, rd,
67
mode="IntersectionStrict"))$counts,
68
intNotEmpty = assays(summarizeOverlaps(grl, rd,
69
mode="IntersectionNotEmpty"))$counts)
72
###################################################
73
### code chunk number 7: data
74
###################################################
75
group_id <- c("A", "B", "C", "C", "D", "D", "E", "F", "G", "G", "H", "H")
77
seqnames = Rle(c("chr1", "chr2", "chr1", "chr1", "chr2", "chr2",
78
"chr1", "chr1", "chr2", "chr2", "chr1", "chr1")),
79
strand = strand(rep("+", length(group_id))),
81
start=c(1000, 2000, 3000, 3600, 7000, 7500, 4000, 4000, 3000, 3350, 5000, 5400),
82
width=c(500, 900, 500, 300, 600, 300, 500, 900, 150, 200, 500, 500)),
87
names = c("a","b","c","d","e","f","g"),
88
seqnames = Rle(c(rep(c("chr1", "chr2"), 3), "chr1")),
89
pos = as.integer(c(1400, 2700, 3400, 7100, 4000, 3100, 5200)),
90
cigar = c("500M", "100M", "300M", "500M", "300M", "50M200N50M", "50M150N50M"),
91
strand = strand(rep.int("+", 7L)))
95
###################################################
96
### code chunk number 8: GRanges
97
###################################################
98
data.frame(union = assays(summarizeOverlaps(features, reads))$counts,
99
intStrict = assays(summarizeOverlaps(features, reads,
100
mode="IntersectionStrict"))$counts,
101
intNotEmpty = assays(summarizeOverlaps(features, reads,
102
mode="IntersectionNotEmpty"))$counts)
105
###################################################
106
### code chunk number 9: lst
107
###################################################
108
lst <- split(features, mcols(features)[["group_id"]])
112
###################################################
113
### code chunk number 10: GRangesList
114
###################################################
115
data.frame(union = assays(summarizeOverlaps(lst, reads))$counts,
116
intStrict = assays(summarizeOverlaps(lst, reads,
117
mode="IntersectionStrict"))$counts,
118
intNotEmpty = assays(summarizeOverlaps(lst, reads,
119
mode="IntersectionNotEmpty"))$counts)
122
###################################################
123
### code chunk number 11: gff (eval = FALSE)
124
###################################################
125
## library(rtracklayer)
126
## fl <- paste0("ftp://ftp.ensembl.org/pub/release-62/",
127
## "gtf/drosophila_melanogaster/",
128
## "Drosophila_melanogaster.BDGP5.25.62.gtf.gz")
129
## gffFile <- file.path(tempdir(), basename(fl))
130
## download.file(fl, gffFile)
131
## gff0 <- import(gffFile, asRangedData=FALSE)
134
###################################################
135
### code chunk number 12: gff_parse (eval = FALSE)
136
###################################################
137
## idx <- mcols(gff0)$source == "protein_coding" &
138
## mcols(gff0)$type == "exon" &
139
## seqnames(gff0) == "4"
141
## ## adjust seqnames to match Bam files
142
## seqlevels(gff) <- paste("chr", seqlevels(gff), sep="")
143
## chr4genes <- split(gff, mcols(gff)$gene_id)
146
###################################################
147
### code chunk number 13: pasilla_param
148
###################################################
149
param <- ScanBamParam(
151
which=GRanges("chr4", IRanges(1, 1e6)),
152
flag=scanBamFlag(isUnmappedQuery=FALSE, isPaired=NA),
156
###################################################
157
### code chunk number 14: pasilla_count (eval = FALSE)
158
###################################################
159
## fls <- c("treated1.bam", "untreated1.bam", "untreated2.bam")
160
## path <- "pathToBAMFiles"
161
## bamlst <- BamFileList(fls, index=character())
162
## genehits <- summarizeOverlaps(chr4genes, bamlst, mode="Union")
165
###################################################
166
### code chunk number 15: pasilla_exoncountset (eval = FALSE)
167
###################################################
168
## expdata = new("MIAME",
169
## name="pasilla knockdown",
170
## lab="Genetics and Developmental Biology, University of
171
## Connecticut Health Center",
172
## contact="Dr. Brenton Graveley",
173
## title="modENCODE Drosophila pasilla RNA Binding Protein RNAi
174
## knockdown RNA-Seq Studies",
175
## url="http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GSE18508",
176
## abstract="RNA-seq of 3 biological replicates of from the Drosophila
177
## melanogaster S2-DRSC cells that have been RNAi depleted of mRNAs
178
## encoding pasilla, a mRNA binding protein and 4 biological replicates
179
## of the the untreated cell line.")
180
## pubMedIds(expdata) <- "20921232"
182
## design <- data.frame(
183
## condition=c("treated", "untreated", "untreated"),
184
## replicate=c(1,1,2),
185
## type=rep("single-read", 3),
186
## countfiles=path(colData(genehits)[,1]), stringsAsFactors=TRUE)
188
## geneCDS <- newCountDataSet(
189
## countData=assays(genehits)$counts,
190
## conditions=design)
192
## experimentData(geneCDS) <- expdata
193
## sampleNames(geneCDS) = colnames(genehits)
196
###################################################
197
### code chunk number 16: pasilla_genes (eval = FALSE)
198
###################################################
199
## chr4tx <- split(gff, mcols(gff)$transcript_id)
200
## txhits <- summarizeOverlaps(chr4tx, bamlst)
201
## txCDS <- newCountDataSet(assays(txhits)$counts, design)
202
## experimentData(txCDS) <- expdata