1
### =========================================================================
2
### GappedAlignmentPairs objects
3
### -------------------------------------------------------------------------
6
### TODO: Implement a GappedAlignmentList class (CompressedList subclass)
7
### and derive GappedAlignmentPairs from it.
9
### "first" and "last" GappedAlignments must have identical seqinfo.
10
setClass("GappedAlignmentPairs",
13
NAMES="characterORNULL", # R doesn't like @names !!
14
first="GappedAlignments", # of length N, no names, no elt metadata
15
last="GappedAlignments", # of length N, no names, no elt metadata
16
isProperPair="logical", # of length N
17
elementMetadata="DataFrame" # N rows
20
elementType="GappedAlignments"
25
### length(x) - single integer N. Nb of pairs in 'x'.
26
### names(x) - NULL or character vector.
27
### first(x) - returns "first" slot.
28
### last(x) - returns "last" slot.
29
### left(x) - GappedAlignments made of the "left alignments" (if first
30
### alignment is on + strand then it's considered to be the
31
### "left alignment", otherwise, it's considered the "right
33
### right(x) - GappedAlignments made of the "right alignments".
34
### The strand of the last alignments is inverted before they
35
### are stored in the GappedAlignments returned by left(x) or
37
### seqnames(x) - same as 'seqnames(first(x))' or 'seqnames(last(x))'.
38
### strand(x) - same as 'strand(first(x))' (opposite of 'strand(last(x))').
39
### ngap(x) - same as 'ngap(first(x)) + ngap(last(x))'.
40
### isProperPair(x) - returns "isProperPair" slot.
41
### seqinfo(x) - returns 'seqinfo(first(x))' (same as 'seqinfo(last(x))').
42
### granges(x) - GRanges object of the same length as 'x'.
43
### grglist(x) - GRangesList object of the same length as 'x'.
44
### introns(x) - Extract the N gaps in a GRangesList object of the same
46
### show(x) - compact display in a data.frame-like fashion.
47
### GappedAlignmentPairs(x) - constructor.
48
### x[i] - GappedAlignmentPairs object of the same class as 'x'
52
setGeneric("first", function(x, ...) standardGeneric("first"))
53
setGeneric("last", function(x, ...) standardGeneric("last"))
54
setGeneric("left", function(x, ...) standardGeneric("left"))
55
setGeneric("right", function(x, ...) standardGeneric("right"))
56
setGeneric("isProperPair", function(x) standardGeneric("isProperPair"))
59
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63
setMethod("length", "GappedAlignmentPairs",
64
function(x) length(x@first)
67
setMethod("names", "GappedAlignmentPairs",
71
setMethod("first", "GappedAlignmentPairs",
72
function(x, invert.strand=FALSE)
74
if (!isTRUEorFALSE(invert.strand))
75
stop("'invert.strand' must be TRUE or FALSE")
76
ans <- setNames(x@first, names(x))
78
ans <- invertRleStrand(ans)
83
setMethod("last", "GappedAlignmentPairs",
84
function(x, invert.strand=FALSE)
86
if (!isTRUEorFALSE(invert.strand))
87
stop("'invert.strand' must be TRUE or FALSE")
88
ans <- setNames(x@last, names(x))
90
ans <- invertRleStrand(ans)
95
setMethod("left", "GappedAlignmentPairs",
99
x_last <- invertRleStrand(x@last)
101
left_is_last <- which(strand(x_first) == "-")
102
idx <- seq_len(length(x))
103
idx[left_is_last] <- idx[left_is_last] + length(x)
105
ans <- c(x_first, x_last)[idx]
106
setNames(ans, names(x))
110
setMethod("right", "GappedAlignmentPairs",
114
x_last <- invertRleStrand(x@last)
116
right_is_first <- which(strand(x_first) == "-")
117
idx <- seq_len(length(x))
118
idx[right_is_first] <- idx[right_is_first] + length(x)
120
ans <- c(x_last, x_first)[idx]
121
setNames(ans, names(x))
125
setMethod("seqnames", "GappedAlignmentPairs",
126
function(x) seqnames(x@first)
129
setMethod("strand", "GappedAlignmentPairs",
130
function(x) strand(x@first)
133
setMethod("ngap", "GappedAlignmentPairs",
134
function(x) {ngap(x@first) + ngap(x@last)}
137
setMethod("isProperPair", "GappedAlignmentPairs",
138
function(x) x@isProperPair
141
setMethod("seqinfo", "GappedAlignmentPairs",
142
function(x) seqinfo(x@first)
146
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
150
setReplaceMethod("names", "GappedAlignmentPairs",
154
value <- as.character(value)
161
setReplaceMethod("strand", "GappedAlignmentPairs",
164
same_strand <- strand(x@first) == strand(x@last)
165
## Set the first strand.
166
strand(x@first) <- value
167
## Then set the last strand to preserve the original relationship
168
## between first and last strand (i.e. if they were the same, they
169
## remain the same, if they were opposite, they remain opposite).
170
strand(x@last) <- strand(same_strand == (strand(x@first) == "-"))
175
setReplaceMethod("elementMetadata", "GappedAlignmentPairs",
176
function(x, ..., value)
178
x@elementMetadata <- mk_elementMetadataReplacementValue(x, value)
183
setReplaceMethod("seqinfo", "GappedAlignmentPairs",
184
function(x, new2old=NULL, force=FALSE, value)
186
if (!is(value, "Seqinfo"))
187
stop("the supplied 'seqinfo' must be a Seqinfo object")
190
dangling_seqlevels_in_first <- getDanglingSeqlevels(first,
191
new2old=new2old, force=force,
193
dangling_seqlevels_in_last <- getDanglingSeqlevels(last,
194
new2old=new2old, force=force,
196
dangling_seqlevels <- union(dangling_seqlevels_in_first,
197
dangling_seqlevels_in_last)
198
if (length(dangling_seqlevels) != 0L) {
199
dropme_in_first <- seqnames(first) %in% dangling_seqlevels_in_first
200
dropme_in_last <- seqnames(last) %in% dangling_seqlevels_in_last
201
dropme <- dropme_in_first | dropme_in_last
204
seqinfo(x@first, new2old=new2old) <- value
205
seqinfo(x@last, new2old=new2old) <- value
211
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
215
.valid.GappedAlignmentPairs.names <- function(x)
218
if (is.null(x_names))
220
if (!is.character(x_names) || !is.null(attributes(x_names))) {
221
msg <- c("'names(x)' must be NULL or a character vector ",
222
"with no attributes")
223
return(paste(msg, collapse=""))
225
if (length(x_names) != length(x))
226
return("'names(x)' and 'x' must have the same length")
230
.valid.GappedAlignmentPairs.first <- function(x)
233
if (class(x_first) != "GappedAlignments")
234
return("'x@first' must be a GappedAlignments instance")
238
.valid.GappedAlignmentPairs.last <- function(x)
241
if (class(x_last) != "GappedAlignments")
242
return("'x@last' must be a GappedAlignments instance")
244
if (length(x_last) != length(x_first))
245
return("'x@last' and 'x@first' must have the same length")
246
if (!identical(seqinfo(x_last), seqinfo(x_first)))
247
return("'seqinfo(x@last)' and 'seqinfo(x@first)' must be identical")
251
.valid.GappedAlignmentPairs.isProperPair <- function(x)
253
x_isProperPair <- x@isProperPair
254
if (!is.logical(x_isProperPair) || !is.null(attributes(x_isProperPair))) {
255
msg <- c("'x@isProperPair' must be a logical vector ",
256
"with no attributes")
257
return(paste(msg, collapse=""))
259
if (length(x_isProperPair) != length(x))
260
return("'x@isProperPair' and 'x' must have the same length")
261
if (IRanges:::anyMissing(x_isProperPair))
262
return("'x@isProperPair' cannot contain NAs")
266
.valid.GappedAlignmentPairs <- function(x)
268
c(.valid.GappedAlignmentPairs.names(x),
269
.valid.GappedAlignmentPairs.first(x),
270
.valid.GappedAlignmentPairs.last(x),
271
.valid.GappedAlignmentPairs.isProperPair(x))
274
setValidity2("GappedAlignmentPairs", .valid.GappedAlignmentPairs,
275
where=asNamespace("GenomicRanges"))
278
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
282
GappedAlignmentPairs <- function(first, last, isProperPair, names=NULL)
284
new2("GappedAlignmentPairs",
286
first=first, last=last,
287
isProperPair=isProperPair,
288
elementMetadata=new("DataFrame", nrows=length(first)),
292
readGappedAlignmentPairs <- function(file, format="BAM", use.names=FALSE, ...)
294
if (!isSingleString(file))
295
stop("'file' must be a single string")
296
if (!isSingleString(format))
297
stop("'format' must be a single string")
298
if (!isTRUEorFALSE(use.names))
299
stop("'use.names' must be TRUE or FALSE")
300
if (format == "BAM") {
301
suppressMessages(library("Rsamtools"))
302
ans <- readBamGappedAlignmentPairs(file=file, use.names=use.names, ...)
305
stop("only BAM format is supported at the moment")
309
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
313
setMethod("[", "GappedAlignmentPairs",
314
function(x, i, j, ... , drop=TRUE)
316
if (!missing(j) || length(list(...)) > 0L)
317
stop("invalid subsetting")
320
i <- IRanges:::normalizeSingleBracketSubscript(i, x)
321
x@NAMES <- x@NAMES[i]
322
x@first <- x@first[i]
324
x@isProperPair <- x@isProperPair[i]
325
x@elementMetadata <- x@elementMetadata[i, , drop=FALSE]
331
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
335
### TODO: Remove the "[[" method below after the definition of the
336
### GappedAlignmentPairs class is changed to derive from CompressedList.
337
### (The "[[" method for CompressedList objects should do just fine i.e. it
338
### should do something like x@unlistData[x@partitioning[[i]]] and that
339
### should be optimal.)
340
.GappedAlignmentPairs.getElement <- function(x, i)
342
c(x@first[i], x@last[i])
345
setMethod("[[", "GappedAlignmentPairs",
346
function(x, i, j, ... , drop=TRUE)
348
if (missing(i) || !missing(j) || length(list(...)) > 0L)
349
stop("invalid subsetting")
350
i <- IRanges:::checkAndTranslateDbleBracketSubscript(x, i)
351
.GappedAlignmentPairs.getElement(x, i)
355
.makePickupIndex <- function(N)
357
ans <- rep(seq_len(N), each=2L)
358
if (length(ans) != 0L)
359
ans[c(FALSE, TRUE)] <- ans[c(FALSE, TRUE)] + N
363
### TODO: Remove this method after the definition of the GappedAlignmentPairs
364
### class is changed to derive from CompressedList.
365
setMethod("unlist", "GappedAlignmentPairs",
366
function(x, recursive=TRUE, use.names=TRUE)
368
if (!isTRUEorFALSE(use.names))
369
stop("'use.names' must be TRUE or FALSE")
372
ans <- c(x_first, x_last)[.makePickupIndex(length(x))]
374
names(ans) <- rep(names(x), each=2L)
380
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
384
### Shrink CompressedList 'x' (typically a GRangesList) by half by combining
385
### pairs of consecutive top-level elements.
386
.shrinkByHalf <- function(x)
388
if (length(x) %% 2L != 0L)
389
stop("'x' must have an even length")
390
x_elt_lens <- elementLengths(x)
391
if (length(x_elt_lens) == 0L) {
392
ans_nelt1 <- ans_nelt2 <- integer(0)
394
ans_nelt1 <- x_elt_lens[c(TRUE, FALSE)]
395
ans_nelt2 <- x_elt_lens[c(FALSE, TRUE)]
397
ans_elt_lens <- ans_nelt1 + ans_nelt2
398
ans_partitioning <- PartitioningByEnd(cumsum(ans_elt_lens))
399
ans <- relist(x@unlistData, ans_partitioning)
400
mcols(ans) <- DataFrame(nelt1=ans_nelt1, nelt2=ans_nelt2)
404
setMethod("grglist", "GappedAlignmentPairs",
405
function(x, order.as.in.query=FALSE, drop.D.ranges=FALSE)
407
if (!isTRUEorFALSE(order.as.in.query))
408
stop("'order.as.in.query' must be TRUE or FALSE")
410
if ("query.break" %in% colnames(x_mcols))
411
stop("'mcols(x)' cannot have reserved column \"query.break\"")
413
x_last <- invertRleStrand(x@last)
414
## Not the same as doing 'unlist(x, use.names=FALSE)'.
415
x_unlisted <- c(x_first, x_last)[.makePickupIndex(length(x))]
416
grl <- grglist(x_unlisted,
417
order.as.in.query=TRUE,
418
drop.D.ranges=drop.D.ranges)
419
ans <- .shrinkByHalf(grl)
420
ans_nelt1 <- mcols(ans)$nelt1
421
if (!order.as.in.query) {
422
ans_nelt2 <- mcols(ans)$nelt2
423
## Yes, we reorder *again* when 'order.as.in.query' is FALSE.
424
i <- which(strand(x) == "-")
425
ans <- revElements(ans, i)
426
ans_nelt1[i] <- ans_nelt2[i]
428
names(ans) <- names(x)
429
x_mcols$query.break <- ans_nelt1
430
mcols(ans) <- x_mcols
435
setMethod("granges", "GappedAlignmentPairs",
438
rg <- range(grglist(x))
439
if (!all(elementLengths(rg) == 1L))
440
stop("For some pairs in 'x', the first and last alignments ",
441
"are not aligned to the same chromosome and strand. ",
442
"Cannot extract a single range for them.")
447
setMethod("introns", "GappedAlignmentPairs",
450
first_introns <- introns(x@first)
451
last_introns <- introns(invertRleStrand(x@last))
452
## Fast way of doing mendoapply(c, first_introns, last_introns)
453
## on 2 CompressedList objects.
454
ans <- c(first_introns, last_introns)
455
ans <- ans[.makePickupIndex(length(x))]
456
ans <- .shrinkByHalf(ans)
462
setAs("GappedAlignmentPairs", "GRangesList", function(from) grglist(from))
463
setAs("GappedAlignmentPairs", "GRanges", function(from) granges(from))
466
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
469
### Not exported. Used in the SplicingGraphs package.
472
fillGaps <- function(x)
474
if (!is(x, "GRangesList"))
475
stop("'x' must be a GRangesList object")
476
query.breaks <- mcols(x)$query.break
477
if (is.null(query.breaks))
478
stop("'x' must be a GRangesList object with a \"query.breaks\" ",
480
offsets <- end(x@partitioning)
482
offsets <- c(0L, offsets[-length(offsets)])
483
idx <- IRanges:::fancy_mseq(query.breaks, offsets)
484
half1_partitioning <- PartitioningByEnd(cumsum(query.breaks))
485
half1 <- relist(x@unlistData[idx], half1_partitioning)
486
half1 <- range(half1)@unlistData
487
half2_eltlens <- elementLengths(x) - query.breaks
488
half2_partitioning <- PartitioningByEnd(cumsum(half2_eltlens))
489
half2 <- relist(x@unlistData[-idx], half2_partitioning)
490
half2 <- range(half2)@unlistData
491
idx <- .makePickupIndex(length(x))
492
ans_unlistData <- c(half1, half2)[idx]
493
ans_partitioning <- PartitioningByEnd(2L * seq_along(x),
495
relist(ans_unlistData, ans_partitioning)
499
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
503
.makeNakedMatFromGappedAlignmentPairs <- function(x)
507
pair_cols <- cbind(seqnames=as.character(seqnames(x)),
508
strand=as.character(strand(x)))
510
first_cols <- cbind(ranges=IRanges:::showAsCell(ranges(x_first)))
512
last_cols <- cbind(ranges=IRanges:::showAsCell(ranges(x_last)))
513
ans <- cbind(pair_cols,
514
`:`=rep.int(":", lx),
516
`--`=rep.int("--", lx),
519
tmp <- do.call(data.frame, lapply(mcols(x),
520
IRanges:::showAsCell))
521
ans <- cbind(ans, `|`=rep.int("|", lx), as.matrix(tmp))
526
showGappedAlignmentPairs <- function(x, margin="",
527
with.classinfo=FALSE,
528
print.seqlengths=FALSE)
532
cat(class(x), " with ",
533
lx, " alignment ", ifelse(lx == 1L, "pair", "pairs"),
535
nc, " metadata ", ifelse(nc == 1L, "column", "columns"),
537
out <- makePrettyMatrixForCompactPrinting(x,
538
.makeNakedMatFromGappedAlignmentPairs)
539
if (with.classinfo) {
540
.PAIR_COL2CLASS <- c(
544
.HALVES_COL2CLASS <- c(
547
.COL2CLASS <- c(.PAIR_COL2CLASS,
552
classinfo <- makeClassinfoRowForCompactPrinting(x, .COL2CLASS)
553
## A sanity check, but this should never happen!
554
stopifnot(identical(colnames(classinfo), colnames(out)))
555
out <- rbind(classinfo, out)
558
rownames(out) <- paste0(margin, rownames(out))
559
print(out, quote=FALSE, right=TRUE)
560
if (print.seqlengths) {
561
cat(margin, "---\n", sep="")
562
showSeqlengths(x, margin=margin)
566
setMethod("show", "GappedAlignmentPairs",
568
showGappedAlignmentPairs(object,
569
with.classinfo=TRUE, print.seqlengths=TRUE)
573
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
577
### TODO: Support 'use.names=TRUE'.
578
unlist_list_of_GappedAlignmentPairs <- function(x, use.names=TRUE,
582
stop("'x' must be a list")
583
if (!isTRUEorFALSE(use.names))
584
stop("'use.names' must be TRUE or FALSE")
586
stop("'use.names=TRUE' is not supported yet")
587
if (!isTRUEorFALSE(ignore.mcols))
588
stop("'ignore.mcols' must be TRUE or FALSE")
590
## TODO: Implement (in C) fast elementIsNull(x) in IRanges, that does
591
## 'sapply(x, is.null)' on list 'x', and use it here.
592
null_idx <- which(sapply(x, is.null))
593
if (length(null_idx) != 0L)
596
return(new("GappedAlignmentPairs"))
598
if (!is(x1, "GappedAlignmentPairs"))
599
stop("first non-NULL element in 'x' must be a GappedAlignmentPairs object")
602
## TODO: Implement (in C) fast elementIs(x, class) in IRanges, that does
603
## 'sapply(x, is, class)' on list 'x', and use it here.
604
## 'elementIs(x, "NULL")' should work and be equivalent to
605
## 'elementIsNull(x)'.
607
if (!all(sapply(x, is, class1)))
608
stop("all elements in 'x' must be ", class1, " objects (or NULLs)")
612
## Combine "NAMES" slots.
613
NAMES_slots <- lapply(x, function(xi) xi@NAMES)
614
## TODO: Use elementIsNull() here when it becomes available.
615
has_no_names <- sapply(NAMES_slots, is.null)
616
if (all(has_no_names)) {
619
noname_idx <- which(has_no_names)
620
if (length(noname_idx) != 0L)
621
NAMES_slots[noname_idx] <- lapply(elementLengths(x[noname_idx]),
623
ans_NAMES <- unlist(NAMES_slots, use.names=FALSE)
626
## Combine "first" slots.
627
first_slots <- lapply(x, function(xi) xi@first)
628
ans_first <- unlist_list_of_GappedAlignments(first_slots, use.names=FALSE,
629
ignore.mcols=ignore.mcols)
631
## Combine "last" slots.
632
last_slots <- lapply(x, function(xi) xi@last)
633
ans_last <- unlist_list_of_GappedAlignments(last_slots, use.names=FALSE,
634
ignore.mcols=ignore.mcols)
636
## Combine "isProperPair" slots.
637
isProperPair_slots <- lapply(x, function(xi) xi@isProperPair)
638
ans_isProperPair <- unlist(isProperPair_slots, use.names=FALSE)
640
## Combine "mcols" slots. We don't need to use fancy
641
## IRanges:::rbind.mcols() for this because the "mcols" slot of a
642
## GappedAlignmentPairs object is guaranteed to be a DataFrame.
644
ans_mcols <- new("DataFrame", nrows=length(ans_first))
646
mcols_slots <- lapply(x, function(xi) xi@elementMetadata)
647
## Will fail if not all the GappedAlignmentPairs objects in 'x' have
648
## exactly the same metadata cols.
649
ans_mcols <- do.call(rbind, mcols_slots)
652
## Make 'ans' and return it.
653
new(class(x1), NAMES=ans_NAMES,
656
isProperPair=ans_isProperPair,
657
elementMetadata=ans_mcols)
660
setMethod("c", "GappedAlignmentPairs",
661
function(x, ..., ignore.mcols=FALSE, recursive=FALSE)
663
if (!identical(recursive, FALSE))
664
stop("\"c\" method for GappedAlignmentPairs objects ",
665
"does not support the 'recursive' argument")
667
args <- unname(list(...))
669
args <- unname(list(x, ...))
671
unlist_list_of_GappedAlignmentPairs(args, use.names=FALSE,
672
ignore.mcols=ignore.mcols)