524
524
merge(x, y)[intersect(seqnames(x), seqnames(y))]
527
## Long comment by Marc
528
## --------------------
530
## To make this work I need to do some kind of 'fuzzy matching'. What that means mostly is just that a lot of things DO NOT MATTER HERE. For example, I don't care if I have the organism wrong for this getter! Not only am I not returning the organism, but I only need to match to *A* seqnameStyle such that all the things in my seqinfo are accounted for. Also, it doesn't matter if my seqnameStyle is not the perfect one, because I only need one that works for all the fields that the user has in their seqinfo.
532
## But what are "all" the things in my seqinfo? Because sometimes things are in there that the user does NOT need renamed (I don't want to try to match those!). How do I know which things are the ones I have to match? In the case of determineDefaultSeqnameStyle() I was able to rely on the fact that all the fields in the DB would have to be present in the incoming data, but now I don't have the luxury of that test. I no longer know which things are the ones I need to be matching. Maybe this too doesn't really matter. Those other things are NEVER going to match, so all I really need is the best match? But that won't quite cut it since my best match might kind of suck. Suppose that there really is no good match in the DB, shouldn't the user be told that???
534
## proposed strategy:
535
## 1) Go down the seqnames vector and find things that match NOTHING in the DB. Throw those things away. We don't want to even try to match those. If the vector is not length 0, then error out.
536
## 2) Find the best match you can of the remaining things. If there are multiple best matches, keep the 1st one of them and return the name of that seqnameStyle. If the set of things that each have the same number of best matches have "match indices" that are not identical, then issue a warning.
538
.getSeqnameStyle <- function(x)
540
## We load AnnotationDbi to get supportedSeqnames() and
541
## listAllSupportedSeqnameStyles().
542
library(AnnotationDbi)
543
seqnames <- seqnames(x)
544
## Get vector of ALL possible chromosome names
545
allNames <- AnnotationDbi::supportedSeqnames()
546
## compare to seqnames and filter out things that can never match
547
seqnames <- seqnames[seqnames %in% allNames]
548
if(length(seqnames)==0){
549
stop("It is not possibe to match the seqnameStyle of this object to the seqnames.db database")
551
## now compare to all, one style at a time.
552
allseqnameStyles <- AnnotationDbi::listAllSupportedSeqnameStyles()
553
## (get match indices for each style)
554
.getIndices <- function(style, seqnames){
555
match(seqnames, style)
557
indices <- lapply(allseqnameStyles,.getIndices,seqnames=seqnames)
558
## I need to count the number that have some match (are not NA)
559
cnts <- unlist(lapply(indices, function(x){x<-x[!is.na(x)];length(x)}))
560
## now I can calculate the result
561
res <- names(cnts)[cnts==max(cnts)]
562
res <- unique(sub("^.*__","",res,perl=TRUE))
563
## if things are matching poorly in a way that may be consequential give a
565
if(length(res) > 1 && length(cnts[cnts==max(cnts)]) > 1){
566
## more than one match means things are ambiguous
567
warning("The seqnames for this object matches multiple different seqname styles equally well. You may wish to verify that the seqnames.db database contains records that match your data.")
572
setMethod("seqnameStyle", "Seqinfo", .getSeqnameStyle)
574
setReplaceMethod("seqnameStyle", "Seqinfo",
577
## We load AnnotationDbi to get findSequenceRenamingMaps().
578
library(AnnotationDbi)
579
x_seqnames <- seqnames(x)
580
renaming_maps <- AnnotationDbi::findSequenceRenamingMaps(x_seqnames,
583
if (nrow(renaming_maps) == 0L) {
584
msg <- c("found no sequence renaming map compatible ",
585
"with seqname style \"", value, "\" for this object")
588
## Use 1st best renaming map.
589
if (nrow(renaming_maps) != 1L) {
590
msg <- c("found more than one best sequence renaming map ",
591
"compatible with seqname style \"", value, "\" for ",
592
"this object, using the first one")
594
renaming_maps <- renaming_maps[1L, , drop=FALSE]
596
new_seqnames <- as.vector(renaming_maps)
597
na_idx <- which(is.na(new_seqnames))
598
new_seqnames[na_idx] <- x_seqnames[na_idx]
599
seqnames(x) <- new_seqnames