2
function (formula, family = gaussian(), data, snpsubset, idsubset, bcast=50) {
3
if (!is(data,"gwaa.data")) {
4
stop("wrong data class: should be gwaa.data")
6
if (!is.character(formula)) stop("formula must be character object (apply \"s)")
7
if (is.character(family))
8
family <- get(family, mode = "function", envir = parent.frame())
9
if (is.function(family))
11
if (is.null(family$family)) {
13
stop("'family' not recognized")
15
if (grep("CRSNP",formula,ignore.case=TRUE)!=1) stop("formula must contain CRSNP variable to be replaced with the analysis SNPs")
16
if (missing(snpsubset)) snpsubset <- data@gtdata@snpnames
17
if (missing(idsubset)) idsubset <- data@gtdata@idnames
18
if (is.logical(snpsubset) || is.numeric(snpsubset)) snpsubset <- data@gtdata@snpnames[snpsubset]
19
gtdata <- data[idsubset,snpsubset]@gtdata
20
phdata <- data[idsubset,snpsubset]@phdata
21
allnams <- gtdata@snpnames
22
chsize <- ceiling(length(allnams)/80)
25
chunks <- ceiling(length(allnams)/chsize)-1
27
ch[[i]] = allnams[((i-1)*chsize+1):(i*chsize)]
29
ch[[chunks+1]] = allnams[(chunks*chsize+1):length(allnams)]
30
} else {ch[[1]] <- allnams;chunks=0}
32
fla2 <- as.formula(sub("CRSNP","as.factor(mygt1)*as.factor(mygt2)",formula,ignore.case=TRUE))
33
fla2.i0 <- as.formula(sub("CRSNP","as.factor(mygt1)+as.factor(mygt2)",formula,ignore.case=TRUE))
34
fla2.1 <- as.formula(sub("CRSNP","as.factor(mygt1)*mygt2",formula,ignore.case=TRUE))
35
fla2.1.i0 <- as.formula(sub("CRSNP","as.factor(mygt1)+mygt2",formula,ignore.case=TRUE))
36
fla2.2 <- as.formula(sub("CRSNP","mygt1*as.factor(mygt2)",formula,ignore.case=TRUE))
37
fla2.2.i0 <- as.formula(sub("CRSNP","mygt1+as.factor(mygt2)",formula,ignore.case=TRUE))
38
fla1 <- as.formula(sub("CRSNP","mygt1*mygt2",formula,ignore.case=TRUE))
39
fla1.i0 <- as.formula(sub("CRSNP","mygt1+mygt2",formula,ignore.case=TRUE))
40
fla0 <- as.formula(sub("CRSNP","DuMmY1*DuMmY2",formula,ignore.case=TRUE))
43
P1df <- matrix(rep(NA,(nsnps*nsnps)),nrow=nsnps)
2
function (formula, family = gaussian(), data, snpsubset, idsubset, bcast=50) {
3
if (!is(data,"gwaa.data")) {
4
stop("wrong data class: should be gwaa.data")
6
if (!is.character(formula)) stop("formula must be character object (apply \"s)")
7
if (is.character(family))
8
family <- get(family, mode = "function", envir = parent.frame())
9
if (is.function(family))
11
if (is.null(family$family)) {
13
stop("'family' not recognized")
15
if (grep("CRSNP",formula,ignore.case=TRUE)!=1) stop("formula must contain CRSNP variable to be replaced with the analysis SNPs")
16
if (missing(snpsubset)) snpsubset <- data@gtdata@snpnames
17
if (missing(idsubset)) idsubset <- data@gtdata@idnames
18
if (is.logical(snpsubset) || is.numeric(snpsubset)) snpsubset <- data@gtdata@snpnames[snpsubset]
19
gtdata <- data[idsubset,snpsubset]@gtdata
20
phdata <- data[idsubset,snpsubset]@phdata
21
allnams <- gtdata@snpnames
22
chsize <- ceiling(length(allnams)/80)
25
chunks <- ceiling(length(allnams)/chsize)-1
27
ch[[i]] = allnams[((i-1)*chsize+1):(i*chsize)]
29
ch[[chunks+1]] = allnams[(chunks*chsize+1):length(allnams)]
30
} else {ch[[1]] <- allnams;chunks=0}
32
fla2 <- as.formula(sub("CRSNP","as.factor(mygt1)*as.factor(mygt2)",formula,ignore.case=TRUE))
33
fla2.i0 <- as.formula(sub("CRSNP","as.factor(mygt1)+as.factor(mygt2)",formula,ignore.case=TRUE))
34
fla2.1 <- as.formula(sub("CRSNP","as.factor(mygt1)*mygt2",formula,ignore.case=TRUE))
35
fla2.1.i0 <- as.formula(sub("CRSNP","as.factor(mygt1)+mygt2",formula,ignore.case=TRUE))
36
fla2.2 <- as.formula(sub("CRSNP","mygt1*as.factor(mygt2)",formula,ignore.case=TRUE))
37
fla2.2.i0 <- as.formula(sub("CRSNP","mygt1+as.factor(mygt2)",formula,ignore.case=TRUE))
38
fla1 <- as.formula(sub("CRSNP","mygt1*mygt2",formula,ignore.case=TRUE))
39
fla1.i0 <- as.formula(sub("CRSNP","mygt1+mygt2",formula,ignore.case=TRUE))
40
fla0 <- as.formula(sub("CRSNP","DuMmY1*DuMmY2",formula,ignore.case=TRUE))
43
P1df <- matrix(rep(NA,(nsnps*nsnps)),nrow=nsnps)
50
for (i1 in 1:(length(snpsubset)-1)) {
51
for (i2 in (i1+1):length(snpsubset)) {
52
mygt1 <- as.numeric(gtdata[,snpsubset[i1]])
53
mygt2 <- as.numeric(gtdata[,snpsubset[i2]])
54
polym1 <- length(levels(as.factor(mygt1)))
55
polym2 <- length(levels(as.factor(mygt2)))
56
if (polym1<=1 || polym2<=1) {
57
cat("One of markers",snpsubset[i1],snpsubset[i2],"is (are) monomorphic; skipping in analysis\n")
63
DuMmY1 <- rep(0,length(mygt1))
64
DuMmY1 <- replace(DuMmY1,is.na(mygt1),NA)
65
DuMmY2 <- rep(0,length(mygt2))
66
DuMmY2 <- replace(DuMmY2,is.na(mygt2),NA)
67
if (family$family != "gaussian") {
68
m1 <- glm(fla1,family = family,data=phdata)
69
m1.i0 <- glm(fla1.i0,family = family,data=phdata)
70
if (polym1>2 && polym2>2) {
71
m2 <- glm(fla2,family = family,data=phdata)
72
m2.i0 <- glm(fla2.i0,family = family,data=phdata)
73
} else if (polym1>2) {
74
m2 <- glm(fla2.1,family = family,data=phdata)
75
m2.i0 <- glm(fla2.1.i0,family = family,data=phdata)
76
} else if (polym2>2) {
77
m2 <- glm(fla2.2,family = family,data=phdata)
78
m2.i0 <- glm(fla2.2.i0,family = family,data=phdata)
83
m0 <- glm(fla0,family = family,data=phdata)
84
anv1 <- anova(m0,m1,test="Chisq")
85
anv2 <- anova(m0,m2,test="Chisq")
86
anv1.i <- anova(m1.i0,m1,test="Chisq")
87
anv2.i <- anova(m2.i0,m2,test="Chisq")
88
P1df[i1,i2] <- anv1$"P(>|Chi|)"[2]
89
P2df[i1,i2] <- anv2$"P(>|Chi|)"[2]
90
Pint1df[i1,i2] <- anv1.i$"P(>|Chi|)"[2]
91
Pint2df[i1,i2] <- anv2.i$"P(>|Chi|)"[2]
92
if (is.na(P1df[i1,i2])) P1df[i1,i2] = 1.0
93
if (is.na(P2df[i1,i2])) P2df[i1,i2] = 1.0
94
if (is.na(Pint1df[i1,i2])) Pint1df[i1,i2] = 1.0
95
if (is.na(Pint2df[i1,i2])) Pint2df[i1,i2] = 1.0
97
m1 <- lm(fla1,data=phdata)
98
m1.i0 <- lm(fla1.i0,data=phdata)
99
if (polym1>2 && polym2>2) {
100
m2 <- lm(fla2,data=phdata)
101
m2.i0 <- lm(fla2.i0,data=phdata)
102
} else if (polym1>2) {
103
m2 <- lm(fla2.1,data=phdata)
104
m2.i0 <- lm(fla2.1.i0,data=phdata)
105
} else if (polym2>2) {
106
m2 <- lm(fla2.2,data=phdata)
107
m2.i0 <- lm(fla2.2.i0,data=phdata)
112
m0 <- lm(fla0,data=phdata)
113
anv1 <- anova(m0,m1,test="Chisq")
114
anv2 <- anova(m0,m2,test="Chisq")
115
anv1.i <- anova(m1.i0,m1,test="Chisq")
116
anv2.i <- anova(m2.i0,m2,test="Chisq")
117
P1df[i1,i2] <- anv1$"P(>|Chi|)"[2]
118
P2df[i1,i2] <- anv2$"P(>|Chi|)"[2]
119
Pint1df[i1,i2] <- anv1.i$"P(>|Chi|)"[2]
120
Pint2df[i1,i2] <- anv2.i$"P(>|Chi|)"[2]
121
if (is.na(P1df[i1,i2])) P1df[i1,i2] = 1.0
122
if (is.na(P2df[i1,i2])) P2df[i1,i2] = 1.0
123
if (is.na(Pint1df[i1,i2])) Pint1df[i1,i2] = 1.0
124
if (is.na(Pint2df[i1,i2])) Pint2df[i1,i2] = 1.0
128
if (bcast && round((donan)/bcast) == (donan)/bcast) {
129
cat("\b\b\b\b\b\b\b\b",round(100*donan/((nsnps-1)*nsnps/2),digits=2),"%",sep="");
50
for (i1 in 1:(length(snpsubset)-1)) {
51
for (i2 in (i1+1):length(snpsubset)) {
52
mygt1 <- as.numeric(gtdata[,snpsubset[i1]])
53
mygt2 <- as.numeric(gtdata[,snpsubset[i2]])
54
polym1 <- length(levels(as.factor(mygt1)))
55
polym2 <- length(levels(as.factor(mygt2)))
56
if (polym1<=1 || polym2<=1) {
57
cat("One of markers",snpsubset[i1],snpsubset[i2],"is (are) monomorphic; skipping in analysis\n")
63
DuMmY1 <- rep(0,length(mygt1))
64
DuMmY1 <- replace(DuMmY1,is.na(mygt1),NA)
65
DuMmY2 <- rep(0,length(mygt2))
66
DuMmY2 <- replace(DuMmY2,is.na(mygt2),NA)
67
if (family$family != "gaussian") {
68
m1 <- glm(fla1,family = family,data=phdata)
69
m1.i0 <- glm(fla1.i0,family = family,data=phdata)
70
if (polym1>2 && polym2>2) {
71
m2 <- glm(fla2,family = family,data=phdata)
72
m2.i0 <- glm(fla2.i0,family = family,data=phdata)
73
} else if (polym1>2) {
74
m2 <- glm(fla2.1,family = family,data=phdata)
75
m2.i0 <- glm(fla2.1.i0,family = family,data=phdata)
76
} else if (polym2>2) {
77
m2 <- glm(fla2.2,family = family,data=phdata)
78
m2.i0 <- glm(fla2.2.i0,family = family,data=phdata)
83
m0 <- glm(fla0,family = family,data=phdata)
84
anv1 <- anova(m0,m1,test="Chisq")
85
anv2 <- anova(m0,m2,test="Chisq")
86
anv1.i <- anova(m1.i0,m1,test="Chisq")
87
anv2.i <- anova(m2.i0,m2,test="Chisq")
88
P1df[i1,i2] <- anv1[2, grep("^P.*Chi",names(anv1))]
89
P2df[i1,i2] <- anv2[2, grep("^P.*Chi",names(anv2))]
90
Pint1df[i1,i2] <- anv1.i[2, grep("^P.*Chi",names(anv1.i))]
91
Pint2df[i1,i2] <- anv2.i[2, grep("^P.*Chi",names(anv2.i))]
92
if (is.na(P1df[i1,i2])) P1df[i1,i2] = 1.0
93
if (is.na(P2df[i1,i2])) P2df[i1,i2] = 1.0
94
if (is.na(Pint1df[i1,i2])) Pint1df[i1,i2] = 1.0
95
if (is.na(Pint2df[i1,i2])) Pint2df[i1,i2] = 1.0
97
m1 <- lm(fla1,data=phdata)
98
m1.i0 <- lm(fla1.i0,data=phdata)
99
if (polym1>2 && polym2>2) {
100
m2 <- lm(fla2,data=phdata)
101
m2.i0 <- lm(fla2.i0,data=phdata)
102
} else if (polym1>2) {
103
m2 <- lm(fla2.1,data=phdata)
104
m2.i0 <- lm(fla2.1.i0,data=phdata)
105
} else if (polym2>2) {
106
m2 <- lm(fla2.2,data=phdata)
107
m2.i0 <- lm(fla2.2.i0,data=phdata)
112
m0 <- lm(fla0,data=phdata)
113
anv1 <- anova(m0,m1,test="Chisq")
114
anv2 <- anova(m0,m2,test="Chisq")
115
anv1.i <- anova(m1.i0,m1,test="Chisq")
116
anv2.i <- anova(m2.i0,m2,test="Chisq")
117
P1df[i1,i2] <- anv1[2, grep("^P.*Chi",names(anv1))]
118
P2df[i1,i2] <- anv2[2, grep("^P.*Chi",names(anv2))]
119
Pint1df[i1,i2] <- anv1.i[2, grep("^P.*Chi",names(anv1.i))]
120
Pint2df[i1,i2] <- anv2.i[2, grep("^P.*Chi",names(anv2.i))]
121
if (is.na(P1df[i1,i2])) P1df[i1,i2] = 1.0
122
if (is.na(P2df[i1,i2])) P2df[i1,i2] = 1.0
123
if (is.na(Pint1df[i1,i2])) Pint1df[i1,i2] = 1.0
124
if (is.na(Pint2df[i1,i2])) Pint2df[i1,i2] = 1.0
128
if (bcast && round((donan)/bcast) == (donan)/bcast) {
129
cat("\b\b\b\b\b\b\b\b",round(100*donan/((nsnps-1)*nsnps/2),digits=2),"%",sep="");
134
if (bcast && donan>=bcast) cat("\n")
137
chromosome <- gtdata@chromosome
138
med1df <- median(qchisq(1.-P1df,df=1))
139
med2df <- median(qchisq(1.-P2df,df=2))
140
colnames(P1df) <- snpsubset
141
rownames(P1df) <- snpsubset #[length(snpsubset):1]
142
out <- list(P1df = P1df, Pint1df=Pint1df, P2df=P2df, Pint2df=Pint2df, medChi1df = med1df, medChi2df = med2df, snpnames = snpsubset, idnames = idsubset, formula = match.call(), family = family, map = map, chromosome = chromosome)
143
out$Pc1df <- rep(NA,length(P1df))
144
class(out) <- "scan.gwaa.2D"
134
if (bcast && donan>=bcast) cat("\n")
137
chromosome <- gtdata@chromosome
138
med1df <- median(qchisq(1.-P1df,df=1))
139
med2df <- median(qchisq(1.-P2df,df=2))
140
colnames(P1df) <- snpsubset
141
rownames(P1df) <- snpsubset #[length(snpsubset):1]
142
out <- list(P1df = P1df, Pint1df=Pint1df, P2df=P2df, Pint2df=Pint2df, medChi1df = med1df, medChi2df = med2df, snpnames = snpsubset, idnames = idsubset, formula = match.call(), family = family, map = map, chromosome = chromosome)
143
out$Pc1df <- rep(NA,length(P1df))
144
class(out) <- "scan.gwaa.2D"