1
#########################################################################
2
# Categorical CUSUM for y_t \sim M_k(n_t, \pi_t) for t=1,...,tmax
3
# Workhorse function doing the actual computations - no semantic checks
4
# are performed here, we expect "proper" input.
7
# y - (k) \times tmax observation matrix for all categories
8
# pi0 - (k) \times tmax in-control prob vector for all categories
9
# pi1 - (k) \times tmax out-of-control prob vector for all categories
10
# dfun - PMF function of the categorical response, i.e. multinomial, binomial,
12
# n - vector of dim tmax containing the varying sizes
13
# h - decision threshold of the Categorical CUSUM
14
#########################################################################
16
catcusum.LLRcompute <- function(y, pi0, pi1, h, dfun, n, calc.at=TRUE,...) {
20
S <- numeric(ncol(y)+1)
21
U <- numeric(ncol(y)+1)
23
#Run the Categorical LR CUSUM
27
#Compute log likelihood ratio
28
# llr <- dmultinom(y[,t], size=n[t], prob=pi1[,t],log=TRUE) -
29
# dmultinom(y[,t], size=n[t], prob=pi0[,t],log=TRUE)
30
llr <- dfun(y=y[,t,drop=FALSE], size=n[t], mu=pi1[,t,drop=FALSE], log=TRUE,...) - dfun(y=y[,t,drop=FALSE], size=n[t], mu=pi0[,t,drop=FALSE], log=TRUE, ...)
32
S[t+1] <- max(0,S[t] + llr)
34
#For binomial data it is also possible to compute how many cases it would take
35
#to sound an alarm given the past.
36
if (nrow(y) == 2 & calc.at) {
37
#For the binomial its possible to compute the number needed for an
39
if (identical(dfun,dbinom)) {
40
#Calculations in ../maple/numberneededbeforealarm.mw
41
at <- (h - S[t] - n[t] * ( log(1 - pi1[1,t]) - log(1-pi0[1,t]))) / (log(pi1[1,t]) - log(pi0[1,t]) - log(1-pi1[1,t]) + log(1-pi0[1,t]))
42
U[t+1] = ceiling(max(0,at))
44
#Compute the value at by trying all values betweeen 0 and n_t. If
45
#no alarm, then we know the value for an alarm must be larger than y_t
47
ay <- rbind(seq(0,y[1,t],by=1),n[t]-seq(0,y[1,t],by=1))
49
ay <- rbind(seq(y[1,t],n[t],by=1),n[t]-seq(y[1,t],n[t],by=1))
52
llr <- dfun(ay, size=n[t], mu=pi1[,t,drop=FALSE], log=TRUE,...) - dfun(ay, size=n[t], mu=pi0[,t,drop=FALSE], log=TRUE, ...)
55
#Is a_t available, i.e. does a y_t exist or is the set over which to
56
#take the minimum empty?
60
U[t+1] <- ay[1,which.max(alarm)]
65
#Only run to the first alarm. Then reset.
66
if ((S[t+1] > h) | (t==ncol(y))) { stopped <- TRUE}
68
#If no alarm at the end put rl to end (its censored!)
69
#Question: Is this not automatically handled?
71
t <- which.max(S[-1] > h)
73
t <- ncol(pi0) ##Last one
75
#Missing: cases needs to be returned!
76
return(list(N=t,val=S[-1],cases=U[-1]))
79
######################################################################
80
# Wrap function to process sts object by categoricalCUSUM (new S4
81
# style). Time varying number of counts is found in slot populationFrac.
84
# control - list with the following components
85
# * range - vector of indices in disProgObj to monitor
86
# * h - threshold, once CUSUM > h we have an alarm
87
# * pi0 - (k-1) \times tmax in-control prob vector for all but ref cat
88
# * pi1 - (k-1) \times tmax out-of-control prob vector for all but ref cat
89
# * dfun - PMF to use for the computations, dmultinom, dbinom, dBB, etc.
90
# ... - further parameters to be sent to dfun
91
######################################################################
93
categoricalCUSUM <- function(stsObj,
94
control = list(range=NULL,h=5,
95
pi0=NULL, pi1=NULL, dfun=NULL, ret=c("cases","value")),...) {
97
# Set the default values if not yet set
98
if(is.null(control[["pi0",exact=TRUE]])) {
99
stop("Error: No specification of in-control proportion vector pi0!")
101
if(is.null(control[["pi1",exact=TRUE]])) {
102
stop("Error: No specification of out-of-control proportion vector pi1!")
104
if(is.null(control[["dfun",exact=TRUE]])) {
105
stop("Error: No specification of the distribution to use, e.g. dbinom, dmultinom or similar!")
108
if(is.null(control[["h",exact=TRUE]]))
110
if(is.null(control[["ret",exact=TRUE]]))
111
control$ret <- "value"
113
#Extract the important parts from the arguments
114
range <- control$range
115
y <- t(stsObj@observed[range,,drop=FALSE])
116
pi0 <- control[["pi0",exact=TRUE]]
117
pi1 <- control[["pi1",exact=TRUE]]
118
dfun <- control[["dfun",exact=TRUE]]
119
control$ret <- match.arg(control$ret, c("value","cases"))
120
#Total number of objects that are investigated. Note this
121
#can't be deduced from the observed y, because only (c-1) columns
122
#are reported so using: n <- apply(y, 2, sum) is wrong!
123
#Assumption: all populationFrac's contain n_t and we can take just one
124
n <- stsObj@populationFrac[range,1,drop=TRUE]
127
if ( ((ncol(y) != ncol(pi0)) | (ncol(pi0) != ncol(pi1))) |
128
((nrow(y) != nrow(pi0)) | (nrow(pi0) != nrow(pi1)))) {
129
stop("Error: dimensions of y, pi0 and pi1 have to match")
131
if ((control$ret == "cases") & nrow(pi0) != 2) {
132
stop("Cases can only be returned in case k=2.")
134
if (length(n) != ncol(y)) {
135
stop("Error: Length of n has to be equal to number of columns in y.")
137
#Check if all n entries are the same
138
if (!all(apply(stsObj@populationFrac[range,],1,function(x) all.equal(as.numeric(x),rev(as.numeric(x)))))) {
139
stop("Error: All entries for n have to be the same in populationFrac")
142
#Reserve space for the results
143
# start with cusum[timePoint -1] = 0, i.e. set cusum[1] = 0
144
alarm <- matrix(data = 0, nrow = length(range), ncol = nrow(y))
145
upperbound <- matrix(data = 0, nrow = length(range), ncol = nrow(y))
147
#Small helper function to be used along the way --> move to other file!
148
either <- function(cond, whenTrue, whenFalse) {
149
if (cond) return(whenTrue) else return(whenFalse)
152
#Setup counters for the progress
156
noOfTimePoints <- length(range)
158
#######################################################
159
#Loop as long as we are not through the entire sequence
160
#######################################################
161
while (doneidx < noOfTimePoints) {
162
##Run Categorical CUSUM until the next alarm
163
res <- catcusum.LLRcompute(y=y, pi0=pi0, pi1=pi1, n=n, h=control$h, dfun=dfun,calc.at=(control$ret=="cases"),...)
165
#In case an alarm found log this and reset the chart at res$N+1
166
if (res$N < ncol(y)) {
167
#Put appropriate value in upperbound
168
upperbound[1:res$N + doneidx,] <- matrix(rep(either(control$ret == "value", res$val[1:res$N] ,res$cases[1:res$N]),each=ncol(upperbound)),ncol=ncol(upperbound),byrow=TRUE)
169
alarm[res$N + doneidx,] <- TRUE
171
#Chop & get ready for next round
172
y <- y[,-(1:res$N),drop=FALSE]
173
pi0 <- pi0[,-(1:res$N),drop=FALSE]
174
pi1 <- pi1[,-(1:res$N),drop=FALSE]
177
#Add to the number of alarms
178
noofalarms <- noofalarms + 1
180
doneidx <- doneidx + res$N
183
#Add upperbound-statistic of last segment, where no alarm is reached
184
upperbound[(doneidx-res$N+1):nrow(upperbound),] <- matrix( rep(either(control$ret == "value", res$val, res$cases),each=ncol(upperbound)),ncol=ncol(upperbound),byrow=TRUE)
188
# Add name and data name to control object
189
control$name <- "multinomCUSUM"
190
control$data <- NULL #not supported anymore
193
#New direct calculations on the sts object
194
stsObj@observed <- stsObj@observed[control$range,,drop=FALSE]
195
stsObj@state <- stsObj@state[control$range,,drop=FALSE]
196
stsObj@populationFrac <- stsObj@populationFrac[control$range,,drop=FALSE]
197
stsObj@alarm <- alarm
198
stsObj@upperbound <- upperbound
200
#Fix the corresponding start entry
201
start <- stsObj@start
202
new.sampleNo <- start[2] + min(control$range) - 1
203
start.year <- start[1] + (new.sampleNo - 1) %/% stsObj@freq
204
start.sampleNo <- (new.sampleNo - 1) %% stsObj@freq + 1
205
stsObj@start <- c(start.year,start.sampleNo)
207
#Ensure dimnames in the new object ## THIS NEEDS TO BE FIXED!
208
#stsObj <- fix.dimnames(stsObj)