1
\name{categoricalCUSUM}
2
\alias{categoricalCUSUM}
3
\alias{catcusum.LLRcompute}
6
\title{CUSUM detector for time-varying categorical time series}
8
Function to process \code{sts} object by binomial, beta-binomial
9
or multinomial CUSUM. Logistic, multinomial logistic, proportional
10
odds or Bradley-Terry regression models are used to specify in-control
11
and out-of-control parameters.
14
categoricalCUSUM(stsObj,control = list(range=NULL,h=5,pi0=NULL,
15
pi1=NULL, dfun=NULL, ret=c("cases","value")),...)
18
\item{stsObj}{Object of class \code{sts} containing the number of
19
counts in each of the \eqn{k} categories of the response
20
variable. Time varying number of counts \eqn{n_t} is found in slot
21
\code{populationFrac}. }
22
\item{control}{Control object containing several items
24
\item{\code{range}}{Vector of length \eqn{t_{max}} with indices of the
25
\code{observed} slot to monitor.}
26
\item{\code{h}}{Threshold to use for the monitoring. Once the
27
CUSUM statistics is larger or equal to \code{h} we have an alarm.}
28
\item{\code{pi0}}{\eqn{(k-1) \times t_{max}} in-control probability
29
vector for all categories except the reference category.}
30
\item{\code{mu1}}{\eqn{(k-1) \times t_{max}} out-of-control probability
31
vector for all categories except the reference category.}
32
\item{\code{dfun}}{The probability mass function or density used
33
to compute the likelihood ratios of the CUSUM. In a negative
34
binomial CUSUM this is \code{dnbinom}, in a binomial CUSUM
35
\code{dbinom} and in a multinomial CUSUM \code{dmultinom}. The
36
function must be able to handle the arguments \code{y},
37
\code{size}, \code{mu} and \code{log}. As a consequence, one in
38
the case of the beta-binomial distribution has to write a small
40
\item{\code{ret}}{Return the necessary proportion to sound an alarm in the
41
slot \code{upperbound} or just the value of the CUSUM
42
statistic. Thus, \code{ret} is one of tha values in
43
\code{c("cases","value")}.}
45
\item{\dots}{Additional arguments to send to \code{dfun}.}
48
The function allows the monitoring of categorical time series as
49
described by regression models for binomial, beta-binomial or
50
multinomial data. The later includes e.g. multinomial logistic
51
regression models, proportional odds models or Bradley-Terry models
52
for paired comparisons. See the \enc{H�hle}{Hoehle} (2010) reference
53
for further details about the methodology.
55
Once an alarm is found the CUSUM scheme is resetted (to zero) and
56
monitoring continues from there.
58
\seealso{\code{\link{categoricalCUSUM}}}
59
\value{An \code{sts} object with \code{observed}, \code{alarm},
60
etc. slots trimmed to the \code{control$range} indices.
63
H�hle, M. (2010), Changepoint detection in categorical time series,
64
Book chapter to appear in T. Kneib and G. Tutz (Eds.), Statistical
65
Modelling and Regression Structures, Springer.
68
###########################################################################
69
#Beta-binomial CUSUM for a small example containing the time-varying
70
#number of positive test out of a time-varying number of total
72
#######################################
74
#Load meat inspection data
77
#Use GAMLSS to fit beta-bin regression model
80
phase2 <- (max(phase1)+1) : nrow(abattoir)
82
#Fit beta-binomial model using GAMLSS
83
abattoir.df <- as.data.frame(abattoir)
84
colnames(abattoir.df) <- c("y","t","state","alarm","n")
85
m.bbin <- gamlss( cbind(y,n-y) ~ 1 + t +
86
+ sin(2*pi/52*t) + cos(2*pi/52*t) +
87
+ sin(4*pi/52*t) + cos(4*pi/52*t), sigma.formula=~1,
88
family=BB(sigma.link="log"),
89
data=abattoir.df[phase1,c("n","y","t")])
92
R <- 2 #detect a doubling of the odds for a test being positive
93
h <- 4 #threshold of the cusum
95
#Compute in-control and out of control mean
96
pi0 <- predict(m.bbin,newdata=abattoir.df[phase2,c("n","y","t")],type="response")
97
pi1 <- plogis(qlogis(pi0)+log(R))
98
#Create matrix with in control and out of control proportions.
99
#Categories are D=1 and D=0, where the latter is the reference category
100
pi0m <- rbind(pi0, 1-pi0)
101
pi1m <- rbind(pi1, 1-pi1)
104
######################################################################
105
# Use the multinomial surveillance function. To this end it is necessary
106
# to create a new abattoir object containing counts and proportion for
107
# each of the k=2 categories. For binomial data this appears a bit
108
# redundant, but generalizes easier to k>2 categories.
109
######################################################################
111
abattoir2 <- new("sts",epoch=1:nrow(abattoir), start=c(2006,1),freq=52,
112
observed=cbind(abattoir@observed,abattoir@populationFrac -abattoir@observed),
113
populationFrac=cbind(abattoir@populationFrac,abattoir@populationFrac),
114
state=matrix(0,nrow=nrow(abattoir),ncol=2),
117
######################################################################
118
#Function to use as dfun in the categoricalCUSUM
119
#(just a wrapper to the dBB function). Note that from v 3.0-1 the
120
#first argument of dBB changed its name from "y" to "x"!
121
######################################################################
122
mydBB.cusum <- function(y, mu, sigma, size, log = FALSE) {
123
return(dBB(y[1,], mu = mu[1,], sigma = sigma, bd = size, log = log))
127
#Create control object for multinom cusum and use the categoricalCUSUM
129
control <- list(range=phase2,h=h,pi0=pi0m, pi1=pi1m, ret="cases",
131
surv <- categoricalCUSUM(abattoir2, control=control,
132
sigma=exp(m.bbin$sigma.coef))
135
plot(surv[,1],legend.opts=NULL,dx.upperbound=0)
136
lines(pi0,col="green")
140
which.max(alarms(surv[,1]))
142
#ToDo: Compute run length using LRCUSUM.runlength
145
\author{M. \enc{H�hle}{Hoehle}}
148
% positives <- matrix(c(25,54,50,70,54,83,62,37,53,29,48,95,63,53,47,67,31,56,20,37,33,34,28,58,45,31,34,17,25,23,19,32,29,34,46,58,45,50,50,41,57,66,41,68,58,35,40,45,20,49,33,24,21,31,25,28,29,20,25,13,41,31,25,25,7,9,48,15,4,45,7,52,21,19,60,25,24,26,16,15,8,17,43,19,10,31,26,19,20,20,27,17,27,23,35,34,53,32,58,81,37,3,0,0,0,1,41,21,34,33,48,17,61,39,70,13,56,65,50,39,46,29,35,18,35,43,34,49,49,18,36,30,34,55,61,44,35,23,35,37,33,37,15,17,66,8,2,10,39,17,42,27,28,55,26,0,0,0,0,0,1,0,0,0,16,36,36,25,34,26,2),ncol=1)
150
% n <- matrix(c(4200,4900,4700,5100,4900,5000,4700,4500,4500,4400,4300,4500,4500,4800,3000,3600,4200,4300,2900,4000,3000,4500,3300,4700,4300,4100,3100,2500,3700,3300,3700,4200,4200,4100,4000,5000,3900,3700,4600,4400,4500,5000,4100,3800,2300,3000,3700,3200,2000,2500,4000,1500,1700,3000,3000,2500,2300,3100,2600,1300,4100,1800,2600,2900,1100,1300,3700,2700,1700,2700,1100,3300,1800,3500,4100,3300,4100,2500,3000,2400,800,3300,3100,3000,3100,2500,3300,3200,2700,2700,1500,1400,2200,1600,2700,1700,4200,3900,8100,5900,4800,300,0,0,0,200,5000,5400,6100,5500,4300,4100,8000,8200,7700,2500,5200,7200,7400,5000,6500,3900,5300,4100,6600,6600,5400,6300,6100,5000,5000,4700,4900,5200,4800,4900,4900,4900,5000,5000,4900,4900,900,800,1300,200,700,1200,2300,1600,2900,2600,3600,4000,2000,0,0,0,0,0,100,0,0,100,2200,2900,3100,3200,3900,4300,1000),ncol=1)
153
%sts <- new("sts",epoch=1:nrow(positives),start=c(2006,1),freq=52,observed=positives,state=0*positives,populationFrac=n,multinomialTS=TRUE)
b'\\ No newline at end of file'