~ubuntu-branches/ubuntu/precise/weka/precise

* Andrew Mccallum, Kamal Nigam: A Comparison of Event Models for Naive Bayes Text Classification. In: AAAI-98 Workshop on 'Learning for Text Categorization', 1998.

*

* The core equation for this classifier:

*

* P[Ci|D] = (P[D|Ci] x P[Ci]) / P[D] (Bayes rule)

*

* where Ci is class i and D is a document.

*

* Incremental version of the algorithm.

*

* BibTeX:

* <pre>

* @inproceedings{Mccallum1998,

* author = {Andrew Mccallum and Kamal Nigam},

* booktitle = {AAAI-98 Workshop on 'Learning for Text Categorization'},

* title = {A Comparison of Event Models for Naive Bayes Text Classification},

* year = {1998}

* }

* </pre>

*

* Valid options are:

* <pre> -D

* If set, classifier is run in debug mode and

* may output additional info to the console</pre>

* @author Andrew Golightly (acg4@cs.waikato.ac.nz)

* @author Bernhard Pfahringer (bernhard@cs.waikato.ac.nz)

* @author Jiang Su

* @version $Revision: 1.2 $

public class NaiveBayesMultinomialUpdateable

extends NaiveBayesMultinomial

implements UpdateableClassifier {

/** for serialization */

private static final long serialVersionUID = -7204398796974263186L;

/** the word count per class */

protected double[] m_wordsPerClass;

/**

* Returns a string describing this classifier

* @return a description of the classifier suitable for

* displaying in the explorer/experimenter gui

public String globalInfo() {

return

super.globalInfo() + "\n\n"

+ "Incremental version of the algorithm.";

}

/**

* Generates the classifier.

* @param instances set of instances serving as training data

* @throws Exception if the classifier has not been generated successfully

100

101

public void buildClassifier(Instances instances) throws Exception {

102

// can classifier handle the data?

103

getCapabilities().testWithFail(instances);

104

105

// remove instances with missing class

106

instances = new Instances(instances);

107

instances.deleteWithMissingClass();

108

109

m_headerInfo = new Instances(instances, 0);

110

m_numClasses = instances.numClasses();

111

m_numAttributes = instances.numAttributes();

112

m_probOfWordGivenClass = new double[m_numClasses][];

113

m_wordsPerClass = new double[m_numClasses];

114

m_probOfClass = new double[m_numClasses];

115

116

// initialising the matrix of word counts

117

// NOTE: Laplace estimator introduced in case a word that does not

118

// appear for a class in the training set does so for the test set

119

double laplace = 1;

120

for (int c = 0; c < m_numClasses; c++) {

121

m_probOfWordGivenClass[c] = new double[m_numAttributes];

122

m_probOfClass[c] = laplace;

123

m_wordsPerClass[c] = laplace * m_numAttributes;

124

for(int att = 0; att<m_numAttributes; att++) {

125

m_probOfWordGivenClass[c][att] = laplace;

126

}

127

}

128

129

for (int i = 0; i < instances.numInstances(); i++)

130

updateClassifier(instances.instance(i));

131

}

132

133

/**

134

* Updates the classifier with the given instance.

135

136

* @param instance the new training instance to include in the model

137

* @throws Exception if the instance could not be incorporated in

138

* the model.

139

140

public void updateClassifier(Instance instance) throws Exception {

141

int classIndex = (int) instance.value(instance.classIndex());

142

m_probOfClass[classIndex] += instance.weight();

143

144

for (int a = 0; a < instance.numValues(); a++) {

145

if (instance.index(a) == instance.classIndex() ||

146

instance.isMissing(a))

147

continue;

148

149

double numOccurences = instance.valueSparse(a) * instance.weight();

150

if (numOccurences < 0)

151

throw new Exception(

152

"Numeric attribute values must all be greater or equal to zero.");

153

m_wordsPerClass[classIndex] += numOccurences;

154

m_probOfWordGivenClass[classIndex][instance.index(a)] += numOccurences;

155

}

156

}

157

158

/**

159

* Calculates the class membership probabilities for the given test

160

* instance.

161

162

* @param instance the instance to be classified

163

* @return predicted class probability distribution

164

* @throws Exception if there is a problem generating the prediction

165

166

public double[] distributionForInstance(Instance instance) throws Exception {

167

double[] probOfClassGivenDoc = new double[m_numClasses];

168

169

// calculate the array of log(Pr[D|C])

170

double[] logDocGivenClass = new double[m_numClasses];

171

for (int c = 0; c < m_numClasses; c++) {

172

logDocGivenClass[c] += Math.log(m_probOfClass[c]);

173

int allWords = 0;

174

for (int i = 0; i < instance.numValues(); i++) {

175

if (instance.index(i) == instance.classIndex())

176

continue;

177

double frequencies = instance.valueSparse(i);

178

allWords += frequencies;

179

logDocGivenClass[c] += frequencies *

180

Math.log(m_probOfWordGivenClass[c][instance.index(i)]);

181

}

182

logDocGivenClass[c] -= allWords * Math.log(m_wordsPerClass[c]);

183

}

184

185

double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];

186

for (int i = 0; i < m_numClasses; i++)

187

probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max);

188

189

Utils.normalize(probOfClassGivenDoc);

190

191

return probOfClassGivenDoc;

192

}

193

194

/**

195

* Returns a string representation of the classifier.

196

197

* @return a string representation of the classifier

198

199

public String toString() {

200

StringBuffer result = new StringBuffer();

201

202

result.append("The independent probability of a class\n");

203

result.append("--------------------------------------\n");

204

205

for (int c = 0; c < m_numClasses; c++)

206

result.append(m_headerInfo.classAttribute().value(c)).append("\t").

207

append(Double.toString(m_probOfClass[c])).append("\n");

208

209

result.append("\nThe probability of a word given the class\n");

210

result.append("-----------------------------------------\n\t");

211

212

for (int c = 0; c < m_numClasses; c++)

213

result.append(m_headerInfo.classAttribute().value(c)).append("\t");

214

215

result.append("\n");

216

217

for (int w = 0; w < m_numAttributes; w++) {

218

result.append(m_headerInfo.attribute(w).name()).append("\t");

219

for (int c = 0; c < m_numClasses; c++)

220

result.append(

221

Double.toString(Math.exp(m_probOfWordGivenClass[c][w]))).append("\t");

222

result.append("\n");

223

}

224

225

return result.toString();

226

}

227

228

/**

229

* Main method for testing this class.

230

231

* @param args the options

232

233

public static void main(String[] args) {

234

runClassifier(new NaiveBayesMultinomialUpdateable(), args);

235

}

236

}

Older »