2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
23
package weka.associations;
25
import weka.core.FastVector;
26
import weka.core.Instance;
27
import weka.core.Instances;
29
import java.io.Serializable;
30
import java.util.Enumeration;
31
import java.util.Hashtable;
34
* Class for storing a set of items. Item sets are stored in a lexicographic
35
* order, which is determined by the header information of the set of instances
36
* used for generating the set of items. All methods in this class assume that
37
* item sets are stored in lexicographic order.
38
* The class provides the general methods used for item sets in class - and
39
* standard association rule mining.
41
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
42
* @version $Revision: 1.12 $
45
implements Serializable {
47
/** for serialization */
48
private static final long serialVersionUID = 2724000045282835791L;
50
/** The items stored as an array of of ints. */
51
protected int[] m_items;
53
/** Counter for how many transactions contain this item set. */
54
protected int m_counter;
56
/** The total number of transactions */
57
protected int m_totalTransactions;
61
* @param totalTrans the total number of transactions in the data
63
public ItemSet(int totalTrans) {
64
m_totalTransactions = totalTrans;
69
* @param totalTrans the total number of transactions in the data
70
* @param array the attribute values encoded in an int array
72
public ItemSet(int totalTrans, int[] array){
74
m_totalTransactions = totalTrans;
80
* @param array the item set represented as an int array
82
public ItemSet(int[] array){
89
* Checks if an instance contains an item set.
91
* @param instance the instance to be tested
92
* @return true if the given instance contains this item set
94
public boolean containedBy(Instance instance) {
96
for (int i = 0; i < instance.numAttributes(); i++)
97
if (m_items[i] > -1) {
98
if (instance.isMissing(i))
100
if (m_items[i] != (int)instance.value(i))
106
/** Deletes all item sets that don't have minimum support.
107
* @return the reduced set of item sets
108
* @param maxSupport the maximum support
109
* @param itemSets the set of item sets to be pruned
110
* @param minSupport the minimum number of transactions to be covered
112
public static FastVector deleteItemSets(FastVector itemSets,
116
FastVector newVector = new FastVector(itemSets.size());
118
for (int i = 0; i < itemSets.size(); i++) {
119
ItemSet current = (ItemSet)itemSets.elementAt(i);
120
if ((current.m_counter >= minSupport)
121
&& (current.m_counter <= maxSupport))
122
newVector.addElement(current);
128
* Tests if two item sets are equal.
130
* @param itemSet another item set
131
* @return true if this item set contains the same items as the given one
133
public boolean equals(Object itemSet) {
135
if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
138
if (m_items.length != ((ItemSet)itemSet).m_items.length)
140
for (int i = 0; i < m_items.length; i++)
141
if (m_items[i] != ((ItemSet)itemSet).m_items[i])
147
* Return a hashtable filled with the given item sets.
149
* @param itemSets the set of item sets to be used for filling the hash table
150
* @param initialSize the initial size of the hashtable
151
* @return the generated hashtable
153
public static Hashtable getHashtable(FastVector itemSets, int initialSize) {
155
Hashtable hashtable = new Hashtable(initialSize);
157
for (int i = 0; i < itemSets.size(); i++) {
158
ItemSet current = (ItemSet)itemSets.elementAt(i);
159
hashtable.put(current, new Integer(current.m_counter));
165
* Produces a hash code for a item set.
167
* @return a hash code for a set of items
169
public int hashCode() {
173
for (int i = m_items.length-1; i >= 0; i--)
174
result += (i * m_items[i]);
178
/** Merges all item sets in the set of (k-1)-item sets
179
* to create the (k)-item sets and updates the counters.
180
* @return the generated (k)-item sets
181
* @param totalTrans thetotal number of transactions
182
* @param itemSets the set of (k-1)-item sets
183
* @param size the value of (k-1)
185
public static FastVector mergeAllItemSets(FastVector itemSets, int size,
188
FastVector newVector = new FastVector();
192
for (int i = 0; i < itemSets.size(); i++) {
193
ItemSet first = (ItemSet)itemSets.elementAt(i);
195
for (int j = i+1; j < itemSets.size(); j++) {
196
ItemSet second = (ItemSet)itemSets.elementAt(j);
197
result = new ItemSet(totalTrans);
198
result.m_items = new int[first.m_items.length];
200
// Find and copy common prefix of size 'size'
203
while (numFound < size) {
204
if (first.m_items[k] == second.m_items[k]) {
205
if (first.m_items[k] != -1)
207
result.m_items[k] = first.m_items[k];
214
while (k < first.m_items.length) {
215
if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
218
if (first.m_items[k] != -1)
219
result.m_items[k] = first.m_items[k];
221
result.m_items[k] = second.m_items[k];
225
if (k == first.m_items.length) {
226
result.m_counter = 0;
228
newVector.addElement(result);
236
* Prunes a set of (k)-item sets using the given (k-1)-item sets.
238
* @param toPrune the set of (k)-item sets to be pruned
239
* @param kMinusOne the (k-1)-item sets to be used for pruning
240
* @return the pruned set of item sets
242
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {
244
FastVector newVector = new FastVector(toPrune.size());
247
for (int i = 0; i < toPrune.size(); i++) {
248
ItemSet current = (ItemSet)toPrune.elementAt(i);
249
for (j = 0; j < current.m_items.length; j++)
250
if (current.m_items[j] != -1) {
251
help = current.m_items[j];
252
current.m_items[j] = -1;
253
if (kMinusOne.get(current) == null) {
254
current.m_items[j] = help;
257
current.m_items[j] = help;
260
if (j == current.m_items.length)
261
newVector.addElement(current);
267
* Prunes a set of rules.
269
* @param rules a two-dimensional array of lists of item sets. The first list
270
* of item sets contains the premises, the second one the consequences.
271
* @param minConfidence the minimum confidence the rules have to have
273
public static void pruneRules(FastVector[] rules, double minConfidence) {
275
FastVector newPremises = new FastVector(rules[0].size()),
276
newConsequences = new FastVector(rules[1].size()),
277
newConf = new FastVector(rules[2].size());
279
for (int i = 0; i < rules[0].size(); i++)
280
if (!(((Double)rules[2].elementAt(i)).doubleValue() <
282
newPremises.addElement(rules[0].elementAt(i));
283
newConsequences.addElement(rules[1].elementAt(i));
284
newConf.addElement(rules[2].elementAt(i));
286
rules[0] = newPremises;
287
rules[1] = newConsequences;
292
* Converts the header info of the given set of instances into a set
293
* of item sets (singletons). The ordering of values in the header file
294
* determines the lexicographic order.
296
* @param instances the set of instances whose header info is to be used
297
* @return a set of item sets, each containing a single item
298
* @exception Exception if singletons can't be generated successfully
300
public static FastVector singletons(Instances instances) throws Exception {
302
FastVector setOfItemSets = new FastVector();
305
for (int i = 0; i < instances.numAttributes(); i++) {
306
if (instances.attribute(i).isNumeric())
307
throw new Exception("Can't handle numeric attributes!");
308
for (int j = 0; j < instances.attribute(i).numValues(); j++) {
309
current = new ItemSet(instances.numInstances());
310
current.m_items = new int[instances.numAttributes()];
311
for (int k = 0; k < instances.numAttributes(); k++)
312
current.m_items[k] = -1;
313
current.m_items[i] = j;
315
setOfItemSets.addElement(current);
318
return setOfItemSets;
322
* Outputs the support for an item set.
324
* @return the support
326
public int support() {
332
* Returns the contents of an item set as a string.
334
* @param instances contains the relevant header information
335
* @return string describing the item set
337
public String toString(Instances instances) {
339
StringBuffer text = new StringBuffer();
341
for (int i = 0; i < instances.numAttributes(); i++)
342
if (m_items[i] != -1) {
343
text.append(instances.attribute(i).name()+'=');
344
text.append(instances.attribute(i).value(m_items[i])+' ');
346
text.append(m_counter);
347
return text.toString();
351
* Updates counter of item set with respect to given transaction.
353
* @param instance the instance to be used for ubdating the counter
355
public void upDateCounter(Instance instance) {
357
if (containedBy(instance))
362
* Updates counters for a set of item sets and a set of instances.
364
* @param itemSets the set of item sets which are to be updated
365
* @param instances the instances to be used for updating the counters
367
public static void upDateCounters(FastVector itemSets, Instances instances) {
369
for (int i = 0; i < instances.numInstances(); i++) {
370
Enumeration enu = itemSets.elements();
371
while (enu.hasMoreElements())
372
((ItemSet)enu.nextElement()).upDateCounter(instances.instance(i));
377
* @return the counter
379
public int counter(){
384
/** Gest the item set as an int array
385
* @return int array represneting an item set
387
public int[] items(){
392
/** Gest the index of the value of the specified attribute
393
* @param k the attribute index
394
* @return the index of the attribute value
396
public int itemAt(int k){
402
* @param count the counter
404
public void setCounter(int count){
409
/** Sets an item sets
410
* @param items an int array representing an item set
412
public void setItem(int[] items){
417
/** Sets the index of an attribute value
418
* @param value the inex of the attribute value
419
* @param k the index of the attribute
421
public void setItemAt(int value, int k){