2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2003 Peter A. Flach, Nicolas Lachiche
21
* Thanks to Amelie Deltour for porting the original C code to Java
22
* and integrating it into Weka.
25
package weka.associations.tertius;
27
import weka.core.Instance;
28
import weka.core.Instances;
30
import java.io.Serializable;
31
import java.text.DecimalFormat;
32
import java.util.ArrayList;
33
import java.util.Comparator;
34
import java.util.Enumeration;
37
* Class representing a rule with a body and a head.
39
* @author <a href="mailto:adeltour@netcourrier.com">Amelie Deltour</a>
40
* @version $Revision: 1.6 $
43
public class Rule implements Serializable, Cloneable {
45
/** for serialization */
46
private static final long serialVersionUID = -7763378359090435505L;
48
/** The body of the rule. */
51
/** The head of the rule. */
54
/** Can repeat predicates in the rule ? */
55
private boolean m_repeatPredicate;
57
/** Maximal number of literals in the rule. */
58
private int m_maxLiterals;
60
/** Can there be negations in the body ? */
61
private boolean m_negBody;
63
/** Can there be negations in the head ? */
64
private boolean m_negHead;
66
/** Is this rule a classification rule ? */
67
private boolean m_classRule;
69
/** Can there be only one literal in the head ? */
70
private boolean m_singleHead;
72
/** Number of instances in the data this rule deals with. */
73
private int m_numInstances;
75
/** Set of counter-instances of this rule. */
76
private ArrayList m_counterInstances;
78
/** Counter for the counter-instances of this rule. */
79
private int m_counter;
81
/** Confirmation of this rule. */
82
private double m_confirmation;
84
/** Optimistic estimate of this rule. */
85
private double m_optimistic;
88
* Constructor for a rule when the counter-instances are not stored,
89
* giving all the constraints applied to this rule.
91
* @param repeatPredicate True if predicates can be repeated.
92
* @param maxLiterals Maximum number of literals.
93
* @param negBody True if negation is allowed in the body.
94
* @param negHead True if negation is allowed in the head.
95
* @param classRule True if the rule is a classification rule.
96
* @param horn True if the rule is a horn clause.
98
public Rule(boolean repeatPredicate, int maxLiterals, boolean negBody,
99
boolean negHead, boolean classRule, boolean horn) {
103
m_repeatPredicate = repeatPredicate;
104
m_maxLiterals = maxLiterals;
105
m_negBody = negBody && !horn;
106
m_negHead = negHead && !horn;
107
m_classRule = classRule;
108
m_singleHead = classRule || horn;
112
* Constructor for a rule when the counter-instances are stored,
113
* giving all the constraints applied to this rule.
114
* The counter-instances are initialized to all the instances in the dataset.
116
* @param instances The dataset.
117
* @param repeatPredicate True if predicates can be repeated.
118
* @param maxLiterals Maximum number of literals.
119
* @param negBody True if negation is allowed in the body.
120
* @param negHead True if negation is allowed in the head.
121
* @param classRule True if the rule is a classification rule.
122
* @param horn True if the rule is a horn clause.
124
public Rule(Instances instances,
125
boolean repeatPredicate, int maxLiterals, boolean negBody,
126
boolean negHead, boolean classRule, boolean horn) {
128
m_body = new Body(instances);
129
m_head = new Head(instances);
130
m_repeatPredicate = repeatPredicate;
131
m_maxLiterals = maxLiterals;
132
m_negBody = negBody && !horn;
133
m_negHead = negHead && !horn;
134
m_classRule = classRule;
135
m_singleHead = classRule || horn;
136
m_numInstances = instances.numInstances();
137
m_counterInstances = new ArrayList(m_numInstances);
138
Enumeration enu = instances.enumerateInstances();
139
while (enu.hasMoreElements()) {
140
m_counterInstances.add(enu.nextElement());
145
* Returns a shallow copy of this rule.
146
* The structured is copied but the literals themselves are not copied.
148
* @return A copy of this Rule.
150
public Object clone() {
152
Object result = null;
154
result = super.clone();
155
/* Clone the body and the head. */
156
((Rule) result).m_body = (Body) m_body.clone();
157
((Rule) result).m_head = (Head) m_head.clone();
158
/* Clone the set of counter-instances. */
159
if (m_counterInstances != null) {
160
((Rule) result).m_counterInstances
161
= (ArrayList) m_counterInstances.clone();
163
} catch (Exception e) {
164
/* An exception is not supposed to happen here. */
172
* Test if an instance is a counter-instance of this rule.
174
* @param instance The instance to test.
175
* @return True if the instance is a counter-instance.
177
public boolean counterInstance(Instance instance) {
179
return ((m_body.counterInstance(instance)
180
&& m_head.counterInstance(instance)));
184
* Update the number of counter-instances of this rule in the dataset.
185
* This method should be used is the rule does not store its counter-instances.
187
* @param instances The dataset.
189
public void upDate(Instances instances) {
191
Enumeration enu = instances.enumerateInstances();
192
m_numInstances = instances.numInstances();
194
while (enu.hasMoreElements()) {
195
if (this.counterInstance((Instance) enu.nextElement())) {
199
m_head.upDate(instances);
200
m_body.upDate(instances);
204
* Get the confirmation value of this rule.
206
* @return The confirmation.
208
public double getConfirmation() {
210
return m_confirmation;
214
* Get the optimistic estimate of the confirmation obtained by refining
217
* @return The optimistic estimate.
219
public double getOptimistic() {
225
* Get the expected number of counter-instances of this rule,
226
* calculated from the number of instances satisfying the body and
227
* the number of instances satisfying the negation of the head.
229
* @return The expected number of counter-instances.
231
public double getExpectedNumber() {
233
return (double) m_body.getCounterInstancesNumber()
234
* (double) m_head.getCounterInstancesNumber()
235
/ (double) m_numInstances;
239
* Get the expected frequency of counter-instances of this rule.
241
* @return The expected frequency of counter-instances.
243
public double getExpectedFrequency() {
245
return getExpectedNumber() / (double) m_numInstances;
249
* Get the observed number of counter-instances of this rule in the dataset.
251
* @return The observed number of counter-instances.
253
public int getObservedNumber() {
255
if (m_counterInstances != null) {
256
return m_counterInstances.size();
263
* Get the observed frequency of counter-instances of this rule in the dataset.
265
* @return The expected frequency of counter-instances.
267
public double getObservedFrequency() {
269
return (double) getObservedNumber() / (double) m_numInstances;
273
* Get the rate of True Positive instances of this rule.
275
* @return The TP-rate.
277
public double getTPRate() {
279
int tp = m_body.getCounterInstancesNumber() - getObservedNumber();
280
int fn = m_numInstances - m_head.getCounterInstancesNumber() - tp;
281
return ((double) tp / (double) (tp + fn));
285
* Get the rate of False Positive instances of this rule.
287
* @return The FP-rate.
289
public double getFPRate() {
291
int fp = getObservedNumber();
292
int tn = m_head.getCounterInstancesNumber() - fp;
293
return ((double) fp / (double) (fp + tn));
297
* Calculate the confirmation of this rule.
299
public void calculateConfirmation() {
301
double expected = getExpectedFrequency();
302
double observed = getObservedFrequency();
303
if ((expected == 0) || (expected == 1)) {
306
m_confirmation = (expected - observed) / (Math.sqrt(expected) - expected);
311
* Calculate the optimistic estimate of this rule.
313
public void calculateOptimistic() {
315
int counterInstances = this.getObservedNumber();
316
int body = m_body.getCounterInstancesNumber();
317
int notHead = m_head.getCounterInstancesNumber();
318
int n = m_numInstances;
319
double expectedOptimistic;
320
/* optimistic expected number of counter-instances */
321
if (counterInstances <= body - notHead) {
322
expectedOptimistic = (double) (notHead * (body - counterInstances))
324
} else if (counterInstances <= notHead - body) {
325
expectedOptimistic = (double) (body * (notHead - counterInstances))
328
expectedOptimistic = (double) ((notHead + body - counterInstances)
329
* (notHead + body - counterInstances))
330
/ (double) (4 * n * n);
332
if ((expectedOptimistic == 0) || (expectedOptimistic == 1)) {
335
m_optimistic = expectedOptimistic
336
/ (Math.sqrt(expectedOptimistic) - expectedOptimistic);
341
* Test if this rule is empty.
343
* @return True if it is the empty rule.
345
public boolean isEmpty() {
347
return (m_head.isEmpty() && m_body.isEmpty());
351
* Give the number of literals in this rule.
353
* @return The number of literals.
355
public int numLiterals() {
357
return m_body.numLiterals() + m_head.numLiterals();
361
* Add a literal to the body of the rule.
363
* @param newLit The literal to add.
364
* @return The rule obtained by adding the literal, null if the literal can
365
* not be added because of the constraints on the rule.
367
private Rule addTermToBody(Literal newLit) {
369
if (!m_negBody && newLit.negative()
370
|| (m_classRule && newLit.getPredicate().isClass())
371
|| (newLit instanceof IndividualLiteral
372
&& (((IndividualLiteral) newLit).getType()
373
- m_body.getType()) > 1
374
&& (((IndividualLiteral) newLit).getType()
375
- m_head.getType()) > 1)) {
378
Rule result = (Rule) this.clone();
379
result.m_body.addElement(newLit);
380
/* Update the counter-instances. */
381
if (m_counterInstances != null) {
382
for (int i = result.m_counterInstances.size() - 1; i >= 0; i--) {
383
Instance current = (Instance) result.m_counterInstances.get(i);
384
if (!result.m_body.canKeep(current, newLit)) {
385
result.m_counterInstances.remove(i);
394
* Add a literal to the head of the rule.
396
* @param newLit The literal to add.
397
* @return The rule obtained by adding the literal, null if the literal can
398
* not be added because of the constraints on the rule.
400
private Rule addTermToHead(Literal newLit) {
401
if ((!m_negHead && newLit.negative())
402
|| (m_classRule && !newLit.getPredicate().isClass())
403
|| (m_singleHead && !m_head.isEmpty())
404
|| (newLit instanceof IndividualLiteral
405
&& ((IndividualLiteral) newLit).getType()
406
!= IndividualLiteral.INDIVIDUAL_PROPERTY)) {
409
Rule result = (Rule) this.clone();
410
result.m_head.addElement(newLit);
411
/* Update counter-instances. */
412
if (m_counterInstances != null) {
413
for (int i = result.m_counterInstances.size() - 1; i >= 0; i--) {
414
Instance current = (Instance) result.m_counterInstances.get(i);
415
if (!result.m_head.canKeep(current, newLit)) {
416
result.m_counterInstances.remove(i);
425
* Refine a rule by adding a range of literals of a predicate, either to
426
* the head or to the body of the rule.
428
* @param pred The predicate to consider.
429
* @param firstIndex The index of the first literal of the predicate to add.
430
* @param lastIndex The index of the last literal of the predicate to add.
431
* @param addTobody True if the literals should be added to the body.
432
* @param addToHead True if the literals should be added to the head.
433
* @return A list of rules obtained by refinement.
435
private SimpleLinkedList refine(Predicate pred, int firstIndex, int lastIndex,
436
boolean addToBody, boolean addToHead) {
438
SimpleLinkedList result = new SimpleLinkedList();
442
for (int i = firstIndex; i < lastIndex; i++) {
443
currentLit = pred.getLiteral(i);
445
refinement = addTermToBody(currentLit);
446
if (refinement != null) {
447
result.add(refinement);
451
refinement = addTermToHead(currentLit);
452
if (refinement != null) {
453
result.add(refinement);
456
negation = currentLit.getNegation();
457
if (negation != null) {
459
refinement = addTermToBody(negation);
460
if (refinement != null) {
461
result.add(refinement);
465
refinement = addTermToHead(negation);
466
if (refinement != null) {
467
result.add(refinement);
476
* Refine a rule by adding literal from a set of predictes.
478
* @param predicates The predicates available.
479
* @return The list of the children obtained by refining the rule.
481
public SimpleLinkedList refine(ArrayList predicates) {
483
SimpleLinkedList result = new SimpleLinkedList();
484
Predicate currentPred;
488
if (this.numLiterals() == m_maxLiterals) {
492
if (this.isEmpty()) {
493
/* Literals can be added on both sides of the rule. */
494
for (int i = 0; i < predicates.size(); i++) {
495
currentPred = (Predicate) predicates.get(i);
496
result.addAll(refine(currentPred,
497
0, currentPred.numLiterals(),
500
} else if (m_body.isEmpty() || m_head.isEmpty()) {
501
/* Literals can be added to the empty side only. */
504
if (m_body.isEmpty()) {
508
} else { // m_head.isEmpty()
513
last = side.getLastLiteral();
514
currentPred = last.getPredicate();
515
if (m_repeatPredicate) {
516
result.addAll(refine(currentPred,
517
currentPred.indexOf(last) + 1,
518
currentPred.numLiterals(),
519
addToBody, addToHead));
521
for (int i = predicates.indexOf(currentPred) + 1; i < predicates.size();
523
currentPred = (Predicate) predicates.get(i);
524
result.addAll(refine(currentPred,
525
0, currentPred.numLiterals(),
526
addToBody, addToHead));
529
Literal lastLitBody = m_body.getLastLiteral();
530
Literal lastLitHead = m_head.getLastLiteral();
531
Predicate lastPredBody = lastLitBody.getPredicate();
532
Predicate lastPredHead = lastLitHead.getPredicate();
533
int lastLitBodyIndex = lastPredBody.indexOf(lastLitBody);
534
int lastLitHeadIndex = lastPredHead.indexOf(lastLitHead);
535
int lastPredBodyIndex = predicates.indexOf(lastPredBody);
536
int lastPredHeadIndex = predicates.indexOf(lastPredHead);
537
Predicate inferiorPred;
538
Predicate superiorPred;
541
addToBody = (m_head.numLiterals() == 1
542
&& (lastPredBodyIndex < lastPredHeadIndex
543
|| (lastPredBodyIndex == lastPredHeadIndex
544
&& lastLitBodyIndex < lastLitHeadIndex)));
545
addToHead = (m_body.numLiterals() == 1
546
&& (lastPredHeadIndex < lastPredBodyIndex
547
|| (lastPredHeadIndex == lastPredBodyIndex
548
&& lastLitHeadIndex < lastLitBodyIndex)));
549
if (addToBody || addToHead) {
550
/* Add literals in the gap between the body and the head. */
552
inferiorPred = lastPredBody;
553
inferiorLit = lastLitBodyIndex;
554
superiorPred = lastPredHead;
555
superiorLit = lastLitHeadIndex;
556
} else { // addToHead
557
inferiorPred = lastPredHead;
558
inferiorLit = lastLitHeadIndex;
559
superiorPred = lastPredBody;
560
superiorLit = lastLitBodyIndex;
562
if (predicates.indexOf(inferiorPred)
563
< predicates.indexOf(superiorPred)) {
564
if (m_repeatPredicate) {
565
result.addAll(refine(inferiorPred,
566
inferiorLit + 1, inferiorPred.numLiterals(),
567
addToBody, addToHead));
569
for (int j = predicates.indexOf(inferiorPred) + 1;
570
j < predicates.indexOf(superiorPred); j++) {
571
currentPred = (Predicate) predicates.get(j);
572
result.addAll(refine(currentPred,
573
0, currentPred.numLiterals(),
574
addToBody, addToHead));
576
if (m_repeatPredicate) {
577
result.addAll(refine(superiorPred,
579
addToBody, addToHead));
582
//((inferiorPred.getIndex() == superiorPred.getIndex())
583
//&& (inferiorLit < superiorLit))
584
if (m_repeatPredicate) {
585
result.addAll(refine(inferiorPred,
586
inferiorLit + 1, superiorLit,
587
addToBody, addToHead));
591
/* Add other literals. */
592
if (predicates.indexOf(lastPredBody) > predicates.indexOf(lastPredHead)) {
593
superiorPred = lastPredBody;
594
superiorLit = lastPredBody.indexOf(lastLitBody);
595
} else if (predicates.indexOf(lastPredBody)
596
< predicates.indexOf(lastPredHead)) {
597
superiorPred = lastPredHead;
598
superiorLit = lastPredHead.indexOf(lastLitHead);
600
superiorPred = lastPredBody;
601
if (lastLitBodyIndex > lastLitHeadIndex) {
602
superiorLit = lastPredBody.indexOf(lastLitBody);
604
superiorLit = lastPredHead.indexOf(lastLitHead);
607
if (m_repeatPredicate) {
608
result.addAll(refine(superiorPred,
609
superiorLit + 1, superiorPred.numLiterals(),
612
for (int j = predicates.indexOf(superiorPred) + 1; j < predicates.size();
614
currentPred = (Predicate) predicates.get(j);
615
result.addAll(refine(currentPred,
616
0, currentPred.numLiterals(),
624
* Test if this rule subsumes another rule.
626
* @param otherRule The other rule.
627
* @return True if the other rule is subsumed.
629
public boolean subsumes(Rule otherRule) {
631
if (this.numLiterals() > otherRule.numLiterals()) {
634
return (m_body.isIncludedIn(otherRule) && m_head.isIncludedIn(otherRule));
638
* Test if this rule and another rule correspond to the same clause.
640
* @param otherRule The other rule.
641
* @return True if both rules correspond to the same clause.
643
public boolean sameClauseAs(Rule otherRule) {
645
return (this.numLiterals() == otherRule.numLiterals()
646
&& this.subsumes(otherRule));
650
* Test if this rule is equivalent to another rule.
652
* @param otherRule The other rule.
653
* @return True if both rules are equivalent.
655
public boolean equivalentTo(Rule otherRule) {
657
return (this.numLiterals() == otherRule.numLiterals()
658
&& m_head.negationIncludedIn(otherRule.m_body)
659
&& m_body.negationIncludedIn(otherRule.m_head));
663
* Test if the body of the rule contains a literal.
665
* @param lit The literal to look for.
666
* @return True if the literal is contained in the body of the rule.
668
public boolean bodyContains(Literal lit) {
670
return m_body.contains(lit);
674
* Test if the head of the rule contains a literal.
676
* @param lit The literal to look for.
677
* @return True if the literal is contained in the head of the rule.
679
public boolean headContains(Literal lit) {
681
return m_head.contains(lit);
685
* Test if this rule is over the frequency threshold.
687
* @param minFrequency The frequency threshold.
688
* @return True if the rule is over the threshold.
690
public boolean overFrequencyThreshold(double minFrequency) {
692
return (m_body.overFrequencyThreshold(minFrequency)
693
&& m_head.overFrequencyThreshold(minFrequency));
697
* Test if the body of the rule is true.
699
* @return True if the body is always satisfied.
701
public boolean hasTrueBody() {
703
return (!m_body.isEmpty()
704
&& m_body.hasMaxCounterInstances());
708
* Test if the head of the rule is false.
710
* @return True if the body is never satisfied.
712
public boolean hasFalseHead() {
714
return (!m_head.isEmpty()
715
&& m_head.hasMaxCounterInstances());
719
* Return a String giving the confirmation and optimistic estimate of
722
* @return A String with the values of the rule.
724
public String valuesToString() {
726
StringBuffer text = new StringBuffer();
727
DecimalFormat decimalFormat = new DecimalFormat("0.000000");
728
text.append(decimalFormat.format(getConfirmation()));
730
text.append(decimalFormat.format(getObservedFrequency()));
731
return text.toString();
735
* Return a String giving the TP-rate and FP-rate of
738
* @return A String with the values of the rule.
740
public String rocToString() {
742
StringBuffer text = new StringBuffer();
743
DecimalFormat decimalFormat = new DecimalFormat("0.000000");
744
text.append(decimalFormat.format(getConfirmation()));
746
text.append(decimalFormat.format(getTPRate()));
748
text.append(decimalFormat.format(getFPRate()));
749
return text.toString();
753
* Retrun a String for this rule.
755
* @return The String describing this rule.
757
public String toString() {
759
StringBuffer text = new StringBuffer();
760
text.append(m_body.toString());
761
text.append(" ==> ");
762
text.append(m_head.toString());
763
return text.toString();
767
* Comparator used to compare two rules according to their confirmation value.
769
public static Comparator confirmationComparator = new Comparator() {
771
public int compare(Object o1, Object o2) {
775
double conf1 = r1.getConfirmation();
776
double conf2 = r2.getConfirmation();
779
} else if (conf1 < conf2) {
788
* Comparator used to compare two rules according to their observed number
789
* of counter-instances.
791
public static Comparator observedComparator = new Comparator() {
793
public int compare(Object o1, Object o2) {
797
double obs1 = r1.getObservedFrequency();
798
double obs2 = r2.getObservedFrequency();
801
} else if (obs1 > obs2) {
810
* Comparator used to compare two rules according to their optimistic estimate.
812
public static Comparator optimisticComparator = new Comparator() {
814
public int compare(Object o1, Object o2) {
818
double opt1 = r1.getOptimistic();
819
double opt2 = r2.getOptimistic();
822
} else if (opt1 < opt2) {
831
* Comparator used to compare two rules according to their confirmation and
832
* then their observed number of counter-instances.
834
public static Comparator confirmationThenObservedComparator
836
public int compare(Object o1, Object o2) {
838
int confirmationComparison = confirmationComparator.compare(o1, o2);
839
if (confirmationComparison != 0) {
840
return confirmationComparison;
842
return observedComparator.compare(o1, o2);
848
* Comparator used to compare two rules according to their optimistic estimate
849
* and then their observed number of counter-instances.
851
public static Comparator optimisticThenObservedComparator = new Comparator() {
852
public int compare(Object o1, Object o2) {
853
int optimisticComparison = optimisticComparator.compare(o1, o2);
854
if (optimisticComparison != 0) {
855
return optimisticComparison;
857
return observedComparator.compare(o1, o2);