2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* RepeatedHillClimber.java
19
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.bayes.net.search.global;
25
import weka.classifiers.bayes.BayesNet;
26
import weka.classifiers.bayes.net.ParentSet;
27
import weka.core.Instances;
28
import weka.core.Option;
29
import weka.core.Utils;
31
import java.util.Enumeration;
32
import java.util.Random;
33
import java.util.Vector;
36
<!-- globalinfo-start -->
37
* This Bayes Network learning algorithm repeatedly uses hill climbing starting with a randomly generated network structure and return the best structure of the various runs.
39
<!-- globalinfo-end -->
41
<!-- options-start -->
42
* Valid options are: <p/>
44
* <pre> -U <integer>
45
* Number of runs</pre>
47
* <pre> -A <seed>
48
* Random number seed</pre>
50
* <pre> -P <nr of parents>
51
* Maximum number of parents</pre>
54
* Use arc reversal operation.
55
* (default false)</pre>
58
* Initial structure is empty (instead of Naive Bayes)</pre>
61
* Applies a Markov Blanket correction to the network structure,
62
* after a network structure is learned. This ensures that all
63
* nodes in the network are part of the Markov blanket of the
64
* classifier node.</pre>
66
* <pre> -S [LOO-CV|k-Fold-CV|Cumulative-CV]
67
* Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre>
70
* Use probabilistic or 0/1 scoring.
71
* (default probabilistic scoring)</pre>
75
* @author Remco Bouckaert (rrb@xm.co.nz)
76
* @version $Revision: 1.5 $
78
public class RepeatedHillClimber
81
/** for serialization */
82
static final long serialVersionUID = -7359197180460703069L;
84
/** number of runs **/
86
/** random number seed **/
88
/** random number generator **/
92
* search determines the network structure/graph of the network
93
* with the repeated hill climbing.
95
* @param bayesNet the network to use
96
* @param instances the data to use
97
* @throws Exception if something goes wrong
99
protected void search(BayesNet bayesNet, Instances instances) throws Exception {
100
m_random = new Random(getSeed());
101
// keeps track of score pf best structure found so far
103
double fCurrentScore = calcScore(bayesNet);
105
// keeps track of best structure found so far
106
BayesNet bestBayesNet;
108
// initialize bestBayesNet
109
fBestScore = fCurrentScore;
110
bestBayesNet = new BayesNet();
111
bestBayesNet.m_Instances = instances;
112
bestBayesNet.initStructure();
113
copyParentSets(bestBayesNet, bayesNet);
117
for (int iRun = 0; iRun < m_nRuns; iRun++) {
118
// generate random nework
119
generateRandomNet(bayesNet, instances);
122
super.search(bayesNet, instances);
125
fCurrentScore = calcScore(bayesNet);
127
// keep track of best network seen so far
128
if (fCurrentScore > fBestScore) {
129
fBestScore = fCurrentScore;
130
copyParentSets(bestBayesNet, bayesNet);
134
// restore current network to best network
135
copyParentSets(bayesNet, bestBayesNet);
146
void generateRandomNet(BayesNet bayesNet, Instances instances) {
147
int nNodes = instances.numAttributes();
149
for (int iNode = 0; iNode < nNodes; iNode++) {
150
ParentSet parentSet = bayesNet.getParentSet(iNode);
151
while (parentSet.getNrOfParents() > 0) {
152
parentSet.deleteLastParent(instances);
156
// initialize as naive Bayes?
157
if (getInitAsNaiveBayes()) {
158
int iClass = instances.classIndex();
159
// initialize parent sets to have arrow from classifier node to
160
// each of the other nodes
161
for (int iNode = 0; iNode < nNodes; iNode++) {
162
if (iNode != iClass) {
163
bayesNet.getParentSet(iNode).addParent(iClass, instances);
168
// insert random arcs
169
int nNrOfAttempts = m_random.nextInt(nNodes * nNodes);
170
for (int iAttempt = 0; iAttempt < nNrOfAttempts; iAttempt++) {
171
int iTail = m_random.nextInt(nNodes);
172
int iHead = m_random.nextInt(nNodes);
173
if (bayesNet.getParentSet(iHead).getNrOfParents() < getMaxNrOfParents() &&
174
addArcMakesSense(bayesNet, instances, iHead, iTail)) {
175
bayesNet.getParentSet(iHead).addParent(iTail, instances);
178
} // generateRandomNet
181
* copyParentSets copies parent sets of source to dest BayesNet
183
* @param dest destination network
184
* @param source source network
186
void copyParentSets(BayesNet dest, BayesNet source) {
187
int nNodes = source.getNrOfNodes();
188
// clear parent set first
189
for (int iNode = 0; iNode < nNodes; iNode++) {
190
dest.getParentSet(iNode).copy(source.getParentSet(iNode));
196
* Returns the number of runs
198
* @return number of runs
200
public int getRuns() {
205
* Sets the number of runs
207
* @param nRuns The number of runs to set
209
public void setRuns(int nRuns) {
214
* Returns the random seed
216
* @return random number seed
218
public int getSeed() {
223
* Sets the random number seed
225
* @param nSeed The number of the seed to set
227
public void setSeed(int nSeed) {
232
* Returns an enumeration describing the available options.
234
* @return an enumeration of all the available options.
236
public Enumeration listOptions() {
237
Vector newVector = new Vector(4);
239
newVector.addElement(new Option("\tNumber of runs", "U", 1, "-U <integer>"));
240
newVector.addElement(new Option("\tRandom number seed", "A", 1, "-A <seed>"));
242
Enumeration enu = super.listOptions();
243
while (enu.hasMoreElements()) {
244
newVector.addElement(enu.nextElement());
246
return newVector.elements();
250
* Parses a given list of options. <p/>
252
<!-- options-start -->
253
* Valid options are: <p/>
255
* <pre> -U <integer>
256
* Number of runs</pre>
258
* <pre> -A <seed>
259
* Random number seed</pre>
261
* <pre> -P <nr of parents>
262
* Maximum number of parents</pre>
265
* Use arc reversal operation.
266
* (default false)</pre>
269
* Initial structure is empty (instead of Naive Bayes)</pre>
272
* Applies a Markov Blanket correction to the network structure,
273
* after a network structure is learned. This ensures that all
274
* nodes in the network are part of the Markov blanket of the
275
* classifier node.</pre>
277
* <pre> -S [LOO-CV|k-Fold-CV|Cumulative-CV]
278
* Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre>
281
* Use probabilistic or 0/1 scoring.
282
* (default probabilistic scoring)</pre>
286
* @param options the list of options as an array of strings
287
* @throws Exception if an option is not supported
289
public void setOptions(String[] options) throws Exception {
290
String sRuns = Utils.getOption('U', options);
291
if (sRuns.length() != 0) {
292
setRuns(Integer.parseInt(sRuns));
295
String sSeed = Utils.getOption('A', options);
296
if (sSeed.length() != 0) {
297
setSeed(Integer.parseInt(sSeed));
300
super.setOptions(options);
304
* Gets the current settings of the search algorithm.
306
* @return an array of strings suitable for passing to setOptions
308
public String[] getOptions() {
309
String[] superOptions = super.getOptions();
310
String[] options = new String[7 + superOptions.length];
313
options[current++] = "-U";
314
options[current++] = "" + getRuns();
316
options[current++] = "-A";
317
options[current++] = "" + getSeed();
319
// insert options from parent class
320
for (int iOption = 0; iOption < superOptions.length; iOption++) {
321
options[current++] = superOptions[iOption];
324
// Fill up rest with empty strings, not nulls!
325
while (current < options.length) {
326
options[current++] = "";
332
* This will return a string describing the classifier.
334
* @return The string.
336
public String globalInfo() {
337
return "This Bayes Network learning algorithm repeatedly uses hill climbing starting " +
338
"with a randomly generated network structure and return the best structure of the " +
343
* @return a string to describe the Runs option.
345
public String runsTipText() {
346
return "Sets the number of times hill climbing is performed.";
350
* @return a string to describe the Seed option.
352
public String seedTipText() {
353
return "Initialization value for random number generator." +
354
" Setting the seed allows replicability of experiments.";