2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.bayes.net;
25
import weka.classifiers.bayes.BayesNet;
26
import weka.classifiers.bayes.net.estimate.DiscreteEstimatorBayes;
27
import weka.core.FastVector;
28
import weka.core.Instances;
29
import weka.core.TechnicalInformation;
30
import weka.core.TechnicalInformation.Type;
31
import weka.core.TechnicalInformation.Field;
32
import weka.core.TechnicalInformationHandler;
33
import weka.estimators.Estimator;
36
import java.io.StringReader;
37
import java.util.StringTokenizer;
39
import javax.xml.parsers.DocumentBuilderFactory;
41
import org.w3c.dom.CharacterData;
42
import org.w3c.dom.Document;
43
import org.w3c.dom.Element;
44
import org.w3c.dom.Node;
45
import org.w3c.dom.NodeList;
48
<!-- globalinfo-start -->
49
* Builds a description of a Bayes Net classifier stored in XML BIF 0.3 format.<br/>
51
* For more details on XML BIF see:<br/>
53
* Fabio Cozman, Marek Druzdzel, Daniel Garcia (1998). XML BIF version 0.3. URL http://www-2.cs.cmu.edu/\~fgcozman/Research/InterchangeFormat/.
55
<!-- globalinfo-end -->
57
<!-- technical-bibtex-start -->
60
* @misc{Cozman1998,
61
* author = {Fabio Cozman and Marek Druzdzel and Daniel Garcia},
62
* title = {XML BIF version 0.3},
64
* URL = {http://www-2.cs.cmu.edu/\~fgcozman/Research/InterchangeFormat/}
68
<!-- technical-bibtex-end -->
70
<!-- options-start -->
71
* Valid options are: <p/>
74
* Do not use ADTree data structure
77
* <pre> -B <BIF file>
78
* BIF file to compare with
81
* <pre> -Q weka.classifiers.bayes.net.search.SearchAlgorithm
85
* <pre> -E weka.classifiers.bayes.net.estimate.SimpleEstimator
91
* @author Remco Bouckaert (rrb@xm.co.nz)
92
* @version $Revision: 1.13 $
94
public class BIFReader
96
implements TechnicalInformationHandler {
98
protected int [] m_nPositionX;
99
protected int [] m_nPositionY;
100
private int [] m_order;
102
/** for serialization */
103
static final long serialVersionUID = -8358864680379881429L;
106
* This will return a string describing the classifier.
107
* @return The string.
109
public String globalInfo() {
111
"Builds a description of a Bayes Net classifier stored in XML "
112
+ "BIF 0.3 format.\n\n"
113
+ "For more details on XML BIF see:\n\n"
114
+ getTechnicalInformation().toString();
117
/** processFile reads a BIFXML file and initializes a Bayes Net
118
* @param sFile name of the file to parse
119
* @return the BIFReader
120
* @throws Exception if processing fails
122
public BIFReader processFile(String sFile) throws Exception {
124
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
125
factory.setValidating(true);
126
Document doc = factory.newDocumentBuilder().parse(new File(sFile));
129
buildInstances(doc, sFile);
134
public BIFReader processString(String sStr) throws Exception {
135
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
136
factory.setValidating(true);
137
Document doc = factory.newDocumentBuilder().parse(new org.xml.sax.InputSource(new StringReader(sStr)));
139
buildInstances(doc, "from-string");
145
/** the current filename */
149
* returns the current filename
151
* @return the current filename
153
public String getFileName() {
159
* Returns an instance of a TechnicalInformation object, containing
160
* detailed information about the technical background of this class,
161
* e.g., paper reference or book this class is based on.
163
* @return the technical information about this class
165
public TechnicalInformation getTechnicalInformation() {
166
TechnicalInformation result;
168
result = new TechnicalInformation(Type.MISC);
169
result.setValue(Field.AUTHOR, "Fabio Cozman and Marek Druzdzel and Daniel Garcia");
170
result.setValue(Field.YEAR, "1998");
171
result.setValue(Field.TITLE, "XML BIF version 0.3");
172
result.setValue(Field.URL, "http://www-2.cs.cmu.edu/~fgcozman/Research/InterchangeFormat/");
177
/** buildStructure parses the BIF document in the DOM tree contained
178
* in the doc parameter and specifies the the network structure and
179
* probability tables.
180
* It assumes that buildInstances has been called before
181
* @param doc DOM document containing BIF document in DOM tree
182
* @throws Exception if building of structure fails
184
void buildStructure(Document doc) throws Exception {
185
// Get the name of the network
186
// initialize conditional distribution tables
187
m_Distributions = new Estimator[m_Instances.numAttributes()][];
188
for (int iNode = 0; iNode < m_Instances.numAttributes(); iNode++) {
189
// find definition that goes with this node
190
String sName = m_Instances.attribute(iNode).name();
191
Element definition = getDefinition(doc, sName);
193
if (nodelist.getLength() == 0) {
194
throw new Exception("No definition found for node " + sName);
196
if (nodelist.getLength() > 1) {
197
System.err.println("More than one definition found for node " + sName + ". Using first definition.");
199
Element definition = (Element) nodelist.item(0);
202
// get the parents for this node
204
FastVector nodelist = getParentNodes(definition);
205
for (int iParent = 0; iParent < nodelist.size(); iParent++) {
206
Node parentName = ((Node) nodelist.elementAt(iParent)).getFirstChild();
207
String sParentName = ((CharacterData) (parentName)).getData();
208
int nParent = getNode(sParentName);
209
m_ParentSets[iNode].addParent(nParent, m_Instances);
211
// resolve conditional probability table
212
int nCardinality = m_ParentSets[iNode].getCardinalityOfParents();
213
int nValues = m_Instances.attribute(iNode).numValues();
214
m_Distributions[iNode] = new Estimator[nCardinality];
215
for (int i = 0; i < nCardinality; i++) {
216
m_Distributions[iNode][i] = new DiscreteEstimatorBayes(nValues, 0.0f);
220
StringBuffer sTable = new StringBuffer();
221
for (int iText = 0; iText < nodelist.getLength(); iText++) {
222
sTable.append(((CharacterData) (nodelist.item(iText))).getData());
225
StringTokenizer st = new StringTokenizer(sTable.toString());
227
String sTable = getTable(definition);
228
StringTokenizer st = new StringTokenizer(sTable.toString());
231
for (int i = 0; i < nCardinality; i++) {
232
DiscreteEstimatorBayes d = (DiscreteEstimatorBayes) m_Distributions[iNode][i];
233
for (int iValue = 0; iValue < nValues; iValue++) {
234
String sWeight = st.nextToken();
235
d.addValue(iValue, new Double(sWeight).doubleValue());
241
/** synchronizes the node ordering of this Bayes network with
242
* those in the other network (if possible).
243
* @param other Bayes network to synchronize with
244
* @throws Exception if nr of attributes differs or not all of the variables have the same name.
246
public void Sync(BayesNet other) throws Exception {
247
int nAtts = m_Instances.numAttributes();
248
if (nAtts != other.m_Instances.numAttributes()) {
249
throw new Exception ("Cannot synchronize networks: different number of attributes.");
251
m_order = new int[nAtts];
252
for (int iNode = 0; iNode < nAtts; iNode++) {
253
String sName = other.getNodeName(iNode);
254
m_order[getNode(sName)] = iNode;
260
* Returns all TEXT children of the given node in one string. Between
261
* the node values new lines are inserted.
263
* @param node the node to return the content for
264
* @return the content of the node
266
public String getContent(Element node) {
273
list = node.getChildNodes();
275
for (i = 0; i < list.getLength(); i++) {
277
if (item.getNodeType() == Node.TEXT_NODE)
278
result += "\n" + item.getNodeValue();
285
/** buildInstances parses the BIF document and creates a Bayes Net with its
286
* nodes specified, but leaves the network structure and probability tables empty.
287
* @param doc DOM document containing BIF document in DOM tree
288
* @param sName default name to give to the Bayes Net. Will be overridden if specified in the BIF document.
289
* @throws Exception if building fails
291
void buildInstances(Document doc, String sName) throws Exception {
293
// Get the name of the network
294
nodelist = selectAllNames(doc);
295
if (nodelist.getLength() > 0) {
296
sName = ((CharacterData) (nodelist.item(0).getFirstChild())).getData();
300
nodelist = selectAllVariables(doc);
301
int nNodes = nodelist.getLength();
302
// initialize structure
303
FastVector attInfo = new FastVector(nNodes);
306
m_nPositionX = new int[nodelist.getLength()];
307
m_nPositionY = new int[nodelist.getLength()];
310
for (int iNode = 0; iNode < nodelist.getLength(); iNode++) {
312
FastVector valueslist;
313
// Get the name of the network
314
valueslist = selectOutCome(nodelist.item(iNode));
316
int nValues = valueslist.size();
317
// generate value strings
318
FastVector nomStrings = new FastVector(nValues + 1);
319
for (int iValue = 0; iValue < nValues; iValue++) {
320
Node node = ((Node) valueslist.elementAt(iValue)).getFirstChild();
321
String sValue = ((CharacterData) (node)).getData();
322
if (sValue == null) {
323
sValue = "Value" + (iValue + 1);
325
nomStrings.addElement(sValue);
327
FastVector nodelist2;
328
// Get the name of the network
329
nodelist2 = selectName(nodelist.item(iNode));
330
if (nodelist2.size() == 0) {
331
throw new Exception ("No name specified for variable");
333
String sNodeName = ((CharacterData) (((Node) nodelist2.elementAt(0)).getFirstChild())).getData();
335
weka.core.Attribute att = new weka.core.Attribute(sNodeName, nomStrings);
336
attInfo.addElement(att);
338
valueslist = selectProperty(nodelist.item(iNode));
339
nValues = valueslist.size();
340
// generate value strings
341
for (int iValue = 0; iValue < nValues; iValue++) {
342
// parsing for strings of the form "position = (73, 165)"
343
Node node = ((Node)valueslist.elementAt(iValue)).getFirstChild();
344
String sValue = ((CharacterData) (node)).getData();
345
if (sValue.startsWith("position")) {
346
int i0 = sValue.indexOf('(');
347
int i1 = sValue.indexOf(',');
348
int i2 = sValue.indexOf(')');
349
String sX = sValue.substring(i0 + 1, i1).trim();
350
String sY = sValue.substring(i1 + 1, i2).trim();
352
m_nPositionX[iNode] = (int) Integer.parseInt(sX);
353
m_nPositionY[iNode] = (int) Integer.parseInt(sY);
354
} catch (NumberFormatException e) {
355
System.err.println("Wrong number format in position :(" + sX + "," + sY +")");
356
m_nPositionX[iNode] = 0;
357
m_nPositionY[iNode] = 0;
364
m_Instances = new Instances(sName, attInfo, 100);
365
m_Instances.setClassIndex(nNodes - 1);
370
// /** selectNodeList selects list of nodes from document specified in XPath expression
371
// * @param doc : document (or node) to query
372
// * @param sXPath : XPath expression
373
// * @return list of nodes conforming to XPath expression in doc
374
// * @throws Exception
376
// private NodeList selectNodeList(Node doc, String sXPath) throws Exception {
377
// NodeList nodelist = org.apache.xpath.XPathAPI.selectNodeList(doc, sXPath);
379
// } // selectNodeList
381
NodeList selectAllNames(Document doc) throws Exception {
382
//NodeList nodelist = selectNodeList(doc, "//NAME");
383
NodeList nodelist = doc.getElementsByTagName("NAME");
387
NodeList selectAllVariables(Document doc) throws Exception {
388
//NodeList nodelist = selectNodeList(doc, "//VARIABLE");
389
NodeList nodelist = doc.getElementsByTagName("VARIABLE");
391
} // selectAllVariables
393
Element getDefinition(Document doc, String sName) throws Exception {
394
//NodeList nodelist = selectNodeList(doc, "//DEFINITION[normalize-space(FOR/text())=\"" + sName + "\"]");
396
NodeList nodelist = doc.getElementsByTagName("DEFINITION");
397
for (int iNode = 0; iNode < nodelist.getLength(); iNode++) {
398
Node node = nodelist.item(iNode);
399
FastVector list = selectElements(node, "FOR");
400
if (list.size() > 0) {
401
Node forNode = (Node) list.elementAt(0);
402
if (getContent((Element) forNode).trim().equals(sName)) {
403
return (Element) node;
407
throw new Exception("Could not find definition for ((" + sName + "))");
410
FastVector getParentNodes(Node definition) throws Exception {
411
//NodeList nodelist = selectNodeList(definition, "GIVEN");
412
FastVector nodelist = selectElements(definition, "GIVEN");
416
String getTable(Node definition) throws Exception {
417
//NodeList nodelist = selectNodeList(definition, "TABLE/text()");
418
FastVector nodelist = selectElements(definition, "TABLE");
419
String sTable = getContent((Element) nodelist.elementAt(0));
420
sTable = sTable.replaceAll("\\n"," ");
424
FastVector selectOutCome(Node item) throws Exception {
425
//NodeList nodelist = selectNodeList(item, "OUTCOME");
426
FastVector nodelist = selectElements(item, "OUTCOME");
430
FastVector selectName(Node item) throws Exception {
431
//NodeList nodelist = selectNodeList(item, "NAME");
432
FastVector nodelist = selectElements(item, "NAME");
436
FastVector selectProperty(Node item) throws Exception {
437
// NodeList nodelist = selectNodeList(item, "PROPERTY");
438
FastVector nodelist = selectElements(item, "PROPERTY");
442
FastVector selectElements(Node item, String sElement) throws Exception {
443
NodeList children = item.getChildNodes();
444
FastVector nodelist = new FastVector();
445
for (int iNode = 0; iNode < children.getLength(); iNode++) {
446
Node node = children.item(iNode);
447
if ((node.getNodeType() == Node.ELEMENT_NODE) && node.getNodeName().equals(sElement)) {
448
nodelist.addElement(node);
453
/** Count nr of arcs missing from other network compared to current network
454
* Note that an arc is not 'missing' if it is reversed.
455
* @param other network to compare with
456
* @return nr of missing arcs
458
public int missingArcs(BayesNet other) {
462
for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) {
463
for (int iParent = 0; iParent < m_ParentSets[iAttribute].getNrOfParents(); iParent++) {
464
int nParent = m_ParentSets[iAttribute].getParent(iParent);
465
if (!other.getParentSet(m_order[iAttribute]).contains(m_order[nParent]) && !other.getParentSet(m_order[nParent]).contains(m_order[iAttribute])) {
471
} catch (Exception e) {
472
System.err.println(e.getMessage());
477
/** Count nr of exta arcs from other network compared to current network
478
* Note that an arc is not 'extra' if it is reversed.
479
* @param other network to compare with
480
* @return nr of missing arcs
482
public int extraArcs(BayesNet other) {
486
for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) {
487
for (int iParent = 0; iParent < other.getParentSet(m_order[iAttribute]).getNrOfParents(); iParent++) {
488
int nParent = m_order[other.getParentSet(m_order[iAttribute]).getParent(iParent)];
489
if (!m_ParentSets[iAttribute].contains(nParent) && !m_ParentSets[nParent].contains(iAttribute)) {
495
} catch (Exception e) {
496
System.err.println(e.getMessage());
502
/** calculates the divergence between the probability distribution
503
* represented by this network and that of another, that is,
504
* \sum_{x\in X} P(x)log P(x)/Q(x)
505
* where X is the set of values the nodes in the network can take,
506
* P(x) the probability of this network for configuration x
507
* Q(x) the probability of the other network for configuration x
508
* @param other network to compare with
509
* @return divergence between this and other Bayes Network
511
public double divergence(BayesNet other) {
516
int nNodes = m_Instances.numAttributes();
517
int [] nCard = new int[nNodes];
518
for (int iNode = 0; iNode < nNodes; iNode++) {
519
nCard[iNode] = m_Instances.attribute(iNode).numValues();
521
// x: holds current configuration of nodes
522
int [] x = new int[nNodes];
523
// simply sum over all configurations to calc divergence D
526
// update configuration
528
while (i < nNodes && x[i] == m_Instances.attribute(i).numValues()) {
537
// calc P(x) and Q(x)
539
for (int iNode = 0; iNode < nNodes; iNode++) {
541
for (int iParent = 0; iParent < m_ParentSets[iNode].getNrOfParents(); iParent++) {
542
int nParent = m_ParentSets[iNode].getParent(iParent);
543
iCPT = iCPT * nCard[nParent] + x[nParent];
545
P = P * m_Distributions[iNode][iCPT].getProbability(x[iNode]);
549
for (int iNode = 0; iNode < nNodes; iNode++) {
551
for (int iParent = 0; iParent < other.getParentSet(m_order[iNode]).getNrOfParents(); iParent++) {
552
int nParent = m_order[other.getParentSet(m_order[iNode]).getParent(iParent)];
553
iCPT = iCPT * nCard[nParent] + x[nParent];
555
Q = Q * other.m_Distributions[m_order[iNode]][iCPT].getProbability(x[iNode]);
558
// update divergence if probabilities are positive
559
if (P > 0.0 && Q > 0.0) {
560
D = D + P * Math.log(Q / P);
565
} catch (Exception e) {
566
System.err.println(e.getMessage());
571
/** Count nr of reversed arcs from other network compared to current network
572
* @param other network to compare with
573
* @return nr of missing arcs
575
public int reversedArcs(BayesNet other) {
579
for (int iAttribute = 0; iAttribute < m_Instances.numAttributes(); iAttribute++) {
580
for (int iParent = 0; iParent < m_ParentSets[iAttribute].getNrOfParents(); iParent++) {
581
int nParent = m_ParentSets[iAttribute].getParent(iParent);
582
if (!other.getParentSet(m_order[iAttribute]).contains(m_order[nParent]) && other.getParentSet(m_order[nParent]).contains(m_order[iAttribute])) {
588
} catch (Exception e) {
589
System.err.println(e.getMessage());
593
/** getNode finds the index of the node with name sNodeName
594
* and throws an exception if no such node can be found.
595
* @param sNodeName name of the node to get the index from
596
* @return index of the node with name sNodeName
597
* @throws Exception if node cannot be found
599
public int getNode(String sNodeName) throws Exception {
601
while (iNode < m_Instances.numAttributes()) {
602
if (m_Instances.attribute(iNode).name().equals(sNodeName)) {
607
throw new Exception("Could not find node [[" + sNodeName + "]]");
611
* the default constructor
617
* Loads the file specified as first parameter and prints it to stdout.
619
* @param args the command line parameters
621
public static void main(String[] args) {
623
BIFReader br = new BIFReader();
624
br.processFile(args[0]);
625
System.out.println(br.toString());
628
catch (Throwable t) {