2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
23
package weka.core.xml;
26
import java.io.InputStream;
27
import java.io.Reader;
28
import java.util.Vector;
29
import org.w3c.dom.Document;
30
import org.w3c.dom.Element;
31
import org.w3c.dom.Node;
32
import org.w3c.dom.NodeList;
35
* A class for transforming options listed in XML to a regular WEKA command line.
38
* @author FracPete (fracpete at waikato dot ac dot nz)
39
* @version $Revision: 1.2 $
41
public class XMLOptions {
42
/** tag for a single option */
43
public final static String TAG_OPTION = "option";
45
/** tag for a list of options */
46
public final static String TAG_OPTIONS = "options";
48
/** the name attribute */
49
public final static String ATT_NAME = "name";
51
/** the type attribute */
52
public final static String ATT_TYPE = "type";
54
/** the value attribute */
55
public final static String ATT_VALUE = "value";
57
/** a value of the type attribute */
58
public final static String VAL_TYPE_FLAG = "flag";
60
/** a value of the type attribute */
61
public final static String VAL_TYPE_SINGLE = "single";
63
/** a value of the type attribute */
64
public final static String VAL_TYPE_HYPHENS = "hyphens";
66
/** a value of the type attribute */
67
public final static String VAL_TYPE_QUOTES = "quotes";
69
/** a value of the type attribute */
70
public final static String VAL_TYPE_CLASSIFIER = "classifier";
73
public final static String ROOT_NODE = TAG_OPTIONS;
75
/** the DTD for the XML file */
76
public final static String DOCTYPE =
77
"<!DOCTYPE " + ROOT_NODE + "\n"
79
+ " <!ELEMENT " + TAG_OPTIONS + " (" + TAG_OPTION + ")*>\n"
80
+ " <!ATTLIST " + TAG_OPTIONS + " " + ATT_TYPE + " CDATA \"classifier\">\n"
81
+ " <!ATTLIST " + TAG_OPTIONS + " " + ATT_VALUE + " CDATA \"\">\n"
82
+ " <!ELEMENT " + TAG_OPTION + " (#PCDATA | " + TAG_OPTIONS + ")*>\n"
83
+ " <!ATTLIST " + TAG_OPTION + " " + ATT_NAME + " CDATA #REQUIRED>\n"
84
+ " <!ATTLIST " + TAG_OPTION + " " + ATT_TYPE + " (flag | single | hyphens | quotes) \"single\">\n"
88
/** the XML document */
89
protected XMLDocument m_XMLDocument = null;
92
* Creates a new instance of XMLOptions
93
* @throws Exception if the construction of the DocumentBuilder fails
94
* @see #setValidating(boolean)
96
public XMLOptions() throws Exception {
97
m_XMLDocument = new XMLDocument();
98
m_XMLDocument.setRootNode(ROOT_NODE);
99
m_XMLDocument.setDocType(DOCTYPE);
/**
 * Creates a new instance of XMLOptions and reads the given XML.
 *
 * @param xml the xml to parse (if "&lt;?xml" is not found then it is considered a file)
 * @throws Exception if the construction of the DocumentBuilder fails
 * @see #setValidating(boolean)
 */
public XMLOptions(String xml) throws Exception {
  // the default constructor is the only visible place that creates
  // m_XMLDocument; without this call getXMLDocument() returns null here
  this();
  getXMLDocument().read(xml);
}
/**
 * Creates a new instance of XMLOptions and reads the given file.
 *
 * @param file the XML file to parse
 * @throws Exception if the construction of the DocumentBuilder fails
 * @see #setValidating(boolean)
 */
public XMLOptions(File file) throws Exception {
  // the default constructor is the only visible place that creates
  // m_XMLDocument; without this call getXMLDocument() returns null here
  this();
  getXMLDocument().read(file);
}
/**
 * Creates a new instance of XMLOptions and reads the given stream.
 *
 * @param stream the XML stream to parse
 * @throws Exception if the construction of the DocumentBuilder fails
 * @see #setValidating(boolean)
 */
public XMLOptions(InputStream stream) throws Exception {
  // the default constructor is the only visible place that creates
  // m_XMLDocument; without this call getXMLDocument() returns null here
  this();
  getXMLDocument().read(stream);
}
/**
 * Creates a new instance of XMLOptions and reads from the given reader.
 *
 * @param reader the XML reader to parse
 * @throws Exception if the construction of the DocumentBuilder fails
 * @see #setValidating(boolean)
 */
public XMLOptions(Reader reader) throws Exception {
  // the default constructor is the only visible place that creates
  // m_XMLDocument; without this call getXMLDocument() returns null here
  this();
  getXMLDocument().read(reader);
}
148
* returns whether a validating parser is used
149
* @return whether a validating parser is used
151
public boolean getValidating() {
152
return m_XMLDocument.getValidating();
156
* sets whether to use a validating parser or not. <br>
157
* Note: this does clear the current DOM document!
158
* @param validating whether to use a validating parser
159
* @throws Exception if the instantiating of the DocumentBuilder fails
161
public void setValidating(boolean validating) throws Exception {
162
m_XMLDocument.setValidating(validating);
166
* returns the parsed DOM document
167
* @return the parsed DOM document
169
public Document getDocument() {
170
return fixHyphens(m_XMLDocument.getDocument());
174
* returns the handler of the XML document. the internal DOM document can
175
* be accessed via the <code>getDocument()</code> method.
176
* @return the object handling the XML document
177
* @see #getDocument()
179
public XMLDocument getXMLDocument() {
180
return m_XMLDocument;
184
* pushes any options with type ATT_HYPHENS to the end, s.t. the "--" are
185
* really added at the end
186
* @param document the DOM document to work on
187
* @return the fixed DOM document
189
protected Document fixHyphens(Document document) {
197
// get all option tags
198
list = document.getDocumentElement().getElementsByTagName(TAG_OPTION);
200
// get all hyphen tags
201
hyphens = new Vector();
202
for (i = 0; i < list.getLength(); i++) {
203
if (((Element) list.item(i)).getAttribute(ATT_TYPE).equals(VAL_TYPE_HYPHENS))
204
hyphens.add(list.item(i));
207
// check all hyphen tags whether they are the end, if not fix it
208
for (i = 0; i < hyphens.size(); i++) {
209
node = (Node) hyphens.get(i);
214
while (tmpNode.getNextSibling() != null) {
216
if (tmpNode.getNextSibling().getNodeType() == Node.ELEMENT_NODE) {
220
tmpNode = tmpNode.getNextSibling();
225
tmpNode = node.getParentNode();
226
tmpNode.removeChild(node);
227
tmpNode.appendChild(node);
235
* returns the quotes level for the given node, i.e. it returns the number
236
* of option's of the type "quotes" are in the path
238
protected int getQuotesLevel(Node node) {
242
while (node.getParentNode() != null) {
243
if (!(node instanceof Element))
247
if (node.getNodeName().equals(TAG_OPTION)) {
249
if (((Element) node).getAttribute(ATT_TYPE).equals(VAL_TYPE_QUOTES))
253
node = node.getParentNode();
260
* converts the given node into a command line representation and adds it
261
* to the existing command line
262
* @param cl the command line so far
263
* @param parent the node to convert to command line
264
* @param depth the current depth
265
* @return the new command line
267
// Renders the given element and, recursively, its child tags as a command
// line fragment that is added to cl. TAG_OPTIONS and TAG_OPTION elements are
// handled; depth is only handed on (as depth + 1) in the lines visible here.
// NOTE(review): newCl, tmpCl, list, subList and i are used below, but their
// declarations and initial values are not visible in this span, and neither
// are the closing braces or the return statement - confirm against the
// complete file.
protected String toCommandLine(String cl, Element parent, int depth) {
273
NodeList subNodeList;
279
// an <options> element
if (parent.getNodeName().equals(TAG_OPTIONS)) {
280
// classifier? -> add
281
if (parent.getAttribute(ATT_TYPE).equals(VAL_TYPE_CLASSIFIER)) {
282
// the value attribute is appended as is
newCl += parent.getAttribute(ATT_VALUE);
286
// render every child tag onto newCl (getChildTags presumably returns the
// child elements of parent - confirm in XMLDocument)
list = XMLDocument.getChildTags(parent);
287
for (i = 0; i < list.size(); i++)
288
newCl = toCommandLine(newCl, (Element) list.get(i), depth + 1);
292
// an <option> element: always starts with " -" plus its name attribute
if (parent.getNodeName().equals(TAG_OPTION)) {
293
newCl += " -" + parent.getAttribute(ATT_NAME);
294
subList = XMLDocument.getChildTags(parent);
295
subNodeList = parent.getChildNodes();
297
// type "single": the trimmed value of the first child node is the argument,
// if it is not empty
// NOTE(review): getNodeValue() is null for element nodes in the DOM API, so
// this looks like it expects the first child to be text - confirm
if (parent.getAttribute(ATT_TYPE).equals(VAL_TYPE_SINGLE)) {
298
if ( (subNodeList.getLength() > 0) && (!subNodeList.item(0).getNodeValue().trim().equals("")) )
299
newCl += " " + subNodeList.item(0).getNodeValue().trim();
302
// type "hyphens": the value attribute of the first child tag is the
// argument; a missing child tag is not checked for in the visible lines
if (parent.getAttribute(ATT_TYPE).equals(VAL_TYPE_HYPHENS)) {
303
newCl += " " + ((Element) subList.get(0)).getAttribute(ATT_VALUE); // expects classifier
304
// get single options in this node
305
subList = XMLDocument.getChildTags((Element) subList.get(0));
306
// get options after --
308
for (i = 0; i < subList.size(); i++)
309
tmpCl = toCommandLine(tmpCl, (Element) subList.get(i), depth + 1);
311
tmpCl = tmpCl.trim();
312
// the " -- " separator is only added if there is something to put after it
if (!tmpCl.equals(""))
313
newCl += " -- " + tmpCl;
316
// type "quotes": the rendered child tags get wrapped
if (parent.getAttribute(ATT_TYPE).equals(VAL_TYPE_QUOTES)) {
319
// runs getQuotesLevel(parent) - 1 times; the loop body is not visible here
for (i = 1; i < getQuotesLevel(parent); i++)
324
// render the child tags into tmpCl and append the trimmed result
for (i = 0; i < subList.size(); i++)
325
tmpCl = toCommandLine(tmpCl, (Element) subList.get(i), depth + 1);
326
newCl += tmpCl.trim();
328
// same count as for the loop before the child tags; body not visible here
for (i = 1; i < getQuotesLevel(parent); i++)
334
// add to existing command line
335
cl += " " + newCl.trim();
341
* returns the given DOM document as command line
342
* @return the document as command line
343
* @throws Exception if anything goes wrong initializing the parsing
345
public String toCommandLine() throws Exception {
346
return toCommandLine(new String(), getDocument().getDocumentElement(), 0);
350
* returns the current DOM document as string array (takes care of quotes!)
351
* @return the document as string array
352
* @throws Exception if anything goes wrong initializing the parsing
354
// Splits the command line produced by toCommandLine() into a string array.
// NOTE(review): only fragments of this method are visible in this span; the
// declarations of cl, result, tmpStr and i, the case labels of the switch and
// the closing braces are missing - confirm against the complete file.
public String[] toArray() throws Exception {
363
cl = toCommandLine();
364
result = new Vector();
370
// walk over the command line character by character
for (i = 0; i < cl.length(); i++) {
373
switch (cl.charAt(i)) {
379
// can we toggle quotes? (ignore nested quotes)
388
// if not quoted then break!
390
// the collected string is stored with every backslash-quote sequence
// replaced by a plain double quote
result.add(tmpStr.replaceAll("\\\\\"", "\""));
398
// the current character is appended to the string collected so far
tmpStr += "" + cl.charAt(i);
402
if (!tmpStr.equals(""))
405
// NOTE(review): with new String[1] as target an empty result yields an array
// of length 1 holding null (see Collection.toArray), not an empty array
return (String[]) result.toArray(new String[1]);
409
* returns the object in a string representation (as indented XML output)
411
* @return the object in a string representation
413
public String toString() {
414
return getXMLDocument().toString();
418
* for testing only. prints the given XML, the resulting commandline and the string array.
421
public static void main(String[] args) throws Exception {
422
if (args.length > 0) {
423
System.out.println("\nXML:\n\n" + new XMLOptions(args[0]).toString());
425
System.out.println("\nCommandline:\n\n" + new XMLOptions(args[0]).toCommandLine());
427
System.out.println("\nString array:\n");
428
String[] options = new XMLOptions(args[0]).toArray();
429
for (int i = 0; i < options.length; i++)
430
System.out.println(options[i]);