2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
23
package weka.core.converters;
25
import weka.core.Attribute;
26
import weka.core.Capabilities;
27
import weka.core.FastVector;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.Option;
31
import weka.core.OptionHandler;
32
import weka.core.Utils;
33
import weka.core.Capabilities.Capability;
36
import java.io.IOException;
37
import java.io.PrintWriter;
38
import java.util.Enumeration;
41
<!-- globalinfo-start -->
42
* Writes to a destination that is in the format used by the C4.5 algorithm.<br/>
43
* Therefore it outputs a names and a data file.
45
<!-- globalinfo-end -->
47
<!-- options-start -->
48
* Valid options are: <p/>
50
* <pre> -i <the input file>
51
* The input file</pre>
53
* <pre> -o <the output file>
54
* The output file</pre>
56
* <pre> -c <the class index>
57
* The class index</pre>
61
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
62
* @version $Revision: 1.6 $
66
extends AbstractFileSaver
67
implements BatchConverter, IncrementalConverter, OptionHandler {
69
/** for serialization */
70
static final long serialVersionUID = -821428878384253377L;
79
* Returns a string describing this Saver
80
* @return a description of the Saver suitable for
81
* displaying in the explorer/experimenter gui
83
public String globalInfo() {
84
return "Writes to a destination that is in the format used by the C4.5 algorithm.\nTherefore it outputs a names and a data file.";
89
* Returns a description of the file type.
91
* @return a short file description
93
public String getFileDescription() {
94
return "C4.5 file format";
100
public void resetOptions() {
102
super.resetOptions();
103
setFileExtension(".names");
107
* Returns the Capabilities of this saver.
109
* @return the capabilities of this object
112
public Capabilities getCapabilities() {
113
Capabilities result = super.getCapabilities();
116
result.enable(Capability.NOMINAL_ATTRIBUTES);
117
result.enable(Capability.NUMERIC_ATTRIBUTES);
118
result.enable(Capability.DATE_ATTRIBUTES);
119
result.enable(Capability.MISSING_VALUES);
122
result.enable(Capability.NOMINAL_CLASS);
123
result.enable(Capability.NUMERIC_CLASS);
124
result.enable(Capability.DATE_CLASS);
125
result.enable(Capability.MISSING_CLASS_VALUES);
130
/** Saves an instance incrementally. The structure has to be set by using the
131
* setStructure() method or setInstances() method.
132
* @param inst the instance to save
133
* @throws IOException if an instance cannot be saved incrementally.
135
// Writes a single instance in C4.5 format. What happens depends on the current
// write mode (WAIT, CANCEL, STRUCTURE_READY or WRITE are tested below).
// NOTE(review): this listing does not show every line of the method (closing
// braces, else branches and the try opener are absent); the comments below only
// describe the statements that are visible.
public void writeIncremental(Instance inst) throws IOException{
137
int writeMode = getWriteMode();
138
Instances structure = getInstances();
139
PrintWriter outW = null;
141
if(structure != null){
142
// no class index set: fall back to the last attribute and report it on stderr
if(structure.classIndex() == -1){
143
structure.setClassIndex(structure.numAttributes()-1);
144
System.err.println("No class specified. Last attribute is used as class attribute.");
146
// a numeric class attribute is rejected
if(structure.attribute(structure.classIndex()).isNumeric())
147
throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
149
// incremental writing is refused when the retrieval mode is BATCH or NONE
if(getRetrieval() == BATCH || getRetrieval() == NONE)
150
throw new IOException("Batch and incremental saving cannot be mixed.");
151
// both an output file and a writer are required
if(retrieveFile() == null || getWriter() == null){
152
// NOTE(review): "specifiy" in the message below is a typo for "specify"; it is
// runtime text and therefore left unchanged here.
throw new IOException("C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
156
outW = new PrintWriter(getWriter());
158
// WAIT: decide between CANCEL and STRUCTURE_READY
if(writeMode == WAIT){
159
if(structure == null){
160
setWriteMode(CANCEL);
162
System.err.println("Structure(Header Information) has to be set in advance");
165
setWriteMode(STRUCTURE_READY);
166
writeMode = getWriteMode();
168
if(writeMode == CANCEL){
173
if(writeMode == STRUCTURE_READY){
175
// write header: this goes to the names file
176
// first the values of the class attribute
for (int i = 0; i < structure.attribute(structure.classIndex()).numValues(); i++) {
177
outW.write(structure.attribute(structure.classIndex()).value(i));
178
if (i < structure.attribute(structure.classIndex()).numValues()-1) {
184
// then one "<name>: " entry per non-class attribute
for (int i = 0; i < structure.numAttributes(); i++) {
185
if (i != structure.classIndex()) {
186
outW.write(structure.attribute(i).name()+": ");
187
// numeric and date attributes are declared as "continuous."
if (structure.attribute(i).isNumeric() || structure.attribute(i).isDate()) {
188
outW.write("continuous.\n");
190
// the values of the attribute are written out one by one
Attribute temp = structure.attribute(i);
191
for (int j = 0; j < temp.numValues(); j++) {
192
outW.write(temp.value(j));
193
if (j < temp.numValues()-1) {
205
writeMode = getWriteMode();
207
// derive the data file path: the text after the last '.' of the current path
// is replaced by the ".data" extension
String out = retrieveFile().getAbsolutePath();
208
setFileExtension(".data");
209
out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
210
// NOTE(review): despite its name, namesFile holds the ".data" path built above
File namesFile = new File(out);
213
} catch(Exception ex){
214
throw new IOException("Cannot create data file, only names file created.");
216
if(retrieveFile() == null || getWriter() == null){
217
throw new IOException("Cannot create data file, only names file created.");
219
outW = new PrintWriter(getWriter());
221
if(writeMode == WRITE){
222
if(structure == null)
223
throw new IOException("No instances information available.");
225
// write instance: this goes to the data file
226
// every non-class attribute value is followed by a comma
for(int j = 0; j < inst.numAttributes(); j++){
227
if(j != structure.classIndex()){
228
if (inst.isMissing(j)) {
231
// nominal and string values are written as their label, everything else as
// the numeric value
if (structure.attribute(j).isNominal() ||
232
structure.attribute(j).isString()) {
233
outW.write(structure.attribute(j).value((int)inst.value(j))+",");
235
outW.write(""+inst.value(j)+",");
239
// write the class value
240
if (inst.isMissing(structure.classIndex())) {
244
outW.write(structure.attribute(structure.classIndex()).value((int)inst.value(structure.classIndex())));
247
// flushes every 100 instances
// NOTE(review): the counter is reset once it exceeds 100, i.e. on the 101st
// increment; the flush call itself is not visible in this listing.
248
m_incrementalCounter++;
249
if(m_incrementalCounter > 100){
250
m_incrementalCounter = 0;
260
// restore the ".names" extension and the counter for the next run
setFileExtension(".names");
261
m_incrementalCounter = 0;
271
* Writes a batch of instances.
272
* @throws IOException if saving in batch mode is not possible
274
// Writes all instances in C4.5 format: the names file first, then the data file.
// NOTE(review): this listing does not show every line of the method (closing
// braces, else branches and the try opener are absent); the comments below only
// describe the statements that are visible.
public void writeBatch() throws IOException {
276
Instances instances = getInstances();
278
if(instances == null)
279
throw new IOException("No instances to save");
280
// no class index set: fall back to the last attribute and report it on stderr
if(instances.classIndex() == -1){
281
instances.setClassIndex(instances.numAttributes()-1);
282
System.err.println("No class specified. Last attribute is used as class attribute.");
284
// a numeric class attribute is rejected
if(instances.attribute(instances.classIndex()).isNumeric())
285
throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
286
// batch writing is refused when the retrieval mode is INCREMENTAL
if(getRetrieval() == INCREMENTAL)
287
throw new IOException("Batch and incremental saving cannot be mixed.");
290
// both an output file and a writer are required
if(retrieveFile() == null || getWriter() == null){
291
// NOTE(review): "specifiy" in the message below is a typo for "specify"; it is
// runtime text and therefore left unchanged here.
throw new IOException("C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
295
// names file: first the values of the class attribute
setFileExtension(".names");
296
PrintWriter outW = new PrintWriter(getWriter());
297
for (int i = 0; i < instances.attribute(instances.classIndex()).numValues(); i++) {
298
outW.write(instances.attribute(instances.classIndex()).value(i));
299
if (i < instances.attribute(instances.classIndex()).numValues()-1) {
305
// then one "<name>: " entry per non-class attribute
for (int i = 0; i < instances.numAttributes(); i++) {
306
if (i != instances.classIndex()) {
307
outW.write(instances.attribute(i).name()+": ");
308
// numeric and date attributes are declared as "continuous."
if (instances.attribute(i).isNumeric() || instances.attribute(i).isDate()) {
309
outW.write("continuous.\n");
311
// the values of the attribute are written out one by one
Attribute temp = instances.attribute(i);
312
for (int j = 0; j < temp.numValues(); j++) {
313
outW.write(temp.value(j));
314
if (j < temp.numValues()-1) {
327
// derive the data file path: the text after the last '.' of the current path
// is replaced by the ".data" extension
String out = retrieveFile().getAbsolutePath();
328
setFileExtension(".data");
329
out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
330
// NOTE(review): despite its name, namesFile holds the ".data" path built above
File namesFile = new File(out);
333
} catch(Exception ex){
334
throw new IOException("Cannot create data file, only names file created (Reason: " + ex.toString() + ").");
336
if(retrieveFile() == null || getWriter() == null){
337
throw new IOException("Cannot create data file, only names file created.");
339
outW = new PrintWriter(getWriter());
341
// data file: one pass over all instances
for (int i = 0; i < instances.numInstances(); i++) {
342
Instance temp = instances.instance(i);
343
// every non-class attribute value is followed by a comma
for(int j = 0; j < temp.numAttributes(); j++){
344
if(j != instances.classIndex()){
345
if (temp.isMissing(j)) {
347
// nominal and string values are written as their label, everything else as
// the numeric value
} else if (instances.attribute(j).isNominal() ||
348
instances.attribute(j).isString()) {
349
outW.write(instances.attribute(j).value((int)temp.value(j))+",");
351
outW.write(""+temp.value(j)+",");
355
// write the class value
356
if (temp.isMissing(instances.classIndex())) {
360
outW.write(instances.attribute(instances.classIndex()).value((int)temp.value(instances.classIndex())));
366
// restore the ".names" extension and set the write mode to CANCEL
setFileExtension(".names");
370
setWriteMode(CANCEL);
375
* Returns an enumeration describing the available options.
377
* @return an enumeration of all the available options.
379
// Builds the option list: every option reported by the superclass, followed by
// this saver's own -c option.
public Enumeration listOptions() {
380
FastVector result = new FastVector();
382
// copy the superclass options first so they precede -c in the result
Enumeration en = super.listOptions();
383
while (en.hasMoreElements())
384
result.addElement(en.nextElement());
386
// NOTE(review): only the name ("c"), the argument count (1) and the synopsis of
// this Option are visible in this listing; confirm the complete argument list
// against the Option constructor.
result.addElement(new Option(
388
"c", 1, "-c <the class index>"));
390
return result.elements();
395
* Parses a given list of options. <p/>
397
<!-- options-start -->
398
* Valid options are: <p/>
400
* <pre> -i <the input file>
401
* The input file</pre>
403
* <pre> -o <the output file>
404
* The output file</pre>
406
* <pre> -c <the class index>
407
* The class index</pre>
411
* @param options the list of options as an array of strings
412
* @throws Exception if an option is not supported
414
// Parses the -o (output file), -i (input file) and -c (class index) options.
// NOTE(review): this listing does not show every line of the method (the
// declaration of index, the handling of "first"/"last", the try openers, else
// branches and closing braces are absent); the comments below only describe the
// statements that are visible.
public void setOptions(String[] options) throws Exception {
416
String outputString = Utils.getOption('o', options);
417
String inputString = Utils.getOption('i', options);
418
String indexString = Utils.getOption('c', options);
420
ArffLoader loader = new ArffLoader();
426
// class index: "first" and "last" are tested explicitly, other values are
// parsed as an integer
// NOTE(review): whether the parsed number is 0- or 1-based cannot be told from
// the visible lines; verify before relying on it.
if (indexString.length() != 0){
427
if(indexString.equals("first"))
430
if (indexString.equals("last"))
433
index = Integer.parseInt(indexString);
437
// input: the file is loaded through the ArffLoader created above
if (inputString.length() != 0){
439
File input = new File(inputString);
440
loader.setFile(input);
441
Instances inst = loader.getDataSet();
443
inst.setClassIndex(inst.numAttributes() - 1);
445
inst.setClassIndex(index);
447
} catch(Exception ex){
448
throw new IOException("No data set loaded. Data set has to be arff format (Reason: " + ex.toString() + ").");
452
throw new IOException("No data set to save.");
454
if (outputString.length() != 0){
455
// add the appropriate file extension: when the name does not already end with
// it, either the text after the last '.' is replaced or the extension is appended
456
if (!outputString.endsWith(getFileExtension())){
457
if (outputString.lastIndexOf('.') != -1)
458
outputString = (outputString.substring(0,outputString.lastIndexOf('.'))) + getFileExtension();
460
outputString = outputString + getFileExtension();
463
File output = new File(outputString);
465
} catch(Exception ex){
466
throw new IOException("Cannot create output file.");
471
// the last attribute is set as class attribute
// NOTE(review): the condition guarding these two statements is not visible here
index = getInstances().numAttributes() - 1;
472
getInstances().setClassIndex(index);
476
* Gets the current settings of the C45Saver object.
478
* @return an array of strings suitable for passing to setOptions
480
public String [] getOptions() {
482
String [] options = new String [10];
484
if(retrieveFile() != null){
485
options[current++] = "-o"; options[current++] = "" + retrieveFile();
488
options[current++] = "-o"; options[current++] = "";
490
if(getInstances() != null){
491
options[current++] = "-i"; options[current++] = "" + getInstances().relationName();
492
options[current++] = "-c"; options[current++] = "" + getInstances().classIndex();
495
options[current++] = "-i"; options[current++] = "";
496
options[current++] = "-c"; options[current++] = "";
498
while (current < options.length) {
499
options[current++] = "";
509
* @param args should contain the options of a Saver.
511
public static void main(String[] args) {
512
runFileSaver(new C45Saver(), args);