/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
 */
package weka.core.converters;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.xml.XMLInstances;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.zip.GZIPInputStream;
41
<!-- globalinfo-start -->
42
* Reads a source that is in the XML version of the ARFF format. It automatically decompresses the data if the extension is '.xrff.gz'.
44
<!-- globalinfo-end -->
46
* @author FracPete (fracpete at waikato dot ac dot nz)
47
* @version $Revision: 1.3 $
50
public class XRFFLoader
51
extends AbstractFileLoader
52
implements BatchConverter, URLSourcedLoader {
54
/** for serialization */
55
private static final long serialVersionUID = 3764533621135196582L;
57
/** the file extension */
58
public static String FILE_EXTENSION = XMLInstances.FILE_EXTENSION;
60
/** the extension for compressed files */
61
public static String FILE_EXTENSION_COMPRESSED = FILE_EXTENSION + ".gz";
64
protected String m_URL = "http://";
66
/** The reader for the source file. */
67
protected transient Reader m_sourceReader = null;
69
/** the loaded XML document */
70
protected XMLInstances m_XMLInstances;
73
* Returns a string describing this Loader
75
* @return a description of the Loader suitable for
76
* displaying in the explorer/experimenter gui
78
public String globalInfo() {
80
"Reads a source that is in the XML version of the ARFF format. "
81
+ "It automatically decompresses the data if the extension is '"
82
+ FILE_EXTENSION_COMPRESSED + "'.";
86
* Get the file extension used for libsvm files
88
* @return the file extension
90
public String getFileExtension() {
91
return FILE_EXTENSION;
95
* Gets all the file extensions used for this type of file
97
* @return the file extensions
99
public String[] getFileExtensions() {
100
return new String[]{FILE_EXTENSION, FILE_EXTENSION_COMPRESSED};
104
* Returns a description of the file type.
106
* @return a short file description
108
public String getFileDescription() {
109
return "XRFF data files";
113
* Resets the Loader ready to read a new data set
115
* @throws IOException if something goes wrong
117
public void reset() throws IOException {
119
m_XMLInstances = null;
123
if ((m_File != null) && (new File(m_File)).isFile()) {
124
setFile(new File(m_File));
126
else if ((m_URL != null) && !m_URL.equals("http://")) {
132
* Resets the Loader object and sets the source of the data set to be
133
* the supplied File object.
135
* @param file the source file.
136
* @throws IOException if an error occurs
138
public void setSource(File file) throws IOException {
140
m_XMLInstances = null;
145
throw new IOException("Source file object is null!");
148
if (file.getName().endsWith(FILE_EXTENSION_COMPRESSED))
149
setSource(new GZIPInputStream(new FileInputStream(file)));
151
setSource(new FileInputStream(file));
153
catch (FileNotFoundException ex) {
154
throw new IOException("File not found");
158
m_File = file.getAbsolutePath();
162
* Resets the Loader object and sets the source of the data set to be
165
* @param url the source url.
166
* @throws IOException if an error occurs
168
public void setSource(URL url) throws IOException {
170
m_XMLInstances = null;
174
setSource(url.openStream());
176
m_URL = url.toString();
180
* Set the url to load from
182
* @param url the url to load from
183
* @throws IOException if the url can't be set.
185
public void setURL(String url) throws IOException {
187
setSource(new URL(url));
191
* Return the current url
193
* @return the current url
195
public String retrieveURL() {
200
* Resets the Loader object and sets the source of the data set to be
201
* the supplied InputStream.
203
* @param in the source InputStream.
204
* @throws IOException if initialization of reader fails.
206
public void setSource(InputStream in) throws IOException {
207
m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath();
210
m_sourceReader = new BufferedReader(new InputStreamReader(in));
214
* Determines and returns (if possible) the structure (internally the
215
* header) of the data set as an empty set of instances.
217
* @return the structure of the data set as an empty set
219
* @throws IOException if an error occurs
221
public Instances getStructure() throws IOException {
222
if (m_sourceReader == null)
223
throw new IOException("No source has been specified");
225
if (m_structure == null) {
227
m_XMLInstances = new XMLInstances(m_sourceReader);
228
m_structure = new Instances(m_XMLInstances.getInstances(), 0);
230
catch (IOException ioe) {
234
catch (Exception e) {
235
throw new RuntimeException(e);
239
return new Instances(m_structure, 0);
243
* Return the full data set. If the structure hasn't yet been determined
244
* by a call to getStructure then method should do so before processing
245
* the rest of the data set.
247
* @return the structure of the data set as an empty
249
* @throws IOException if there is no source or parsing fails
251
public Instances getDataSet() throws IOException {
252
if (m_sourceReader == null)
253
throw new IOException("No source has been specified");
255
if (getRetrieval() == INCREMENTAL)
256
throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
259
if (m_structure == null)
262
return m_XMLInstances.getInstances();
266
* XRFFLoader is unable to process a data set incrementally.
268
* @param structure ignored
269
* @return never returns without throwing an exception
270
* @throws IOException always. XRFFLoader is unable to process a
271
* data set incrementally.
273
public Instance getNextInstance(Instances structure) throws IOException {
274
throw new IOException("XRFFLoader can't read data sets incrementally.");
280
* @param args should contain the name of an input file.
282
public static void main(String[] args) {
283
runFileLoader(new XRFFLoader(), args);