62
62
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
63
63
* @author Len Trigg (trigg@cs.waikato.ac.nz)
64
64
* @author FracPete (fracpete at waikato dot ac dot nz)
65
* @version $Revision: 1.73 $
65
* @version $Revision: 1.76 $
67
67
public class Instances
68
implements Serializable {
68
implements Serializable, RevisionHandler {
70
70
/** for serialization */
71
71
static final long serialVersionUID = -19412345060742748L;
104
104
* @see #readInstance(Reader) */
105
105
protected int m_Lines = 0;
107
/** used in randomizeAttribute and undoRandomizeAttribute to store/restore
108
* the index of the attribute that was last shuffled, and its original values
110
private int attIdx4Randomization = -1;
111
private double[] attIdxOrigValues;
108
114
* Reads an ARFF file from a reader, and assigns a weight of
109
115
* one to each instance. Lets the index of the class
239
245
m_Instances = new FastVector(capacity);
243
* Create a copy of the structure, but "cleanse" string types (i.e.
244
* doesn't contain references to the strings seen in the past).
245
* Also cleanses all relational attributes.
250
* Create a copy of the structure if the data has string or
251
* relational attributes, "cleanses" string types (i.e. doesn't
252
* contain references to the strings seen in the past) and all
253
* relational attributes.
247
255
* @return a copy of the instance structure.
249
257
public Instances stringFreeStructure() {
251
FastVector atts = (FastVector)m_Attributes.copy();
252
for (int i = 0 ; i < atts.size(); i++) {
253
Attribute att = (Attribute)atts.elementAt(i);
259
FastVector newAtts = new FastVector();
260
for (int i = 0 ; i < m_Attributes.size(); i++) {
261
Attribute att = (Attribute)m_Attributes.elementAt(i);
254
262
if (att.type() == Attribute.STRING) {
255
atts.setElementAt(new Attribute(att.name(), (FastVector)null), i);
263
newAtts.addElement(new Attribute(att.name(), (FastVector)null, i));
256
264
} else if (att.type() == Attribute.RELATIONAL) {
257
atts.setElementAt(new Attribute(att.name(), new Instances(att.relation(), 0)), i);
265
newAtts.addElement(new Attribute(att.name(), new Instances(att.relation(), 0), i));
260
Instances result = new Instances(relationName(), atts, 0);
261
result.m_ClassIndex = m_ClassIndex;
268
if (newAtts.size() == 0) {
269
return new Instances(this, 0);
271
FastVector atts = (FastVector)m_Attributes.copy();
272
for (int i = 0; i < newAtts.size(); i++) {
273
atts.setElementAt(newAtts.elementAt(i), ((Attribute)newAtts.elementAt(i)).index());
275
Instances result = new Instances(this, 0);
276
result.m_Attributes = atts;
888
* Does an undo of a previous call to randomizeAttribute, so that the
889
* original values of the attribute are restored. Only the original values
890
* before the last call to randomizeAttribute can be restored. Note, the
891
* original Instances object is modified.
893
* @throws Exception if there was no call to randomizeAttribute or if
894
* instances were added or removed since the last call to
895
* <code>randomizeAttribute</code>
896
* @see #randomizeAttribute(int, Random, int)
897
* @author Arne Muller (arne.muller@gmail.com)
899
public void undoRandomizeAttribute() throws Exception {
901
if ( attIdx4Randomization < 0 )
902
throw new Exception("no randomization to undo!");
904
if ( attIdxOrigValues.length != this.numInstances() )
906
"meanwhile number of attributes has changed, can't undo!");
908
for ( int i=0; i<attIdxOrigValues.length; i++ ) {
909
Instance instance = (Instance)(m_Instances.elementAt(i));
910
instance.modifyValue(attIdx4Randomization, attIdxOrigValues[i]);
917
* Shuffles the values of a given attribute in all instances. Note,
918
* the original Instances object is modified (i.e. no copying), and the
919
* method is not thread-safe. To avoid undefined behavior of an Instances
920
* object you should not perform other Instances modifying operations between
921
* a call to <code>randomizeAttribute</code> and
922
* <code>undoRandomizeAttribute</code>.
924
* @param attIdx the index of the attribute to shuffle
925
* @param random a random number generator
926
* @param rounds how many rounds of shuffling to perform, at least 1. The more
927
* rounds of shuffling the more random your attribute value distribution
928
* (e.g. choose 3, but note that the time needed for shuffling is proportional
929
* to the number of rounds).
930
* @see #undoRandomizeAttribute()
931
* @author Arne Muller (arne.muller@gmail.com)
933
public void randomizeAttribute(int attIdx, Random random, int rounds) {
935
attIdx4Randomization = attIdx;
936
attIdxOrigValues = this.attributeToDoubleArray(attIdx);
937
int n = numInstances();
938
for ( int j=0; j<rounds; j++ ) {
939
for ( int i=0; i<n; i++ ) {
940
int r = random.nextInt(n);
941
Instance iOne = (Instance)(m_Instances.elementAt(i));
942
Instance iTwo = (Instance)(m_Instances.elementAt(r));
943
double helper = iOne.value(attIdx);
944
iOne.modifyValue(attIdx, iTwo.value(attIdx));
945
iTwo.modifyValue(attIdx, helper);
873
953
* Reads a single instance from the reader and appends it
874
954
* to the dataset. Automatically expands the dataset if it
875
955
* is not large enough to hold the instance. This method does