~ubuntu-branches/ubuntu/precise/weka/precise

* S. Lievens, B. De Baets, K. Cao-Van (2006). A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting. Annals of Operations Research..

*

* Kim Cao-Van (2003). Supervised ranking: from semantics to algorithms.

*

* Stijn Lievens (2004). Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken.

*

* For more information about supervised ranking, see

*

* http://users.ugent.be/~slievens/supervised_ranking.php

*

* BibTeX:

* <pre>

* @article{Lievens2006,

* author = {S. Lievens and B. De Baets and K. Cao-Van},

* journal = {Annals of Operations Research},

* title = {A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting},

* year = {2006}

* }

* @phdthesis{Cao-Van2003,

* author = {Kim Cao-Van},

* school = {Ghent University},

* title = {Supervised ranking: from semantics to algorithms},

* year = {2003}

* }

* @mastersthesis{Lievens2004,

* author = {Stijn Lievens},

* school = {Ghent University},

* title = {Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken},

* year = {2004}

* }

* </pre>

*

* Valid options are:

* <pre> -D

* If set, classifier is run in debug mode and

* may output additional info to the console</pre>

* <pre> -C <REG|WSUM|MAX|MED|RMED>

* Sets the classification type to be used.

* (Default: MED)</pre>

100

101

* <pre> -B

102

* Use the balanced version of the Ordinal Stochastic Dominance Learner</pre>

103

104

* <pre> -W

105

* Use the weighted version of the Ordinal Stochastic Dominance Learner</pre>

106

107

* <pre> -S <value of interpolation parameter>

108

* Sets the value of the interpolation parameter (not with -W/T/P/L/U)

109

* (default: 0.5).</pre>

110

111

* <pre> -T

112

* Tune the interpolation parameter (not with -W/S)

113

* (default: off)</pre>

114

115

* <pre> -L <Lower bound for interpolation parameter>

116

* Lower bound for the interpolation parameter (not with -W/S)

117

* (default: 0)</pre>

118

119

* <pre> -U <Upper bound for interpolation parameter>

120

* Upper bound for the interpolation parameter (not with -W/S)

121

* (default: 1)</pre>

122

123

* <pre> -P <Number of parts>

124

* Determines the step size for tuning the interpolation

125

* parameter, nl. (U-L)/P (not with -W/S)

126

* (default: 10)</pre>

127

128

129

130

* @author Stijn Lievens (stijn.lievens@ugent.be)

131

* @version $Revision: 1.1 $

132

133

public abstract class OSDLCore

134

extends Classifier

135

implements TechnicalInformationHandler {

136

137

/** for serialization */

138

private static final long serialVersionUID = -9209888846680062897L;

139

140

/**

141

* Constant indicating that the classification type is

142

* regression (probabilistic weighted sum).

143

144

public static final int CT_REGRESSION = 0;

145

146

/**

147

* Constant indicating that the classification type is

148

* the probabilistic weighted sum.

149

150

public static final int CT_WEIGHTED_SUM = 1;

151

152

/**

153

* Constant indicating that the classification type is

154

* the mode of the distribution.

155

156

public static final int CT_MAXPROB = 2;

157

158

/**

159

* Constant indicating that the classification type is

160

* the median.

161

162

public static final int CT_MEDIAN = 3;

163

164

/**

165

* Constant indicating that the classification type is

166

* the median, but not rounded to the nearest class.

167

168

public static final int CT_MEDIAN_REAL = 4;

169

170

/** the classification types */

171

public static final Tag[] TAGS_CLASSIFICATIONTYPES = {

172

new Tag(CT_REGRESSION, "REG", "Regression"),

173

new Tag(CT_WEIGHTED_SUM, "WSUM", "Weighted Sum"),

174

new Tag(CT_MAXPROB, "MAX", "Maximum probability"),

175

new Tag(CT_MEDIAN, "MED", "Median"),

176

new Tag(CT_MEDIAN_REAL, "RMED", "Median without rounding")

177

};

178

179

/**

180

* The classification type, by default set to CT_MEDIAN.

181

182

private int m_ctype = CT_MEDIAN;

183

184

/**

185

* The training examples.

186

187

private Instances m_train;

188

189

/**

190

* Collection of (Coordinates,DiscreteEstimator) pairs.

191

* This Map is build from the training examples.

192

* The DiscreteEstimator is over the classes.

193

* Each DiscreteEstimator indicates how many training examples

194

* there are with the specified classes.

195

196

private Map m_estimatedDistributions;

197

198

199

/**

200

* Collection of (Coordinates,CumulativeDiscreteDistribution) pairs.

201

* This Map is build from the training examples, and more

202

* specifically from the previous map.

203

204

private Map m_estimatedCumulativeDistributions;

205

206

207

/**

208

* The interpolationparameter s.

209

* By default set to 1/2.

210

211

private double m_s = 0.5;

212

213

/**

214

* Lower bound for the interpolationparameter s.

215

* Default value is 0.

216

217

private double m_sLower = 0.;

218

219

/**

220

* Upper bound for the interpolationparameter s.

221

* Default value is 1.

222

223

private double m_sUpper = 1.0;

224

225

/**

226

* The number of parts the interval [m_sLower,m_sUpper] is

227

* divided in, while searching for the best parameter s.

228

* This thus determines the granularity of the search.

229

* m_sNrParts + 1 values of the interpolationparameter will

230

* be tested.

231

232

private int m_sNrParts = 10;

233

234

/**

235

* Indicates whether the interpolationparameter is to be tuned

236

* using leave-one-out cross validation. <code> true </code> if

237

* this is the case (default is <code> false </code>).

238

239

private boolean m_tuneInterpolationParameter = false;

240

241

/**

242

* Indicates whether the current value of the interpolationparamter

243

* is valid. More specifically if <code>

244

* m_tuneInterpolationParameter == true </code>, and

245

* <code> m_InterpolationParameter == false </code>,

246

* this means that the current interpolation parameter is not valid.

247

* This parameter is only relevant if <code> m_tuneInterpolationParameter

248

* == true </code>.

249

250

* If <code> m_tuneInterpolationParameter </code> and <code>

251

* m_interpolationParameterValid </code> are both <code> true </code>,

252

* then <code> m_s </code> should always be between

253

* <code> m_sLower </code> and <code> m_sUpper </code>.

254

255

private boolean m_interpolationParameterValid = false;

256

257

258

/**

259

* Constant to switch between balanced and unbalanced OSDL.

260

* <code> true </code> means that one chooses balanced OSDL

261

* (default: <code> false </code>).

262

263

private boolean m_balanced = false;

264

265

/**

266

* Constant to choose the weighted variant of the OSDL algorithm.

267

268

private boolean m_weighted = false;

269

270

/**

271

* Coordinates representing the smallest element of the data space.

272

273

private Coordinates smallestElement;

274

275

/**

276

* Coordinates representing the biggest element of the data space.

277

278

private Coordinates biggestElement;

279

280

/**

281

* Returns a string describing the classifier.

282

* @return a description suitable for displaying in the

283

* explorer/experimenter gui

284

285

public String globalInfo() {

286

return "This class is an implementation of the Ordinal Stochastic "

287

+ "Dominance Learner.\n"

288

+ "Further information regarding the OSDL-algorithm can be found in:\n\n"

289

+ getTechnicalInformation().toString() + "\n\n"

290

+ "For more information about supervised ranking, see\n\n"

291

+ "http://users.ugent.be/~slievens/supervised_ranking.php";

292

}

293

294

/**

295

* Returns an instance of a TechnicalInformation object, containing

296

* detailed information about the technical background of this class,

297

* e.g., paper reference or book this class is based on.

298

299

* @return the technical information about this class

300

301

public TechnicalInformation getTechnicalInformation() {

302

TechnicalInformation result;

303

TechnicalInformation additional;

304

305

result = new TechnicalInformation(Type.ARTICLE);

306

result.setValue(Field.AUTHOR, "S. Lievens and B. De Baets and K. Cao-Van");

307

result.setValue(Field.YEAR, "2006");

308

result.setValue(Field.TITLE, "A Probabilistic Framework for the Design of Instance-Based Supervised Ranking Algorithms in an Ordinal Setting");

309

result.setValue(Field.JOURNAL, "Annals of Operations Research");

310

311

additional = result.add(Type.PHDTHESIS);

312

additional.setValue(Field.AUTHOR, "Kim Cao-Van");

313

additional.setValue(Field.YEAR, "2003");

314

additional.setValue(Field.TITLE, "Supervised ranking: from semantics to algorithms");

315

additional.setValue(Field.SCHOOL, "Ghent University");

316

317

additional = result.add(Type.MASTERSTHESIS);

318

additional.setValue(Field.AUTHOR, "Stijn Lievens");

319

additional.setValue(Field.YEAR, "2004");

320

additional.setValue(Field.TITLE, "Studie en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd rangschikken");

321

additional.setValue(Field.SCHOOL, "Ghent University");

322

323

return result;

324

}

325

326

/**

327

* Returns default capabilities of the classifier.

328

329

* @return the capabilities of this classifier

330

331

public Capabilities getCapabilities() {

332

Capabilities result = super.getCapabilities();

333

334

// attributes

335

result.enable(Capability.NOMINAL_ATTRIBUTES);

336

337

// class

338

result.enable(Capability.NOMINAL_CLASS);

339

result.enable(Capability.MISSING_CLASS_VALUES);

340

341

// instances

342

result.setMinimumNumberInstances(0);

343

344

return result;

345

}

346

347

/**

348

* Classifies a given instance using the current settings

349

* of the classifier.

350

351

* @param instance the instance to be classified

352

* @throws Exception if for some reason no distribution

353

* could be predicted

354

* @return the classification for the instance. Depending on the

355

* settings of the classifier this is a double representing

356

* a classlabel (internal WEKA format) or a real value in the sense

357

* of regression.

358

359

public double classifyInstance(Instance instance)

360

throws Exception {

361

362

try {

363

return classifyInstance(instance, m_s, m_ctype);

364

} catch (IllegalArgumentException e) {

365

throw new AssertionError(e);

366

}

367

}

368

369

/**

370

* Classifies a given instance using the settings in the paramater

371

* list. This doesn't change the internal settings of the classifier.

372

* In particular the interpolationparameter <code> m_s </code>

373

* and the classification type <code> m_ctype </code> are not changed.

374

375

* @param instance the instance to be classified

376

* @param s the value of the interpolationparameter to be used

377

* @param ctype the classification type to be used

378

* @throws IllegalStateException for some reason no distribution

379

* could be predicted

380

* @throws IllegalArgumentException if the interpolation parameter or the

381

* classification type is not valid

382

* @return the label assigned to the instance. It is given in internal floating point format.

383

384

private double classifyInstance(Instance instance, double s, int ctype)

385

throws IllegalArgumentException, IllegalStateException {

386

387

if (s < 0 || s > 1) {

388

throw new IllegalArgumentException("Interpolation parameter is not valid " + s);

389

}

390

391

DiscreteDistribution dist = null;

392

if (!m_balanced) {

393

dist = distributionForInstance(instance, s);

394

} else {

395

dist = distributionForInstanceBalanced(instance, s);

396

}

397

398

if (dist == null) {

399

throw new IllegalStateException("Null distribution predicted");

400

}

401

402

double value = 0;

403

switch(ctype) {

404

case CT_REGRESSION:

405

case CT_WEIGHTED_SUM:

406

value = dist.mean();

407

if (ctype == CT_WEIGHTED_SUM) {

408

value = Math.round(value);

409

}

410

break;

411

412

case CT_MAXPROB:

413

value = dist.modes()[0];

414

break;

415

416

case CT_MEDIAN:

417

case CT_MEDIAN_REAL:

418

value = dist.median();

419

if (ctype == CT_MEDIAN) {

420

value = Math.round(value);

421

}

422

break;

423

424

default:

425

throw new IllegalArgumentException("Not a valid classification type!");

426

}

427

return value;

428

}

429

430

/**

431

* Calculates the class probabilities for the given test instance.

432

* Uses the current settings of the parameters if these are valid.

433

* If necessary it updates the interpolationparameter first, and hence

434

* this may change the classifier.

435

436

* @param instance the instance to be classified

437

* @return an array of doubles representing the predicted

438

* probability distribution over the class labels

439

440

public double[] distributionForInstance(Instance instance) {

441

442

if (m_tuneInterpolationParameter

443

&& !m_interpolationParameterValid) {

444

tuneInterpolationParameter();

445

}

446

447

if (!m_balanced) {

448

return distributionForInstance(instance, m_s).toArray();

449

}

450

// balanced variant

451

return distributionForInstanceBalanced(instance, m_s).toArray();

452

}

453

454

/**

455

* Calculates the cumulative class probabilities for the given test

456

* instance. Uses the current settings of the parameters if these are

457

* valid. If necessary it updates the interpolationparameter first,

458

* and hence this may change the classifier.

459

460

* @param instance the instance to be classified

461

* @return an array of doubles representing the predicted

462

* cumulative probability distribution over the class labels

463

464

public double[] cumulativeDistributionForInstance(Instance instance) {

465

466

if (m_tuneInterpolationParameter

467

&& !m_interpolationParameterValid) {

468

tuneInterpolationParameter();

469

}

470

471

if (!m_balanced) {

472

return cumulativeDistributionForInstance(instance, m_s).toArray();

473

}

474

return cumulativeDistributionForInstanceBalanced(instance, m_s).toArray();

475

}

476

477

/**

478

* Calculates the class probabilities for the given test instance.

479

* Uses the interpolation parameter from the parameterlist, and

480

* always performs the ordinary or weighted OSDL algorithm,

481

* according to the current settings of the classifier.

482

* This method doesn't change the classifier.

483

484

* @param instance the instance to classify

485

* @param s value of the interpolationparameter to use

486

* @return the calculated distribution

487

488

private DiscreteDistribution distributionForInstance(Instance instance, double s) {

489

return new DiscreteDistribution(cumulativeDistributionForInstance(instance, s));

490

}

491

492

/**

493

* Calculates the class probabilities for the given test

494

* instance. Uses the interpolationparameter from the parameterlist, and

495

* always performs the balanced OSDL algorithm.

496

* This method doesn't change the classifier.

497

498

* @param instance the instance to classify

499

* @param s value of the interpolationparameter to use

500

* @return the calculated distribution

501

502

private DiscreteDistribution distributionForInstanceBalanced(

503

Instance instance, double s) {

504

505

return new DiscreteDistribution(cumulativeDistributionForInstanceBalanced(instance,s));

506

}

507

508

/**

509

* Calculates the cumulative class probabilities for the given test

510

* instance. Uses the interpolationparameter from the parameterlist, and

511

* always performs the ordinary or weighted OSDL algorithm,

512

* according to the current settings of the classifier.

513

* This method doesn't change the classifier.

514

515

* @param instance the instance to classify

516

* @param s value of the interpolationparameter to use

517

* @return the calculated distribution

518

519

private CumulativeDiscreteDistribution cumulativeDistributionForInstance(

520

Instance instance, double s) {

521

522

Coordinates xc = new Coordinates(instance);

523

int n = instance.numClasses();

524

int nrSmaller = 0;

525

int nrGreater = 0;

526

527

if (!containsSmallestElement()) {

528

// corresponds to adding the minimal element to the data space

529

nrSmaller = 1; // avoid division by zero

530

}

531

532

if (!containsBiggestElement()) {

533

// corresponds to adding the maximal element to the data space

534

nrGreater = 1; // avoid division by zero

535

}

536

537

538

// Create fMin and fMax

539

CumulativeDiscreteDistribution fMin =

540

DistributionUtils.getMinimalCumulativeDiscreteDistribution(n);

541

CumulativeDiscreteDistribution fMax =

542

DistributionUtils.getMaximalCumulativeDiscreteDistribution(n);

543

544

// Cycle through all the map of cumulative distribution functions

545

for (Iterator i = m_estimatedCumulativeDistributions.keySet().iterator();

546

i.hasNext(); ) {

547

Coordinates yc = (Coordinates) i.next();

548

CumulativeDiscreteDistribution cdf =

549

(CumulativeDiscreteDistribution)

550

m_estimatedCumulativeDistributions.get(yc);

551

552

if (yc.equals(xc)) {

553

nrSmaller++;

554

fMin = DistributionUtils.takeMin(fMin,cdf);

555

nrGreater++;

556

fMax = DistributionUtils.takeMax(fMax,cdf);

557

} else if (yc.strictlySmaller(xc)) {

558

nrSmaller++;

559

fMin = DistributionUtils.takeMin(fMin,cdf);

560

} else if (xc.strictlySmaller(yc)) {

561

nrGreater++;

562

fMax = DistributionUtils.takeMax(fMax,cdf);

563

}

564

}

565

566

if (m_weighted) {

567

s = ( (double) nrSmaller) / (nrSmaller + nrGreater);

568

if (m_Debug) {

569

System.err.println("Weighted OSDL: interpolation parameter"

570

+ " is s = " + s);

571

}

572

}

573

574

// calculate s*fMin + (1-s)*fMax

575

return DistributionUtils.interpolate(fMin, fMax, 1 - s);

576

}

577

578

/**

579

* @return true if the learning examples contain an element for which

580

* the coordinates are the minimal element of the data space, false

581

* otherwise

582

583

private boolean containsSmallestElement() {

584

return m_estimatedCumulativeDistributions.containsKey(smallestElement);

585

}

586

587

/**

588

* @return true if the learning examples contain an element for which

589

* the coordinates are the maximal element of the data space, false

590

* otherwise

591

592

private boolean containsBiggestElement() {

593

return m_estimatedCumulativeDistributions.containsKey(biggestElement);

594

}

595

596

597

/**

598

* Calculates the cumulative class probabilities for the given test

599

* instance. Uses the interpolationparameter from the parameterlist, and

600

* always performs the single or double balanced OSDL algorithm.

601

* This method doesn't change the classifier.

602

603

* @param instance the instance to classify

604

* @param s value of the interpolationparameter to use

605

* @return the calculated distribution

606

607

private CumulativeDiscreteDistribution cumulativeDistributionForInstanceBalanced(

608

Instance instance, double s) {

609

610

Coordinates xc = new Coordinates(instance);

611

int n = instance.numClasses();

612

613

// n_m[i] represents the number of examples smaller or equal

614

// than xc and with a class label strictly greater than i

615

int[] n_m = new int[n];

616

617

// n_M[i] represents the number of examples greater or equal

618

// than xc and with a class label smaller or equal than i

619

int[] n_M = new int[n];

620

621

// Create fMin and fMax

622

CumulativeDiscreteDistribution fMin =

623

DistributionUtils.getMinimalCumulativeDiscreteDistribution(n);

624

CumulativeDiscreteDistribution fMax =

625

DistributionUtils.getMaximalCumulativeDiscreteDistribution(n);

626

627

// Cycle through all the map of cumulative distribution functions

628

for (Iterator i =

629

m_estimatedCumulativeDistributions.keySet().iterator();

630

i.hasNext(); ) {

631

Coordinates yc = (Coordinates) i.next();

632

CumulativeDiscreteDistribution cdf =

633

(CumulativeDiscreteDistribution)

634

m_estimatedCumulativeDistributions.get(yc);

635

636

if (yc.equals(xc)) {

637

// update n_m and n_M

638

DiscreteEstimator df =

639

(DiscreteEstimator) m_estimatedDistributions.get(yc);

640

updateN_m(n_m,df);

641

updateN_M(n_M,df);

642

643

fMin = DistributionUtils.takeMin(fMin,cdf);

644

fMax = DistributionUtils.takeMax(fMax,cdf);

645

} else if (yc.strictlySmaller(xc)) {

646

// update n_m

647

DiscreteEstimator df =

648

(DiscreteEstimator) m_estimatedDistributions.get(yc);

649

updateN_m(n_m, df);

650

fMin = DistributionUtils.takeMin(fMin,cdf);

651

}

652

else if (xc.strictlySmaller(yc)) {

653

// update n_M

654

DiscreteEstimator df =

655

(DiscreteEstimator) m_estimatedDistributions.get(yc);

656

updateN_M(n_M, df);

657

fMax = DistributionUtils.takeMax(fMax,cdf);

658

}

659

}

660

661

double[] dd = new double[n];

662

663

// for each label decide what formula to use, either using

664

// n_m[i] and n_M[i] (if fMin[i]<fMax[i]) or using the

665

// interpolationparameter s or using the double balanced version

666

for (int i = 0; i < n; i++) {

667

double fmin = fMin.getCumulativeProbability(i);

668

double fmax = fMax.getCumulativeProbability(i);

669

670

if (m_weighted == true) { // double balanced version

671

if (fmin < fmax) { // reversed preference

672

dd[i] = (n_m[i] * fmin + n_M[i] * fmax)

673

/ (n_m[i] + n_M[i]);

674

} else {

675

if (n_m[i] + n_M[i] == 0) { // avoid division by zero

676

dd[i] = s * fmin + (1 - s) * fmax;

677

} else {

678

dd[i] = (n_M[i] * fmin + n_m[i] * fmax)

679

/ (n_m[i] + n_M[i]) ;

680

}

681

}

682

} else { // singly balanced version

683

dd[i] = (fmin < fmax)

684

? (n_m[i] * fmin + n_M[i] * fmax) / (n_m[i] + n_M[i])

685

: s * fmin + (1 - s) * fmax;

686

}

687

} try {

688

return new CumulativeDiscreteDistribution(dd);

689

} catch (IllegalArgumentException e) {

690

// this shouldn't happen.

691

System.err.println("We tried to create a cumulative "

692

+ "discrete distribution from the following array");

693

for (int i = 0; i < dd.length; i++) {

694

System.err.print(dd[i] + " ");

695

}

696

System.err.println();

697

throw new AssertionError(dd);

698

}

699

}

700

701

702

/**

703

* Update the array n_m using the given <code> DiscreteEstimator </code>.

704

705

* @param n_m the array n_m that will be updated.

706

* @param de the <code> DiscreteEstimator </code> that gives the

707

* count over the different class labels.

708

709

private void updateN_m(int[] n_m, DiscreteEstimator de) {

710

int[] tmp = new int[n_m.length];

711

712

// all examples have a class labels strictly greater

713

// than 0, except those that have class label 0.

714

tmp[0] = (int) de.getSumOfCounts() - (int) de.getCount(0);

715

n_m[0] += tmp[0];

716

for (int i = 1; i < n_m.length; i++) {

717

718

// the examples with a class label strictly greater

719

// than i are exactly those that have a class label strictly

720

// greater than i-1, except those that have class label i.

721

tmp[i] = tmp[i - 1] - (int) de.getCount(i);

722

n_m[i] += tmp[i];

723

}

724

725

if (n_m[n_m.length - 1] != 0) {

726

// this shouldn't happen

727

System.err.println("******** Problem with n_m in "

728

+ m_train.relationName());

729

System.err.println("Last argument is non-zero, namely : "

730

+ n_m[n_m.length - 1]);

731

}

732

}

733

734

/**

735

* Update the array n_M using the given <code> DiscreteEstimator </code>.

736

737

* @param n_M the array n_M that will be updated.

738

* @param de the <code> DiscreteEstimator </code> that gives the

739

* count over the different class labels.

740

741

private void updateN_M(int[] n_M, DiscreteEstimator de) {

742

int n = n_M.length;

743

int[] tmp = new int[n];

744

745

// all examples have a class label smaller or equal

746

// than n-1 (which is the maximum class label)

747

tmp[n - 1] = (int) de.getSumOfCounts();

748

n_M[n - 1] += tmp[n - 1];

749

for (int i = n - 2; i >= 0; i--) {

750

751

// the examples with a class label smaller or equal

752

// than i are exactly those that have a class label

753

// smaller or equal than i+1, except those that have

754

// class label i+1.

755

tmp[i] = tmp[i + 1] - (int) de.getCount(i + 1);

756

n_M[i] += tmp[i];

757

}

758

}

759

760

/**

761

* Builds the classifier.

762

* This means that all relevant examples are stored into memory.

763

* If necessary the interpolation parameter is tuned.

764

765

* @param instances the instances to be used for building the classifier

766

* @throws Exception if the classifier can't be built successfully

767

768

public void buildClassifier(Instances instances) throws Exception {

769

770

getCapabilities().testWithFail(instances);

771

772

// copy the dataset

773

m_train = new Instances(instances);

774

775

// new dataset in which examples with missing class value are removed

776

m_train.deleteWithMissingClass();

777

778

// build the Map for the estimatedDistributions

779

m_estimatedDistributions = new HashMap(m_train.numInstances()/2);

780

781

// cycle through all instances

782

for (Iterator it =

783

new EnumerationIterator(instances.enumerateInstances());

784

it.hasNext();) {

785

Instance instance = (Instance) it.next();

786

Coordinates c = new Coordinates(instance);

787

788

// get DiscreteEstimator from the map

789

DiscreteEstimator df =

790

(DiscreteEstimator) m_estimatedDistributions.get(c);

791

792

// if no DiscreteEstimator is present in the map, create one

793

if (df == null) {

794

df = new DiscreteEstimator(instances.numClasses(),0);

795

}

796

df.addValue(instance.classValue(),instance.weight()); // update

797

m_estimatedDistributions.put(c,df); // put back in map

798

}

799

800

801

// build the map of cumulative distribution functions

802

m_estimatedCumulativeDistributions =

803

new HashMap(m_estimatedDistributions.size()/2);

804

805

// Cycle trough the map of discrete distributions, and create a new

806

// one containing cumulative discrete distributions

807

for (Iterator it=m_estimatedDistributions.keySet().iterator();

808

it.hasNext();) {

809

Coordinates c = (Coordinates) it.next();

810

DiscreteEstimator df =

811

(DiscreteEstimator) m_estimatedDistributions.get(c);

812

m_estimatedCumulativeDistributions.put

813

(c, new CumulativeDiscreteDistribution(df));

814

}

815

816

// check if the interpolation parameter needs to be tuned

817

if (m_tuneInterpolationParameter && !m_interpolationParameterValid) {

818

tuneInterpolationParameter();

819

}

820

821

// fill in the smallest and biggest element (for use in the

822

// quasi monotone version of the algorithm)

823

double[] tmpAttValues = new double[instances.numAttributes()];

824

Instance instance = new Instance(1, tmpAttValues);

825

instance.setDataset(instances);

826

smallestElement = new Coordinates(instance);

827

if (m_Debug) {

828

System.err.println("minimal element of data space = "

829

+ smallestElement);

830

}

831

for (int i = 0; i < tmpAttValues.length; i++) {

832

tmpAttValues[i] = instances.attribute(i).numValues() - 1;

833

}

834

835

instance = new Instance(1, tmpAttValues);

836

instance.setDataset(instances);

837

biggestElement = new Coordinates(instance);

838

if (m_Debug) {

839

System.err.println("maximal element of data space = "

840

+ biggestElement);

841

}

842

}

843

844

/**

845

* Returns the tip text for this property.

846

847

* @return tip text for this property suitable for

848

* displaying in the explorer/experimenter gui

849

850

public String classificationTypeTipText() {

851

return "Sets the way in which a single label will be extracted "

852

+ "from the estimated distribution.";

853

}

854

855

/**

856

* Sets the classification type. Currently <code> ctype </code>

857

* must be one of:

858

* <ul>

859

* <li> <code> CT_REGRESSION </code> : use expectation value of

860

* distribution. (Non-ordinal in nature).

861

* <li> <code> CT_WEIGHTED_SUM </code> : use expectation value of

862

* distribution rounded to nearest class label. (Non-ordinal in

863

* nature).

864

* <li> <code> CT_MAXPROB </code> : use the mode of the distribution.

865

* (May deliver non-monotone results).

866

* <li> <code> CT_MEDIAN </code> : use the median of the distribution

867

* (rounded to the nearest class label).

868

* <li> <code> CT_MEDIAN_REAL </code> : use the median of the distribution

869

* but not rounded to the nearest class label.

870

* </ul>

871

872

* @param value the classification type

873

874

public void setClassificationType(SelectedTag value) {

875

if (value.getTags() == TAGS_CLASSIFICATIONTYPES)

876

m_ctype = value.getSelectedTag().getID();

877

}

878

879

/**

880

* Returns the classification type.

881

882

* @return the classification type

883

884

public SelectedTag getClassificationType() {

885

return new SelectedTag(m_ctype, TAGS_CLASSIFICATIONTYPES);

886

}

887

888

889

/**

890

* Returns the tip text for this property.

891

892

* @return tip text for this property suitable for

893

* displaying in the explorer/experimenter gui

894

895

public String tuneInterpolationParameterTipText() {

896

return "Whether to tune the interpolation parameter based on the bounds.";

897

}

898

899

/**

900

* Sets whether the interpolation parameter is to be tuned based on the

901

* bounds.

902

903

* @param value if true the parameter is tuned

904

905

public void setTuneInterpolationParameter(boolean value) {

906

m_tuneInterpolationParameter = value;

907

}

908

909

/**

910

* Returns whether the interpolation parameter is to be tuned based on the

911

* bounds.

912

913

* @return true if the parameter is to be tuned

914

915

public boolean getTuneInterpolationParameter() {

916

return m_tuneInterpolationParameter;

917

}

918

919

/**

920

* Returns the tip text for this property.

921

922

* @return tip text for this property suitable for

923

* displaying in the explorer/experimenter gui

924

925

public String interpolationParameterLowerBoundTipText() {

926

return "Sets the lower bound for the interpolation parameter tuning (0 <= x < 1).";

927

}

928

929

/**

930

* Sets the lower bound for the interpolation parameter tuning

931

* (0 <= x < 1).

932

933

* @param value the tne lower bound

934

* @throws IllegalArgumentException if bound is invalid

935

936

public void setInterpolationParameterLowerBound(double value) {

937

if ( (value < 0) || (value >= 1) || (value > getInterpolationParameterUpperBound()) )

938

throw new IllegalArgumentException("Illegal lower bound");

939

940

m_sLower = value;

941

m_tuneInterpolationParameter = true;

942

m_interpolationParameterValid = false;

943

}

944

945

/**

946

* Returns the lower bound for the interpolation parameter tuning

947

* (0 <= x < 1).

948

949

* @return the lower bound

950

951

public double getInterpolationParameterLowerBound() {

952

return m_sLower;

953

}

954

955

/**

956

* Returns the tip text for this property.

957

958

* @return tip text for this property suitable for

959

* displaying in the explorer/experimenter gui

960

961

public String interpolationParameterUpperBoundTipText() {

962

return "Sets the upper bound for the interpolation parameter tuning (0 < x <= 1).";

963

}

964

965

/**

966

* Sets the upper bound for the interpolation parameter tuning

967

* (0 < x <= 1).

968

969

* @param value the tne upper bound

970

* @throws IllegalArgumentException if bound is invalid

971

972

public void setInterpolationParameterUpperBound(double value) {

973

if ( (value <= 0) || (value > 1) || (value < getInterpolationParameterLowerBound()) )

974

throw new IllegalArgumentException("Illegal upper bound");

975

976

m_sUpper = value;

977

m_tuneInterpolationParameter = true;

978

m_interpolationParameterValid = false;

979

}

980

981

/**

982

* Returns the upper bound for the interpolation parameter tuning

983

* (0 < x <= 1).

984

985

* @return the upper bound

986

987

public double getInterpolationParameterUpperBound() {

988

return m_sUpper;

989

}

990

991

/**

992

* Sets the interpolation bounds for the interpolation parameter.

993

* When tuning the interpolation parameter only values in the interval

994

* <code> [sLow, sUp] </code> are considered.

995

* It is important to note that using this method immediately

996

* implies that the interpolation parameter is to be tuned.

997

998

* @param sLow lower bound for the interpolation parameter,

999

* should not be smaller than 0 or greater than <code> sUp </code>

1000

* @param sUp upper bound for the interpolation parameter,

1001

* should not exceed 1 or be smaller than <code> sLow </code>

1002

* @throws IllegalArgumentException if one of the above conditions

1003

* is not satisfied.

1004

1005

public void setInterpolationParameterBounds(double sLow, double sUp)

1006

throws IllegalArgumentException {

1007

1008

if (sLow < 0. || sUp > 1. || sLow > sUp)

1009

throw new IllegalArgumentException("Illegal upper and lower bounds");

1010

m_sLower = sLow;

1011

m_sUpper = sUp;

1012

m_tuneInterpolationParameter = true;

1013

m_interpolationParameterValid = false;

1014

}

1015

1016

/**

1017

* Returns the tip text for this property.

1018

1019

* @return tip text for this property suitable for

1020

* displaying in the explorer/experimenter gui

1021

1022

public String interpolationParameterTipText() {

1023

return "Sets the value of the interpolation parameter s;"

1024

+ "Estimated distribution is s * f_min + (1 - s) * f_max. ";

1025

}

1026

1027

/**

1028

* Sets the interpolation parameter. This immediately means that

1029

* the interpolation parameter is not to be tuned.

1030

1031

* @param s value for the interpolation parameter.

1032

* @throws IllegalArgumentException if <code> s </code> is not in

1033

* the range [0,1].

1034

1035

public void setInterpolationParameter(double s)

1036

throws IllegalArgumentException {

1037

1038

if (0 > s || s > 1)

1039

throw new IllegalArgumentException("Interpolationparameter exceeds bounds");

1040

m_tuneInterpolationParameter = false;

1041

m_interpolationParameterValid = false;

1042

m_s = s;

1043

}

1044

1045

/**

1046

* Returns the current value of the interpolation parameter.

1047

1048

* @return the value of the interpolation parameter

1049

1050

public double getInterpolationParameter() {

1051

return m_s;

1052

}

1053

1054

/**

1055

* Returns the tip text for this property.

1056

1057

* @return tip text for this property suitable for

1058

* displaying in the explorer/experimenter gui

1059

1060

public String numberOfPartsForInterpolationParameterTipText() {

1061

return "Sets the granularity for tuning the interpolation parameter; "

1062

+ "For instance if the value is 32 then 33 values for the "

1063

+ "interpolation are checked.";

1064

}

1065

1066

/**

1067

* Sets the granularity for tuning the interpolation parameter.

1068

* The interval between lower and upper bounds for the interpolation

1069

* parameter is divided into <code> sParts </code> parts, i.e.

1070

* <code> sParts + 1 </code> values will be checked when

1071

* <code> tuneInterpolationParameter </code> is invoked.

1072

* This also means that the interpolation parameter is to

1073

* be tuned.

1074

1075

* @param sParts the number of parts

1076

* @throws IllegalArgumentException if <code> sParts </code> is

1077

* smaller or equal than 0.

1078

1079

public void setNumberOfPartsForInterpolationParameter(int sParts)

1080

throws IllegalArgumentException {

1081

1082

if (sParts <= 0)

1083

throw new IllegalArgumentException("Number of parts is negative");

1084

1085

m_tuneInterpolationParameter = true;

1086

if (m_sNrParts != sParts) {

1087

m_interpolationParameterValid = false;

1088

m_sNrParts = sParts;

1089

}

1090

}

1091

1092

/**

1093

* Gets the granularity for tuning the interpolation parameter.

1094

1095

* @return the number of parts in which the interval

1096

* <code> [s_low, s_up] </code> is to be split

1097

1098

public int getNumberOfPartsForInterpolationParameter() {

1099

return m_sNrParts;

1100

}

1101

1102

/**

1103

* Returns a string suitable for displaying in the gui/experimenter.

1104

1105

* @return tip text for this property suitable for

1106

* displaying in the explorer/experimenter gui

1107

1108

public String balancedTipText() {

1109

return "If true, the balanced version of the OSDL-algorithm is used\n"

1110

+ "This means that distinction is made between the normal and "

1111

+ "reversed preference situation.";

1112

}

1113

1114

/**

1115

* If <code> balanced </code> is <code> true </code> then the balanced

1116

* version of OSDL will be used, otherwise the ordinary version of

1117

* OSDL will be in effect.

1118

1119

* @param balanced if <code> true </code> then B-OSDL is used, otherwise

1120

* it is OSDL

1121

1122

public void setBalanced(boolean balanced) {

1123

m_balanced = balanced;

1124

}

1125

1126

/**

1127

* Returns if the balanced version of OSDL is in effect.

1128

1129

* @return <code> true </code> if the balanced version is in effect,

1130

* <code> false </code> otherwise

1131

1132

public boolean getBalanced() {

1133

return m_balanced;

1134

}

1135

1136

/**

1137

* Returns a string suitable for displaying in the gui/experimenter.

1138

1139

* @return tip text for this property suitable for

1140

* displaying in the explorer/experimenter gui

1141

1142

public String weightedTipText() {

1143

return "If true, the weighted version of the OSDL-algorithm is used";

1144

}

1145

1146

/**

1147

* If <code> weighted </code> is <code> true </code> then the

1148

* weighted version of the OSDL is used.

1149

* Note: using the weighted (non-balanced) version only ensures the

1150

* quasi monotonicity of the results w.r.t. to training set.

1151

1152

* @param weighted <code> true </code> if the weighted version to be used,

1153

* <code> false </code> otherwise

1154

1155

public void setWeighted(boolean weighted) {

1156

m_weighted = weighted;

1157

}

1158

1159

/**

1160

* Returns if the weighted version is in effect.

1161

1162

* @return <code> true </code> if the weighted version is in effect,

1163

* <code> false </code> otherwise.

1164

1165

public boolean getWeighted() {

1166

return m_weighted;

1167

}

1168

1169

/**

1170

* Returns the current value of the lower bound for the interpolation

1171

* parameter.

1172

1173

* @return the current value of the lower bound for the interpolation

1174

* parameter

1175

1176

public double getLowerBound() {

1177

return m_sLower;

1178

}

1179

1180

/**

1181

* Returns the current value of the upper bound for the interpolation

1182

* parameter.

1183

1184

* @return the current value of the upper bound for the interpolation

1185

* parameter

1186

1187

public double getUpperBound() {

1188

return m_sUpper;

1189

}

1190

1191

/**

1192

* Returns the number of instances in the training set.

1193

1194

* @return the number of instances used for training

1195

1196

public int getNumInstances() {

1197

return m_train.numInstances();

1198

}

1199

1200

/** Tune the interpolation parameter using the current

1201

* settings of the classifier.

1202

* This also sets the interpolation parameter.

1203

* @return the value of the tuned interpolation parameter.

1204

1205

public double tuneInterpolationParameter() {

1206

try {

1207

return tuneInterpolationParameter(m_sLower, m_sUpper, m_sNrParts, m_ctype);

1208

} catch (IllegalArgumentException e) {

1209

throw new AssertionError(e);

1210

}

1211

}

1212

1213

/**

1214

* Tunes the interpolation parameter using the given settings.

1215

* The parameters of the classifier are updated accordingly!

1216

* Marks the interpolation parameter as valid.

1217

1218

* @param sLow lower end point of interval of paramters to be examined

1219

* @param sUp upper end point of interval of paramters to be examined

1220

* @param sParts number of parts the interval is divided into. This thus determines

1221

* the granularity of the search

1222

* @param ctype the classification type to use

1223

* @return the value of the tuned interpolation parameter

1224

* @throws IllegalArgumentException if the given parameter list is not

1225

* valid

1226

1227

public double tuneInterpolationParameter(double sLow, double sUp, int sParts, int ctype)

1228

throws IllegalArgumentException {

1229

1230

setInterpolationParameterBounds(sLow, sUp);

1231

setNumberOfPartsForInterpolationParameter(sParts);

1232

setClassificationType(new SelectedTag(ctype, TAGS_CLASSIFICATIONTYPES));

1233

1234

m_s = crossValidate(sLow, sUp, sParts, ctype);

1235

m_tuneInterpolationParameter = true;

1236

m_interpolationParameterValid = true;

1237

return m_s;

1238

}

1239

1240

/**

1241

* Tunes the interpolation parameter using the current settings

1242

* of the classifier. This doesn't change the classifier, i.e.

1243

* none of the internal parameters is changed!

1244

1245

* @return the tuned value of the interpolation parameter

1246

* @throws IllegalArgumentException if somehow the current settings of the

1247

* classifier are illegal.

1248

1249

public double crossValidate() throws IllegalArgumentException {

1250

return crossValidate(m_sLower, m_sUpper, m_sNrParts, m_ctype);

1251

}

1252

1253

/**

1254

* Tune the interpolation parameter using leave-one-out

1255

* cross validation, the loss function used is the 1-0 loss

1256

* function.

1257

*

1258

* The given settings are used, but the classifier is not

1259

* updated!. Also, the interpolation parameter s is not

1260

* set.

1261

*

1262

1263

* @param sLow lower end point of interval of paramters to be examined

1264

* @param sUp upper end point of interval of paramters to be examined

1265

* @param sNrParts number of parts the interval is divided into. This thus determines

1266

* the granularity of the search

1267

* @param ctype the classification type to use

1268

* @return the best value for the interpolation parameter

1269

* @throws IllegalArgumentException if the settings for the

1270

* interpolation parameter are not valid or if the classification

1271

* type is not valid

1272

1273

public double crossValidate (double sLow, double sUp, int sNrParts, int ctype)

1274

throws IllegalArgumentException {

1275

1276

double[] performanceStats = new double[sNrParts + 1];

1277

return crossValidate(sLow, sUp, sNrParts, ctype,

1278

performanceStats, new ZeroOneLossFunction());

1279

}

1280

1281

/**

1282

* Tune the interpolation parameter using leave-one-out

1283

* cross validation. The given parameters are used, but

1284

* the classifier is not changed, in particular, the interpolation

1285

* parameter remains unchanged.

1286

1287

* @param sLow lower bound for interpolation parameter

1288

* @param sUp upper bound for interpolation parameter

1289

* @param sNrParts determines the granularity of the search

1290

* @param ctype the classification type to use

1291

* @param performanceStats array acting as output, and that will

1292

* contain the total loss of the leave-one-out cross validation for

1293

* each considered value of the interpolation parameter

1294

* @param lossFunction the loss function to use

1295

* @return the value of the interpolation parameter that is considered

1296

* best

1297

* @throws IllegalArgumentException the length of the array

1298

* <code> performanceStats </code> is not sufficient

1299

* @throws IllegalArgumentException if the interpolation parameters

1300

* are not valid

1301

* @throws IllegalArgumentException if the classification type is

1302

* not valid

1303

1304

public double crossValidate(double sLow, double sUp, int sNrParts,

1305

int ctype, double[] performanceStats,

1306

NominalLossFunction lossFunction) throws IllegalArgumentException {

1307

1308

if (performanceStats.length < sNrParts + 1) {

1309

throw new IllegalArgumentException("Length of array is not sufficient");

1310

}

1311

1312

if (!interpolationParametersValid(sLow, sUp, sNrParts)) {

1313

throw new IllegalArgumentException("Interpolation parameters are not valid");

1314

}

1315

1316

if (!classificationTypeValid(ctype)) {

1317

throw new IllegalArgumentException("Not a valid classification type " + ctype);

1318

}

1319

1320

Arrays.fill(performanceStats, 0, sNrParts + 1, 0);

1321

1322

// cycle through all instances

1323

for (Iterator it =

1324

new EnumerationIterator(m_train.enumerateInstances());

1325

it.hasNext(); ) {

1326

Instance instance = (Instance) it.next();

1327

double classValue = instance.classValue();

1328

removeInstance(instance);

1329

1330

double s = sLow;

1331

double step = (sUp - sLow) / sNrParts; //step size

1332

for (int i = 0; i <= sNrParts; i++, s += step) {

1333

try {

1334

performanceStats[i] +=

1335

lossFunction.loss(classValue,

1336

classifyInstance(instance, s, ctype));

1337

} catch (Exception exception) {

1338

1339

// XXX what should I do here, normally we shouldn't be here

1340

System.err.println(exception.getMessage());

1341

System.exit(1);

1342

}

1343

}

1344

1345

// XXX may be done more efficiently

1346

addInstance(instance); // update

1347

}

1348

1349

// select the 'best' value for s

1350

// to this end, we sort the array with the leave-one-out

1351

// performance statistics, and we choose the middle one

1352

// off all those that score 'best'

1353

1354

// new code, august 2004

1355

// new code, june 2005. If performanceStats is longer than

1356

// necessary, copy it first

1357

double[] tmp = performanceStats;

1358

if (performanceStats.length > sNrParts + 1) {

1359

tmp = new double[sNrParts + 1];

1360

System.arraycopy(performanceStats, 0, tmp, 0, tmp.length);

1361

}

1362

int[] sort = Utils.stableSort(tmp);

1363

int minIndex = 0;

1364

while (minIndex + 1 < tmp.length

1365

&& tmp[sort[minIndex + 1]] == tmp[sort[minIndex]]) {

1366

minIndex++;

1367

}

1368

minIndex = sort[minIndex / 2]; // middle one

1369

// int minIndex = Utils.minIndex(performanceStats); // OLD code

1370

1371

return sLow + minIndex * (sUp - sLow) / sNrParts;

1372

}

1373

1374

/**

1375

* Checks if <code> ctype </code> is a valid classification

1376

* type.

1377

* @param ctype the int to be checked

1378

* @return true if ctype is a valid classification type, false otherwise

1379

1380

private boolean classificationTypeValid(int ctype) {

1381

return ctype == CT_REGRESSION || ctype == CT_WEIGHTED_SUM

1382

|| ctype == CT_MAXPROB || ctype == CT_MEDIAN

1383

|| ctype == CT_MEDIAN_REAL;

1384

}

1385

1386

/**

1387

* Checks if the given parameters are valid interpolation parameters.

1388

* @param sLow lower bound for the interval

1389

* @param sUp upper bound for the interval

1390

* @param sNrParts the number of parts the interval has to be divided in

1391

* @return true is the given parameters are valid interpolation parameters,

1392

* false otherwise

1393

1394

private boolean interpolationParametersValid(double sLow, double sUp, int sNrParts) {

1395

return sLow >= 0 && sUp <= 1 && sLow 0

1396

|| sLow == sUp && sNrParts == 0;

1397

// special case included

1398

}

1399

1400

/**

1401

* Remove an instance from the classifier. Updates the hashmaps.

1402

* @param instance the instance to be removed.

1403

1404

private void removeInstance(Instance instance) {

1405

Coordinates c = new Coordinates(instance);

1406

1407

// Remove instance temporarily from the Maps with the distributions

1408

DiscreteEstimator df =

1409

(DiscreteEstimator) m_estimatedDistributions.get(c);

1410

1411

// remove from df

1412

df.addValue(instance.classValue(),-instance.weight());

1413

1414

if (Math.abs(df.getSumOfCounts() - 0) < Utils.SMALL) {

1415

1416

/* There was apparently only one example with coordinates c

1417

* in the training set, and now we removed it.

1418

* Remove the key c from both maps.

1419

1420

m_estimatedDistributions.remove(c);

1421

m_estimatedCumulativeDistributions.remove(c);

1422

}

1423

else {

1424

1425

// update both maps

1426

m_estimatedDistributions.put(c,df);

1427

m_estimatedCumulativeDistributions.put

1428

(c, new CumulativeDiscreteDistribution(df));

1429

}

1430

}

1431

1432

/**

1433

* Update the classifier using the given instance. Updates the hashmaps

1434

* @param instance the instance to be added

1435

1436

private void addInstance(Instance instance) {

1437

1438

Coordinates c = new Coordinates(instance);

1439

1440

// Get DiscreteEstimator from the map

1441

DiscreteEstimator df =

1442

(DiscreteEstimator) m_estimatedDistributions.get(c);

1443

1444

// If no DiscreteEstimator is present in the map, create one

1445

if (df == null) {

1446

df = new DiscreteEstimator(instance.dataset().numClasses(),0);

1447

}

1448

df.addValue(instance.classValue(),instance.weight()); // update df

1449

m_estimatedDistributions.put(c,df); // put back in map

1450

m_estimatedCumulativeDistributions.put

1451

(c, new CumulativeDiscreteDistribution(df));

1452

}

1453

1454

/**

1455

* Returns an enumeration describing the available options.

1456

* For a list of available options, see <code> setOptions </code>.

1457

1458

* @return an enumeration of all available options.

1459

1460

public Enumeration listOptions() {

1461

Vector options = new Vector();

1462

1463

Enumeration enm = super.listOptions();

1464

while (enm.hasMoreElements())

1465

options.addElement(enm.nextElement());

1466

1467

String description =

1468

"\tSets the classification type to be used.\n"

1469

+ "\t(Default: " + new SelectedTag(CT_MEDIAN, TAGS_CLASSIFICATIONTYPES) + ")";

1470

String synopsis = "-C " + Tag.toOptionList(TAGS_CLASSIFICATIONTYPES);

1471

String name = "C";

1472

options.addElement(new Option(description, name, 1, synopsis));

1473

1474

description = "\tUse the balanced version of the "

1475

+ "Ordinal Stochastic Dominance Learner";

1476

synopsis = "-B";

1477

name = "B";

1478

options.addElement(new Option(description, name, 1, synopsis));

1479

1480

description = "\tUse the weighted version of the "

1481

+ "Ordinal Stochastic Dominance Learner";

1482

synopsis = "-W";

1483

name = "W";

1484

options.addElement(new Option(description, name, 1, synopsis));

1485

1486

description =

1487

"\tSets the value of the interpolation parameter (not with -W/T/P/L/U)\n"

1488

+ "\t(default: 0.5).";

1489

synopsis = "-S <value of interpolation parameter>";

1490

name = "S";

1491

options.addElement(new Option(description, name, 1, synopsis));

1492

1493

description =

1494

"\tTune the interpolation parameter (not with -W/S)\n"

1495

+ "\t(default: off)";

1496

synopsis = "-T";

1497

name = "T";

1498

options.addElement(new Option(description, name, 0, synopsis));

1499

1500

description =

1501

"\tLower bound for the interpolation parameter (not with -W/S)\n"

1502

+ "\t(default: 0)";

1503

synopsis = "-L <Lower bound for interpolation parameter>";

1504

name="L";

1505

options.addElement(new Option(description, name, 1, synopsis));

1506

1507

description =

1508

"\tUpper bound for the interpolation parameter (not with -W/S)\n"

1509

+ "\t(default: 1)";

1510

synopsis = "-U <Upper bound for interpolation parameter>";

1511

name="U";

1512

options.addElement(new Option(description, name, 1, synopsis));

1513

1514

description =

1515

"\tDetermines the step size for tuning the interpolation\n"

1516

+ "\tparameter, nl. (U-L)/P (not with -W/S)\n"

1517

+ "\t(default: 10)";

1518

synopsis = "-P <Number of parts>";

1519

name="P";

1520

options.addElement(new Option(description, name, 1, synopsis));

1521

1522

return options.elements();

1523

}

1524

1525

/**

1526

* Parses the options for this object.

1527

1528

1529

* Valid options are:

1530

1531

* <pre> -D

1532

* If set, classifier is run in debug mode and

1533

* may output additional info to the console</pre>

1534

1535

* <pre> -C <REG|WSUM|MAX|MED|RMED>

1536

* Sets the classification type to be used.

1537

* (Default: MED)</pre>

1538

1539

* <pre> -B

1540

* Use the balanced version of the Ordinal Stochastic Dominance Learner</pre>

1541

1542

* <pre> -W

1543

* Use the weighted version of the Ordinal Stochastic Dominance Learner</pre>

1544

1545

* <pre> -S <value of interpolation parameter>

1546

* Sets the value of the interpolation parameter (not with -W/T/P/L/U)

1547

* (default: 0.5).</pre>

1548

1549

* <pre> -T

1550

* Tune the interpolation parameter (not with -W/S)

1551

* (default: off)</pre>

1552

1553

* <pre> -L <Lower bound for interpolation parameter>

1554

* Lower bound for the interpolation parameter (not with -W/S)

1555

* (default: 0)</pre>

1556

1557

* <pre> -U <Upper bound for interpolation parameter>

1558

* Upper bound for the interpolation parameter (not with -W/S)

1559

* (default: 1)</pre>

1560

1561

* <pre> -P <Number of parts>

1562

* Determines the step size for tuning the interpolation

1563

* parameter, nl. (U-L)/P (not with -W/S)

1564

* (default: 10)</pre>

1565

1566

1567

1568

* @param options the list of options as an array of strings

1569

* @throws Exception if an option is not supported

1570

1571

public void setOptions(String[] options) throws Exception {

1572

String args;

1573

1574

args = Utils.getOption('C',options);

1575

if (args.length() != 0)

1576

setClassificationType(new SelectedTag(args, TAGS_CLASSIFICATIONTYPES));

1577

else

1578

setClassificationType(new SelectedTag(CT_MEDIAN, TAGS_CLASSIFICATIONTYPES));

1579

1580

setBalanced(Utils.getFlag('B',options));

1581

1582

if (Utils.getFlag('W', options)) {

1583

m_weighted = true;

1584

// ignore any T, S, P, L and U options

1585

Utils.getOption('T', options);

1586

Utils.getOption('S', options);

1587

Utils.getOption('P', options);

1588

Utils.getOption('L', options);

1589

Utils.getOption('U', options);

1590

} else {

1591

m_tuneInterpolationParameter = Utils.getFlag('T', options);

1592

1593

if (!m_tuneInterpolationParameter) {

1594

// ignore P, L, U

1595

Utils.getOption('P', options);

1596

Utils.getOption('L', options);

1597

Utils.getOption('U', options);

1598

1599

// value of s

1600

args = Utils.getOption('S',options);

1601

if (args.length() != 0)

1602

setInterpolationParameter(Double.parseDouble(args));

1603

else

1604

setInterpolationParameter(0.5);

1605

}

1606

else {

1607

// ignore S

1608

Utils.getOption('S', options);

1609

1610

args = Utils.getOption('L',options);

1611

double l = m_sLower;

1612

if (args.length() != 0)

1613

l = Double.parseDouble(args);

1614

else

1615

l = 0.0;

1616

1617

args = Utils.getOption('U',options);

1618

double u = m_sUpper;

1619

if (args.length() != 0)

1620

u = Double.parseDouble(args);

1621

else

1622

u = 1.0;

1623

1624

if (m_tuneInterpolationParameter)

1625

setInterpolationParameterBounds(l, u);

1626

1627

args = Utils.getOption('P',options);

1628

if (args.length() != 0)

1629

setNumberOfPartsForInterpolationParameter(Integer.parseInt(args));

1630

else

1631

setNumberOfPartsForInterpolationParameter(10);

1632

}

1633

}

1634

1635

super.setOptions(options);

1636

}

1637

1638

/**

1639

* Gets the current settings of the OSDLCore classifier.

1640

1641

* @return an array of strings suitable for passing

1642

* to <code> setOptions </code>

1643

1644

public String[] getOptions() {

1645

int i;

1646

Vector result;

1647

String[] options;

1648

1649

result = new Vector();

1650

1651

options = super.getOptions();

1652

for (i = 0; i < options.length; i++)

1653

result.add(options[i]);

1654

1655

// classification type

1656

result.add("-C");

1657

result.add("" + getClassificationType());

1658

1659

if (m_balanced)

1660

result.add("-B");

1661

1662

if (m_weighted) {

1663

result.add("-W");

1664

}

1665

else {

1666

// interpolation parameter

1667

if (!m_tuneInterpolationParameter) {

1668

result.add("-S");

1669

result.add(Double.toString(m_s));

1670

}

1671

else {

1672

result.add("-T");

1673

result.add("-L");

1674

result.add(Double.toString(m_sLower));

1675

result.add("-U");

1676

result.add(Double.toString(m_sUpper));

1677

result.add("-P");

1678

result.add(Integer.toString(m_sNrParts));

1679

}

1680

}

1681

1682

return (String[]) result.toArray(new String[result.size()]);

1683

}

1684

1685

/**

1686

* Returns a description of the classifier.

1687

* Attention: if debugging is on, the description can be become

1688

* very lengthy.

1689

1690

* @return a string containing the description

1691

1692

public String toString() {

1693

StringBuffer sb = new StringBuffer();

1694

1695

// balanced or ordinary OSDL

1696

if (m_balanced) {

1697

sb.append("Balanced OSDL\n=============\n\n");

1698

} else {

1699

sb.append("Ordinary OSDL\n=============\n\n");

1700

}

1701

1702

if (m_weighted) {

1703

sb.append("Weighted variant\n");

1704

}

1705

1706

// classification type used

1707

sb.append("Classification type: " + getClassificationType() + "\n");

1708

1709

// parameter s

1710

if (!m_weighted) {

1711

sb.append("Interpolation parameter: " + m_s + "\n");

1712

if (m_tuneInterpolationParameter) {

1713

sb.append("Bounds and stepsize: " + m_sLower + " " + m_sUpper +

1714

" " + m_sNrParts + "\n");

1715

if (!m_interpolationParameterValid) {

1716

sb.append("Interpolation parameter is not valid");

1717

}

1718

}

1719

}

1720

1721

1722

if(m_Debug) {

1723

1724

if (m_estimatedCumulativeDistributions != null) {

1725

1726

* Cycle through all the map of cumulative distribution functions

1727

* and print each cumulative distribution function

1728

1729

for (Iterator i =

1730

m_estimatedCumulativeDistributions.keySet().iterator();

1731

i.hasNext(); ) {

1732

Coordinates yc = (Coordinates) i.next();

1733

CumulativeDiscreteDistribution cdf =

1734

(CumulativeDiscreteDistribution)

1735

m_estimatedCumulativeDistributions.get(yc);

1736

sb.append( "[" + yc.hashCode() + "] " + yc.toString()

1737

+ " --> " + cdf.toString() + "\n");

1738

}

1739

}

1740

}

1741

return sb.toString();

1742

}

1743

}

Older »