2
* This file is licensed to You under the "Simplified BSD License".
3
* You may not use this software except in compliance with the License.
4
* You may obtain a copy of the License at
6
* http://www.opensource.org/licenses/bsd-license.php
8
* See the COPYRIGHT file distributed with this work for information
9
* regarding copyright ownership.
11
package ch.usi.inf.sape.hac;
13
import ch.usi.inf.sape.hac.agglomeration.AgglomerationMethod;
14
import ch.usi.inf.sape.hac.experiment.DissimilarityMeasure;
15
import ch.usi.inf.sape.hac.experiment.Experiment;
19
* The HierarchicalAgglomerativeClusterer creates a hierarchical agglomerative clustering.
22
* Experiment experiment = ...;
23
* DissimilarityMeasure dissimilarityMeasure = ...;
24
* AgglomerationMethod agglomerationMethod = ...;
25
* DendrogramBuilder dendrogramBuilder = new DendrogramBuilder(experiment.getNumberOfObservations());
26
* HierarchicalAgglomerativeClusterer clusterer = new HierarchicalAgglomerativeClusterer(experiment, dissimilarityMeasure, agglomerationMethod);
27
* clusterer.cluster(dendrogramBuilder);
28
* Dendrogram dendrogram = dendrogramBuilder.getDendrogram();
31
* @author Matthias.Hauswirth@usi.ch
33
public final class HierarchicalAgglomerativeClusterer {
35
private Experiment experiment;
36
private DissimilarityMeasure dissimilarityMeasure;
37
private AgglomerationMethod agglomerationMethod;
40
public HierarchicalAgglomerativeClusterer(final Experiment experiment, final DissimilarityMeasure dissimilarityMeasure, final AgglomerationMethod agglomerationMethod) {
41
this.experiment = experiment;
42
this.dissimilarityMeasure = dissimilarityMeasure;
43
this.agglomerationMethod = agglomerationMethod;
46
public void setExperiment(final Experiment experiment) {
47
this.experiment = experiment;
50
public Experiment getExperiment() {
54
public void setDissimilarityMeasure(final DissimilarityMeasure dissimilarityMeasure) {
55
this.dissimilarityMeasure = dissimilarityMeasure;
58
public DissimilarityMeasure getDissimilarityMeasure() {
59
return dissimilarityMeasure;
62
public void setAgglomerationMethod(final AgglomerationMethod agglomerationMethod) {
63
this.agglomerationMethod = agglomerationMethod;
66
public AgglomerationMethod getAgglomerationMethod() {
67
return agglomerationMethod;
70
public void cluster(final ClusteringBuilder clusteringBuilder) {
71
final double[][] dissimilarityMatrix = computeDissimilarityMatrix();
72
final int nObservations = dissimilarityMatrix.length;
74
final boolean[] indexUsed = new boolean[nObservations];
75
final int[] clusterCardinalities = new int[nObservations];
76
for (int i = 0; i<nObservations; i++) {
78
clusterCardinalities[i] = 1;
81
// Perform nObservations-1 agglomerations
82
for (int a = 1; a<nObservations; a++) {
83
// Determine the two most similar clusters, i and j (such that i<j)
84
final Pair pair = findMostSimilarClusters(dissimilarityMatrix, indexUsed);
85
final int i = pair.getSmaller();
86
final int j = pair.getLarger();
87
final double d = dissimilarityMatrix[i][j];
90
System.out.println("Agglomeration #"+a+
91
": merging clusters "+i+
92
" (cardinality "+(clusterCardinalities[i])+") and "+j+
93
" (cardinality "+(clusterCardinalities[j])+") with dissimilarity "+d);
96
// cluster i becomes new cluster
97
// (by agglomerating former clusters i and j)
98
// update dissimilarityMatrix[i][*] and dissimilarityMatrix[*][i]
99
for (int k = 0; k<nObservations; k++) {
100
if ((k!=i)&&(k!=j)&&indexUsed[k]) {
101
final double dissimilarity = agglomerationMethod.computeDissimilarity(dissimilarityMatrix[i][k], dissimilarityMatrix[j][k],
102
dissimilarityMatrix[i][j], clusterCardinalities[i], clusterCardinalities[j], clusterCardinalities[k]);
103
dissimilarityMatrix[i][k] = dissimilarity;
104
dissimilarityMatrix[k][i] = dissimilarity;
107
clusterCardinalities[i] = clusterCardinalities[i]+clusterCardinalities[j];
110
indexUsed[j] = false;
111
for (int k = 0; k<nObservations; k++) {
112
dissimilarityMatrix[j][k] = Double.POSITIVE_INFINITY;
113
dissimilarityMatrix[k][j] = Double.POSITIVE_INFINITY;
117
clusteringBuilder.merge(i, j, d);
121
private double[][] computeDissimilarityMatrix() {
122
final double[][] dissimilarityMatrix = new double[experiment.getNumberOfObservations()][experiment.getNumberOfObservations()];
124
for (int o = 0; o<dissimilarityMatrix.length; o++) {
125
dissimilarityMatrix[o][o] = 0.0;
127
// fill rest (only compute half, then mirror accross diagonal, assuming
128
// a symmetric dissimilarity measure)
129
for (int o1 = 0; o1<dissimilarityMatrix.length; o1++) {
130
for (int o2 = 0; o2<o1; o2++) {
131
final double dissimilarity = dissimilarityMeasure.computeDissimilarity(experiment, o1, o2);
132
dissimilarityMatrix[o1][o2] = dissimilarity;
133
dissimilarityMatrix[o2][o1] = dissimilarity;
136
return dissimilarityMatrix;
139
private static Pair findMostSimilarClusters(final double[][] dissimilarityMatrix, final boolean[] indexUsed) {
140
final Pair mostSimilarPair = new Pair();
141
double smallestDissimilarity = Double.POSITIVE_INFINITY;
142
for (int cluster = 0; cluster<dissimilarityMatrix.length; cluster++) {
143
if (indexUsed[cluster]) {
144
for (int neighbor = 0; neighbor<dissimilarityMatrix.length; neighbor++) {
145
if (indexUsed[neighbor]&&dissimilarityMatrix[cluster][neighbor]<smallestDissimilarity&&cluster!=neighbor) {
146
smallestDissimilarity = dissimilarityMatrix[cluster][neighbor];
147
mostSimilarPair.set(cluster, neighbor);
152
return mostSimilarPair;
156
private static final class Pair {
158
private int cluster1;
159
private int cluster2;
162
public final void set(final int cluster1, final int cluster2) {
163
this.cluster1 = cluster1;
164
this.cluster2 = cluster2;
167
public final int getLarger() {
168
return Math.max(cluster1, cluster2);
171
public final int getSmaller() {
172
return Math.min(cluster1, cluster2);