34
34
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
35
35
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
36
36
import de.lmu.ifi.dbs.elki.database.QueryUtil;
37
import de.lmu.ifi.dbs.elki.database.ids.DBID;
38
37
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
38
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
39
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
39
40
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
41
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
40
42
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
41
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
42
43
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
43
44
import de.lmu.ifi.dbs.elki.database.relation.Relation;
44
45
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
46
import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
45
47
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
46
48
import de.lmu.ifi.dbs.elki.logging.Logging;
47
49
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
78
80
* The logger for this class.
80
private static final Logging logger = Logging.getLogger(DBSCAN.class);
82
private static final Logging LOG = Logging.getLogger(DBSCAN.class);
83
85
* Parameter to specify the maximum radius of the neighborhood to be
84
86
* considered, must be suitable to the distance function specified.
86
public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
88
public static final OptionID EPSILON_ID = new OptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
89
91
* Holds the value of {@link #EPSILON_ID}.
94
96
* Parameter to specify the threshold for minimum number of points in the
95
97
* epsilon-neighborhood of a point, must be an integer greater than 0.
97
public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
99
public static final OptionID MINPTS_ID = new OptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
100
102
* Holds the value of {@link #MINPTS_ID}.
136
138
RangeQuery<O, D> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
137
139
final int size = relation.size();
139
FiniteProgress objprog = logger.isVerbose() ? new FiniteProgress("Processing objects", size, logger) : null;
140
IndefiniteProgress clusprog = logger.isVerbose() ? new IndefiniteProgress("Number of clusters", logger) : null;
141
FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null;
142
IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
141
143
resultList = new ArrayList<ModifiableDBIDs>();
142
144
noise = DBIDUtil.newHashSet();
143
145
processedIDs = DBIDUtil.newHashSet(size);
147
// The can't be any clusters
148
noise.addDBIDs(relation.getDBIDs());
149
objprog.setProcessed(noise.size(), LOG);
145
152
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
146
153
if(!processedIDs.contains(iditer)) {
147
expandCluster(relation, rangeQuery, iditer.getDBID(), objprog, clusprog);
154
expandCluster(relation, rangeQuery, iditer, objprog, clusprog);
149
156
if(objprog != null && clusprog != null) {
150
objprog.setProcessed(processedIDs.size(), logger);
151
clusprog.setProcessed(resultList.size(), logger);
157
objprog.setProcessed(processedIDs.size(), LOG);
158
clusprog.setProcessed(resultList.size(), LOG);
153
160
if(processedIDs.size() == size) {
159
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
161
if(objprog != null && clusprog != null) {
162
objprog.setProcessed(noise.size(), logger);
163
clusprog.setProcessed(resultList.size(), logger);
167
165
// Finish progress logging
168
166
if(objprog != null) {
169
objprog.ensureCompleted(logger);
167
objprog.ensureCompleted(LOG);
171
169
if(clusprog != null) {
172
clusprog.setCompleted(logger);
170
clusprog.setCompleted(LOG);
175
173
Clustering<Model> result = new Clustering<Model>("DBSCAN Clustering", "dbscan-clustering");
194
192
* @param startObjectID potential seed of a new potential cluster
195
193
* @param objprog the progress object for logging the current status
197
protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
198
List<DistanceResultPair<D>> seeds = rangeQuery.getRangeForDBID(startObjectID, epsilon);
195
protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
196
DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
200
198
// startObject is no core-object
201
if(seeds.size() < minpts) {
199
if(neighbors.size() < minpts) {
202
200
noise.add(startObjectID);
203
201
processedIDs.add(startObjectID);
204
202
if(objprog != null && clusprog != null) {
205
objprog.setProcessed(processedIDs.size(), logger);
206
clusprog.setProcessed(resultList.size(), logger);
203
objprog.setProcessed(processedIDs.size(), LOG);
204
clusprog.setProcessed(resultList.size(), LOG);
211
209
// try to expand the cluster
210
HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet();
212
211
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
213
for(DistanceResultPair<D> seed : seeds) {
212
for(DBIDIter seed = neighbors.iter(); seed.valid(); seed.advance()) {
214
213
if(!processedIDs.contains(seed)) {
215
214
currentCluster.add(seed);
216
215
processedIDs.add(seed);
218
218
else if(noise.contains(seed)) {
219
219
currentCluster.add(seed);
220
220
noise.remove(seed);
223
seeds.remove(startObjectID);
225
225
while(seeds.size() > 0) {
226
DistanceResultPair<D> o = seeds.remove(0);
227
List<DistanceResultPair<D>> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
226
DBIDMIter o = seeds.iter();
227
DistanceDBIDResult<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
229
230
if(neighborhood.size() >= minpts) {
230
for(DistanceResultPair<D> neighbor : neighborhood) {
231
for(DBIDIter neighbor = neighborhood.iter(); neighbor.valid(); neighbor.advance()) {
231
232
boolean inNoise = noise.contains(neighbor);
232
233
boolean unclassified = !processedIDs.contains(neighbor);
233
234
if(inNoise || unclassified) {
250
251
if(objprog != null && clusprog != null) {
251
objprog.setProcessed(processedIDs.size(), logger);
252
objprog.setProcessed(processedIDs.size(), LOG);
252
253
int numClusters = currentCluster.size() > minpts ? resultList.size() + 1 : resultList.size();
253
clusprog.setProcessed(numClusters, logger);
254
clusprog.setProcessed(numClusters, LOG);
256
257
if(currentCluster.size() >= minpts) {