181
180
QStringList identifyingProperties;
182
181
QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
184
183
QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
185
184
QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
185
QList<Soprano::Node> requiredTypes;
186
186
for( ; it != constEnd; it++ ) {
187
187
const QUrl & prop = it.key();
189
189
// Special handling for rdf:type
190
190
if( prop == RDF::type() ) {
191
query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() );
191
requiredTypes << it.value().uri();
235
239
QString::number( numIdentifyingProperties++ ) );
238
// Make sure atleast one of the identification properties has been matched
242
// Make sure at least one of the identification properties has been matched
239
243
// by adding filter( bound(?o1) || bound(?o2) ... )
240
244
query += QString::fromLatin1("filter( ");
241
245
for( int i=0; i<numIdentifyingProperties-1; i++ ) {
247
251
query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
248
252
identifyingPropertiesHash.constBegin().value().toN3());
250
query += QLatin1String("}");
252
// Construct the entire query
253
QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt "
254
"where { ?r ?p ?o. filter( ?p in (%1) ).")
255
.arg( identifyingProperties.join(",") );
257
query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
262
// Only store the results which have the maximum score
256
// For performance reasons we add a limit even though this could mean that we
257
// miss a resource to identify since we check the types below.
259
query += QLatin1String("} LIMIT 100");
263
// Fetch a score for each result.
264
// We do this in a separate query for performance reasons.
266
QMultiHash<int, KUrl> resultsScoreHash;
266
268
Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
267
269
while( qit.next() ) {
268
//kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
270
int count = qit["cnt"].literal().toInt();
274
else if( count < score )
277
results << qit["r"].uri();
270
const Soprano::Node r(qit["r"]);
273
// Check the type requirements. Experiments have shown this to mean a substantial
274
// performance boost as compared to doing it in the main query.
276
if(!requiredTypes.isEmpty() ) {
277
query = QLatin1String("ask where { ");
278
foreach(const Soprano::Node& type, requiredTypes) {
279
query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
281
query += QLatin1String("}");
282
if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
288
const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
289
"%1 ?p ?o. filter( ?p in (%2) ) . }")
291
identifyingProperties.join(",") ),
292
Soprano::Query::QueryLanguageSparql)
293
.allBindings().first()["cnt"].literal().toInt();
295
if( maxScore < score ) {
299
resultsScoreHash.insert(score, r.uri());
303
// Only get the results which have the maximum score
305
QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
280
308
//kDebug() << "Got " << results.size() << " results";
281
309
if( results.empty() )
285
if( results.size() == 1 )
313
if( results.size() == 1 ) {
286
314
newUri = *results.begin();
288
317
kDebug() << "DUPLICATE RESULTS!";
289
318
newUri = duplicateMatch( res.uri(), results );