~slub.team/goobi-indexserver/3.x

* <pre class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&extendedResults=true&q=facial+salophosphoprotein</pre>

150

* <pre class="code">

151

<?xml version="1.0" encoding="UTF-8"?>

152

153

154

155

156

157

</lst>

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

</lst>

176

</lst>

177

178

179

180

181

182

183

184

</lst>

185

</lst>

186

</lst>

187

</response> </pre>

188

189

190

* @see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">The Lucene Spellchecker documentation</a>

191

192

193

* @deprecated Use {@link org.apache.solr.handler.component.SpellCheckComponent} instead.

194

195

* See also https://issues.apache.org/jira/browse/SOLR-474 and https://issues.apache.org/jira/browse/SOLR-485

196

197

198

@Deprecated

199

public class SpellCheckerRequestHandler extends RequestHandlerBase implements SolrCoreAware {

200

201

private static Logger log = LoggerFactory.getLogger(SpellCheckerRequestHandler.class);

202

203

private SpellChecker spellChecker;

204

205

206

* From http://wiki.apache.org/jakarta-lucene/SpellChecker

207

* If reader and restrictToField are both not null:

208

* 1. The returned words are restricted only to the words presents in the field

209

* "restrictToField "of the Lucene Index "reader".

210

211

* 2. The list is also sorted with a second criterium: the popularity (the

212

* frequence) of the word in the user field.

213

214

* 3. If "onlyMorePopular" is true and the mispelled word exist in the user field,

215

* return only the words more frequent than this.

216

217

218

219

protected Directory spellcheckerIndexDir = new RAMDirectory();

220

protected String dirDescription = "(ramdir)";

221

protected String termSourceField;

222

223

protected static final String PREFIX = "sp.";

224

protected static final String QUERY_PREFIX = PREFIX + "query.";

225

protected static final String DICTIONARY_PREFIX = PREFIX + "dictionary.";

226

227

protected static final String SOURCE_FIELD = DICTIONARY_PREFIX + "termSourceField";

228

protected static final String INDEX_DIR = DICTIONARY_PREFIX + "indexDir";

229

protected static final String THRESHOLD = DICTIONARY_PREFIX + "threshold";

230

231

protected static final String ACCURACY = QUERY_PREFIX + "accuracy";

232

protected static final String SUGGESTIONS = QUERY_PREFIX + "suggestionCount";

233

protected static final String POPULAR = QUERY_PREFIX + "onlyMorePopular";

234

protected static final String EXTENDED = QUERY_PREFIX + "extendedResults";

235

236

protected static final float DEFAULT_ACCURACY = 0.5f;

237

protected static final int DEFAULT_SUGGESTION_COUNT = 1;

238

protected static final boolean DEFAULT_MORE_POPULAR = false;

239

protected static final boolean DEFAULT_EXTENDED_RESULTS = false;

240

protected static final float DEFAULT_DICTIONARY_THRESHOLD = 0.0f;

241

242

protected SolrParams args = null;

243

244

@Override

245

public void init(NamedList args) {

246

super.init(args);

247

this.args = SolrParams.toSolrParams(args);

248

}

249

250

public void inform(SolrCore core)

251

{

252

termSourceField = args.get(SOURCE_FIELD, args.get("termSourceField"));

253

try {

254

String dir = args.get(INDEX_DIR, args.get("spellcheckerIndexDir"));

255

if (null != dir) {

256

File f = new File(dir);

257

if ( ! f.isAbsolute() ) {

258

f = new File(core.getDataDir(), dir);

259

}

260

dirDescription = f.getAbsolutePath();

261

log.info("using spell directory: " + dirDescription);

262

spellcheckerIndexDir = FSDirectory.open(f);

263

} else {

264

log.info("using RAM based spell directory");

265

}

266

spellChecker = new SpellChecker(spellcheckerIndexDir);

267

} catch (IOException e) {

268

throw new RuntimeException("Cannot open SpellChecker index", e);

269

}

270

}

271

272

/**

273

* Processes the following query string parameters: q, extendedResults, cmd rebuild,

274

* cmd reopen, accuracy, suggestionCount, restrictToField, and onlyMorePopular.

275

276

@Override

277

public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)

278

throws Exception {

279

SolrParams p = req.getParams();

280

String words = p.get("q");

281

String cmd = p.get("cmd");

282

if (cmd != null) {

283

cmd = cmd.trim();

284

if (cmd.equals("rebuild")) {

285

rebuild(req);

286

rsp.add("cmdExecuted","rebuild");

287

} else if (cmd.equals("reopen")) {

288

reopen();

289

rsp.add("cmdExecuted","reopen");

290

} else {

291

throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unrecognized Command: " + cmd);

292

}

293

}

294

295

// empty query string

296

if (null == words || "".equals(words.trim())) {

297

return;

298

}

299

300

IndexReader indexReader = null;

301

String suggestionField = null;

302

Float accuracy;

303

int numSug;

304

boolean onlyMorePopular;

305

boolean extendedResults;

306

try {

307

accuracy = p.getFloat(ACCURACY, p.getFloat("accuracy", DEFAULT_ACCURACY));

308

spellChecker.setAccuracy(accuracy);

309

} catch (NumberFormatException e) {

310

throw new RuntimeException("Accuracy must be a valid positive float", e);

311

}

312

try {

313

numSug = p.getInt(SUGGESTIONS, p.getInt("suggestionCount", DEFAULT_SUGGESTION_COUNT));

314

} catch (NumberFormatException e) {

315

throw new RuntimeException("Spelling suggestion count must be a valid positive integer", e);

316

}

317

try {

318

onlyMorePopular = p.getBool(POPULAR, DEFAULT_MORE_POPULAR);

319

} catch (SolrException e) {

320

throw new RuntimeException("'Only more popular' must be a valid boolean", e);

321

}

322

try {

323

extendedResults = p.getBool(EXTENDED, DEFAULT_EXTENDED_RESULTS);

324

} catch (SolrException e) {

325

throw new RuntimeException("'Extended results' must be a valid boolean", e);

326

}

327

328

// when searching for more popular, a non null index-reader and

329

// restricted-field are required

330

if (onlyMorePopular || extendedResults) {

331

indexReader = req.getSearcher().getReader();

332

suggestionField = termSourceField;

333

}

334

335

if (extendedResults) {

336

337

rsp.add("numDocs", indexReader.numDocs());

338

339

SimpleOrderedMap<Object> results = new SimpleOrderedMap<Object>();

340

String[] wordz = words.split(" ");

341

for (String word : wordz)

342

{

343

SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();

344

nl.add("frequency", indexReader.docFreq(new Term(suggestionField, word)));

345

String[] suggestions =

346

spellChecker.suggestSimilar(word, numSug,

347

indexReader, suggestionField, onlyMorePopular);

348

349

// suggestion array

350

NamedList<Object> sa = new NamedList<Object>();

351

for (int i=0; i<suggestions.length; i++) {

352

// suggestion item

353

SimpleOrderedMap<Object> si = new SimpleOrderedMap<Object>();

354

si.add("frequency", indexReader.docFreq(new Term(termSourceField, suggestions[i])));

355

sa.add(suggestions[i], si);

356

}

357

nl.add("suggestions", sa);

358

results.add(word, nl);

359

}

360

rsp.add( "result", results );

361

362

} else {

363

rsp.add("words", words);

364

if (spellChecker.exist(words)) {

365

rsp.add("exist","true");

366

} else {

367

rsp.add("exist","false");

368

}

369

String[] suggestions =

370

spellChecker.suggestSimilar(words, numSug,

371

indexReader, suggestionField,

372

onlyMorePopular);

373

374

rsp.add("suggestions", Arrays.asList(suggestions));

375

}

376

}

377

378

/** Returns a dictionary to be used when building the spell-checker index.

379

* Override the method for custom dictionary

380

381

protected Dictionary getDictionary(SolrQueryRequest req) {

382

float threshold;

383

try {

384

threshold = req.getParams().getFloat(THRESHOLD, DEFAULT_DICTIONARY_THRESHOLD);

385

} catch (NumberFormatException e) {

386

throw new RuntimeException("Threshold must be a valid positive float", e);

387

}

388

IndexReader indexReader = req.getSearcher().getReader();

389

return new HighFrequencyDictionary(indexReader, termSourceField, threshold);

390

}

391

392

/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the

393

* index pointed to by the current {@link IndexSearcher}.

394

* Any word appearing in less that thresh documents will not be added to the spellcheck index.

395

396

private void rebuild(SolrQueryRequest req) throws IOException, SolrException {

397

if (null == termSourceField) {

398

throw new SolrException

399

(SolrException.ErrorCode.SERVER_ERROR, "can't rebuild spellchecker index without termSourceField configured");

400

}

401

402

Dictionary dictionary = getDictionary(req);

403

spellChecker.clearIndex();

404

spellChecker.indexDictionary(dictionary, new IndexWriterConfig(req.getCore().getSolrConfig().luceneMatchVersion, null), false);

405

reopen();

406

}

407

408

/**

409

* Reopens the SpellChecker index directory.

410

* Useful if an external process is responsible for building

411

* the spell checker index.

412

413

private void reopen() throws IOException {

414

spellChecker.setSpellIndex(spellcheckerIndexDir);

415

}

416

417

//////////////////////// SolrInfoMBeans methods //////////////////////

418

419

@Override

420

public String getVersion() {

421

return "$Revision: 1197478 $";

422

}

423

424

@Override

425

public String getDescription() {

426

return "The SpellChecker Solr request handler for SpellChecker index: " + dirDescription;

427

}

428

429

@Override

430

public String getSourceId() {

431

return "$Id: SpellCheckerRequestHandler.java 1197478 2011-11-04 10:10:03Z rmuir $";

432

}

433

434

@Override

435

public String getSource() {

436

return "$URL: http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_5_0/solr/core/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java $";

437

}

438

439

@Override

440

public URL[] getDocs() {

441

return null;

442

}

443

}

Older »