~ubuntu-branches/ubuntu/trusty/ruby-ferret/trusty

« back to all changes in this revision

Viewing changes to ext/r_qparser.c

Committer: Bazaar Package Importer
Author(s): Antonio Terceiro
Date: 2011-07-28 00:02:49 UTC
Revision ID: james.westby@ubuntu.com-20110728000249-v0443y69ftcpxwi6

Tags: upstream-0.11.6

Import upstream version 0.11.6

files added:

CHANGELOG

MIT-LICENSE

README

Rakefile

TODO

TUTORIAL

bin/ferret-browser

ext/analysis.c

ext/analysis.h

ext/api.c

ext/api.h

ext/array.c

ext/array.h

ext/bitvector.c

ext/bitvector.h

ext/compound_io.c

ext/config.h

ext/document.c

ext/document.h

ext/except.c

ext/except.h

ext/extconf.rb

ext/ferret.c

ext/ferret.h

ext/filter.c

ext/fs_store.c

ext/global.c

ext/global.h

ext/hash.c

ext/hash.h

ext/hashset.c

ext/hashset.h

ext/header.h

ext/helper.c

ext/helper.h

ext/inc

ext/inc/lang.h

ext/inc/threading.h

ext/index.c

ext/index.h

ext/lang.h

ext/libstemmer.c

ext/libstemmer.h

ext/mempool.c

ext/mempool.h

ext/modules.h

ext/multimapper.c

ext/multimapper.h

ext/posh.c

ext/posh.h

ext/priorityqueue.c

ext/priorityqueue.h

ext/q_boolean.c

ext/q_const_score.c

ext/q_filtered_query.c

ext/q_fuzzy.c

ext/q_match_all.c

ext/q_multi_term.c

ext/q_parser.c

ext/q_phrase.c

ext/q_prefix.c

ext/q_range.c

ext/q_span.c

ext/q_term.c

ext/q_wildcard.c

ext/r_analysis.c

ext/r_index.c

ext/r_qparser.c

ext/r_search.c

ext/r_store.c

ext/r_utils.c

ext/ram_store.c

ext/search.c

ext/search.h

ext/similarity.c

ext/similarity.h

ext/sort.c

ext/stem_ISO_8859_1_danish.c

ext/stem_ISO_8859_1_danish.h

ext/stem_ISO_8859_1_dutch.c

ext/stem_ISO_8859_1_dutch.h

ext/stem_ISO_8859_1_english.c

ext/stem_ISO_8859_1_english.h

ext/stem_ISO_8859_1_finnish.c

ext/stem_ISO_8859_1_finnish.h

ext/stem_ISO_8859_1_french.c

ext/stem_ISO_8859_1_french.h

ext/stem_ISO_8859_1_german.c

ext/stem_ISO_8859_1_german.h

ext/stem_ISO_8859_1_italian.c

ext/stem_ISO_8859_1_italian.h

ext/stem_ISO_8859_1_norwegian.c

ext/stem_ISO_8859_1_norwegian.h

ext/stem_ISO_8859_1_porter.c

ext/stem_ISO_8859_1_porter.h

ext/stem_ISO_8859_1_portuguese.c

ext/stem_ISO_8859_1_portuguese.h

ext/stem_ISO_8859_1_spanish.c

ext/stem_ISO_8859_1_spanish.h

ext/stem_ISO_8859_1_swedish.c

ext/stem_ISO_8859_1_swedish.h

ext/stem_KOI8_R_russian.c

ext/stem_KOI8_R_russian.h

ext/stem_UTF_8_danish.c

ext/stem_UTF_8_danish.h

ext/stem_UTF_8_dutch.c

ext/stem_UTF_8_dutch.h

ext/stem_UTF_8_english.c

ext/stem_UTF_8_english.h

ext/stem_UTF_8_finnish.c

ext/stem_UTF_8_finnish.h

ext/stem_UTF_8_french.c

ext/stem_UTF_8_french.h

ext/stem_UTF_8_german.c

ext/stem_UTF_8_german.h

ext/stem_UTF_8_italian.c

ext/stem_UTF_8_italian.h

ext/stem_UTF_8_norwegian.c

ext/stem_UTF_8_norwegian.h

ext/stem_UTF_8_porter.c

ext/stem_UTF_8_porter.h

ext/stem_UTF_8_portuguese.c

ext/stem_UTF_8_portuguese.h

ext/stem_UTF_8_russian.c

ext/stem_UTF_8_russian.h

ext/stem_UTF_8_spanish.c

ext/stem_UTF_8_spanish.h

ext/stem_UTF_8_swedish.c

ext/stem_UTF_8_swedish.h

ext/stopwords.c

ext/store.c

ext/store.h

ext/term_vectors.c

ext/threading.h

ext/utilities.c

ext/win32.h

lib/ferret

lib/ferret.rb

lib/ferret/browser

lib/ferret/browser.rb

lib/ferret/browser/s

lib/ferret/browser/s/global.js

lib/ferret/browser/s/style.css

lib/ferret/browser/views

lib/ferret/browser/views/document

lib/ferret/browser/views/document/list.rhtml

lib/ferret/browser/views/document/show.rhtml

lib/ferret/browser/views/error

lib/ferret/browser/views/error/index.rhtml

lib/ferret/browser/views/help

lib/ferret/browser/views/help/index.rhtml

lib/ferret/browser/views/home

lib/ferret/browser/views/home/index.rhtml

lib/ferret/browser/views/layout.rhtml

lib/ferret/browser/views/term

lib/ferret/browser/views/term-vector

lib/ferret/browser/views/term-vector/index.rhtml

lib/ferret/browser/views/term/index.rhtml

lib/ferret/browser/views/term/termdocs.rhtml

lib/ferret/browser/webrick.rb

lib/ferret/document.rb

lib/ferret/field_infos.rb

lib/ferret/index.rb

lib/ferret/number_tools.rb

lib/ferret_version.rb

setup.rb

test

test/test_all.rb

test/test_helper.rb

test/threading

test/threading/number_to_spoken.rb

test/threading/thread_safety_index_test.rb

test/threading/thread_safety_read_write_test.rb

test/threading/thread_safety_test.rb

test/unit

test/unit/analysis

test/unit/analysis/tc_analyzer.rb

test/unit/analysis/tc_token_stream.rb

test/unit/index

test/unit/index/tc_index.rb

test/unit/index/tc_index_reader.rb

test/unit/index/tc_index_writer.rb

test/unit/index/th_doc.rb

test/unit/largefile

test/unit/largefile/tc_largefile.rb

test/unit/query_parser

test/unit/query_parser/tc_query_parser.rb

test/unit/search

test/unit/search/tc_filter.rb

test/unit/search/tc_fuzzy_query.rb

test/unit/search/tc_index_searcher.rb

test/unit/search/tc_multi_searcher.rb

test/unit/search/tc_multiple_search_requests.rb

test/unit/search/tc_search_and_sort.rb

test/unit/search/tc_sort.rb

test/unit/search/tc_sort_field.rb

test/unit/search/tc_spans.rb

test/unit/search/tm_searcher.rb

test/unit/store

test/unit/store/tc_fs_store.rb

test/unit/store/tc_ram_store.rb

test/unit/store/tm_store.rb

test/unit/store/tm_store_lock.rb

test/unit/tc_document.rb

test/unit/ts_analysis.rb

test/unit/ts_index.rb

test/unit/ts_largefile.rb

test/unit/ts_query_parser.rb

test/unit/ts_search.rb

test/unit/ts_store.rb

test/unit/ts_utils.rb

test/unit/utils

test/unit/utils/tc_bit_vector.rb

test/unit/utils/tc_number_tools.rb

test/unit/utils/tc_priority_queue.rb

Show diffs side-by-side

added added

removed removed

ext/r_qparser.c

#include "ferret.h"

#include "search.h"

/* Ruby class object for Ferret::QueryParser; assigned in Init_QueryParser(). */
static VALUE cQueryParser;

/* Exception class raised by frt_qp_parse(); assigned in
 * Init_QueryParseException(). Has external linkage (not static). */
VALUE cQueryParseException;

/* :analyzer option key; defined in another translation unit. */
extern VALUE sym_analyzer;

/* Option-hash keys read by frt_qp_init(). Each one is interned in
 * Init_QueryParser(); the comment beside it gives the Ruby symbol. */
static VALUE sym_wild_card_downcase;

static VALUE sym_fields;

static VALUE sym_all_fields;

/* interned as :tokenized_fields */
static VALUE sym_tkz_fields;

static VALUE sym_default_field;

static VALUE sym_validate_fields;

static VALUE sym_or_default;

static VALUE sym_default_slop;

static VALUE sym_handle_parse_errors;

static VALUE sym_clean_string;

static VALUE sym_max_clauses;

static VALUE sym_use_keywords;

/* Helpers implemented elsewhere in the extension. */
extern VALUE frt_get_analyzer(Analyzer *a);

/* Wraps a C Query for return to Ruby (used by frt_qp_parse). */
extern VALUE frt_get_q(Query *q);

/* Obtains a C Analyzer from a Ruby analyzer object (used by frt_qp_init). */
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);

/****************************************************************************

* QueryParser Methods

****************************************************************************/

static void

frt_qp_free(void *p)

{

object_del(p);

qp_destroy((QParser *)p);

}

static void

frt_qp_mark(void *p)

{

frt_gc_mark(((QParser *)p)->analyzer);

}

/*
 * Convert a Ruby field specification into a newly created HashSet of
 * heap-allocated C strings (the set is created with hs_new_str(&free)).
 *
 * rfields may be:
 *   nil           - returns NULL.
 *   an Array      - every element is converted with rb_obj_as_string() and
 *                   a copy of it is added to the set.
 *   anything else - converted with rb_obj_as_string(). The string "*"
 *                   returns NULL; any other string is split on '|' and each
 *                   piece is added to the set.
 *
 * Returns the new set, or NULL as described above. Callers in this file
 * either store the result in the QParser or hand it to qp_new().
 */
static HashSet *
frt_get_fields(VALUE rfields)
{
    VALUE rval;
    HashSet *fields;
    char *s, *p, *str;

    if (rfields == Qnil) {
        return NULL;
    }

    fields = hs_new_str(&free);
    if (TYPE(rfields) == T_ARRAY) {
        int i;
        for (i = 0; i < RARRAY(rfields)->len; i++) {
            rval = rb_obj_as_string(RARRAY(rfields)->ptr[i]);
            hs_add(fields, nstrdup(rval));
        }
    }
    else {
        rval = rb_obj_as_string(rfields);
        if (strcmp("*", rs2s(rval)) == 0) {
            /* "*" is reported to the caller as NULL rather than as a set */
            hs_destroy(fields);
            fields = NULL;
        }
        else {
            /* split a private copy in place on '|' */
            s = str = nstrdup(rval);
            while ((p = strchr(s, '|')) != NULL) {
                *p = '\0';
                hs_add(fields, estrdup(s));
                s = p + 1;
            }
            /* the piece after the last '|' (or the whole string) */
            hs_add(fields, estrdup(s));
            free(str);
        }
    }
    return fields;
}

* call-seq:

* QueryParser.new(options = {}) -> QueryParser

* Create a new QueryParser. The QueryParser is used to convert string

* queries into Query objects. The options are;

* === Options

* :default_field:: Default: "*" (all fields). The default field to

* search when no field is specified in the search

* string. It can also be an array of fields.

* :analyzer:: Default: StandardAnalyzer. Analyzer used by the

* query parser to parse query terms

* :wild_card_downcase:: Default: true. Specifies whether wild-card queries

* and range queries should be downcased or not since

* they are not passed through the parser

* :fields:: Default: []. Lets the query parser know what

* fields are available for searching, particularly

* when the "*" is specified as the search field

* :tokenized_fields:: Default: :fields. Lets the query parser know which

100

* fields are tokenized so it knows which fields to

101

* run the analyzer over.

102

* :validate_fields:: Default: false. Set to true if you want an

103

* exception to be raised if there is an attempt to

104

* search a non-existent field

105

* :or_default:: Default: true. Use "OR" as the default boolean

106

* operator

107

* :default_slop:: Default: 0. Default slop to use in PhraseQuery

108

* :handle_parse_errors:: Default: true. QueryParser will quietly handle all

109

* parsing errors internally. If you'd like to handle

110

* them yourself, set this parameter to false.

111

* :clean_string:: Default: true. QueryParser will do a quick

112

* once-over the query string make sure that quotes

113

* and brackets match up and special characters are

114

* escaped

115

* :max_clauses:: Default: 512. the maximum number of clauses

116

* allowed in boolean queries and the maximum number

117

* of terms allowed in multi, prefix, wild-card or

118

* fuzzy queries when those queries are generated by

119

* rewriting other queries

120

* :use_keywords: Default: true. By default AND, OR, NOT and REQ are

121

* keywords used by the query parser. Sometimes this

122

* is undesirable. For example, if your application

123

* allows searching for US states by their

124

* abbreviation, then OR will be a common query

125

* string. By setting :use_keywords to false, OR will

126

* no longer be a keyword allowing searches for the

127

* state of Oregon. You will still be able to use

128

* boolean queries by using the + and - characters.

129

130

static VALUE

131

frt_qp_init(int argc, VALUE *argv, VALUE self)

132

{

133

VALUE roptions = Qnil;

134

VALUE rval;

135

Analyzer *analyzer = NULL;

136

bool has_options = false;

137

138

HashSet *all_fields = NULL;

139

HashSet *tkz_fields = NULL;

140

HashSet *def_fields = NULL;

141

QParser *qp;

142

143

if (rb_scan_args(argc, argv, "01", &roptions) > 0) {

144

if (TYPE(roptions) == T_HASH) {

145

has_options = true;

146

if (Qnil != (rval = rb_hash_aref(roptions, sym_default_field))) {

147

def_fields = frt_get_fields(rval);

148

}

149

if (Qnil != (rval = rb_hash_aref(roptions, sym_analyzer))) {

150

analyzer = frt_get_cwrapped_analyzer(rval);

151

}

152

if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {

153

all_fields = frt_get_fields(rval);

154

}

155

if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {

156

all_fields = frt_get_fields(rval);

157

}

158

if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {

159

tkz_fields = frt_get_fields(rval);

160

}

161

} else {

162

def_fields = frt_get_fields(roptions);

163

roptions = Qnil;

164

}

165

}

166

if (all_fields == NULL) {

167

all_fields = hs_new_str(&free);

168

}

169

170

if (!analyzer) {

171

analyzer = mb_standard_analyzer_new(true);

172

}

173

174

qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);

175

qp->allow_any_fields = true;

176

qp->clean_str = true;

177

qp->handle_parse_errors = true;

178

/* handle options */

179

if (roptions != Qnil) {

180

if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {

181

qp->handle_parse_errors = RTEST(rval);

182

}

183

if (Qnil != (rval = rb_hash_aref(roptions, sym_validate_fields))) {

184

qp->allow_any_fields = !RTEST(rval);

185

}

186

if (Qnil != (rval = rb_hash_aref(roptions, sym_wild_card_downcase))) {

187

qp->wild_lower = RTEST(rval);

188

}

189

if (Qnil != (rval = rb_hash_aref(roptions, sym_or_default))) {

190

qp->or_default = RTEST(rval);

191

}

192

if (Qnil != (rval = rb_hash_aref(roptions, sym_default_slop))) {

193

qp->def_slop = FIX2INT(rval);

194

}

195

if (Qnil != (rval = rb_hash_aref(roptions, sym_clean_string))) {

196

qp->clean_str = RTEST(rval);

197

}

198

if (Qnil != (rval = rb_hash_aref(roptions, sym_max_clauses))) {

199

qp->max_clauses = FIX2INT(rval);

200

}

201

if (Qnil != (rval = rb_hash_aref(roptions, sym_use_keywords))) {

202

qp->use_keywords = RTEST(rval);

203

}

204

}

205

Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);

206

object_add(qp, self);

207

return self;

208

}

209

210

/* Declares a local `qp` initialised from DATA_PTR(self); must be used where
 * a VALUE named `self` is in scope and a declaration is allowed. */
#define GET_QP QParser *qp = (QParser *)DATA_PTR(self)

211

212

* call-seq:

213

* query_parser.parse(query_string) -> Query

214

215

* Parse a query string returning a Query object if parsing was successful.

216

* Will raise a QueryParseException if unsuccessful.

217

218

static VALUE

219

frt_qp_parse(VALUE self, VALUE rstr)

220

{

221

const char *msg = NULL;

222

volatile VALUE rq;

223

GET_QP;

224

rstr = rb_obj_as_string(rstr);

225

TRY

226

rq = frt_get_q(qp_parse(qp, rs2s(rstr)));

227

break;

228

default:

229

msg = xcontext.msg;

230

HANDLED();

231

XENDTRY

232

233

if (msg) {

234

rb_raise(cQueryParseException, msg);

235

}

236

237

return rq;

238

}

239

240

241

* call-seq:

242

* query_parser.fields -> Array of Symbols

243

244

* Returns the list of all fields that the QueryParser knows about.

245

246

static VALUE

247

frt_qp_get_fields(VALUE self)

248

{

249

GET_QP;

250

int i;

251

HashSet *fields = qp->all_fields;

252

VALUE rfields = rb_ary_new();

253

254

for (i = 0; i < fields->size; i++) {

255

rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));

256

}

257

258

return rfields;

259

}

260

261

262

* call-seq:

263

* query_parser.fields = fields -> self

264

265

* Set the list of fields. These fields are expanded for searches on "*".

266

267

static VALUE

268

frt_qp_set_fields(VALUE self, VALUE rfields)

269

{

270

GET_QP;

271

HashSet *fields = frt_get_fields(rfields);

272

273

if (qp->def_fields == qp->all_fields) {

274

qp->def_fields = NULL;

275

}

276

if (fields == NULL) {

277

fields = hs_new_str(&free);

278

}

279

hs_destroy(qp->all_fields);

280

qp->all_fields = fields;

281

if (qp->def_fields == NULL) {

282

qp->def_fields = fields;

283

}

284

285

return self;

286

}

287

288

289

* call-seq:

290

* query_parser.tokenized_fields -> Array of Symbols

291

292

* Returns the list of all tokenized_fields that the QueryParser knows about.

293

294

static VALUE

295

frt_qp_get_tkz_fields(VALUE self)

296

{

297

GET_QP;

298

int i;

299

HashSet *fields = qp->tokenized_fields;

300

if (fields) {

301

VALUE rfields = rb_ary_new();

302

303

for (i = 0; i < fields->size; i++) {

304

rb_ary_push(rfields, ID2SYM(rb_intern((char *)fields->elems[i])));

305

}

306

307

return rfields;

308

}

309

else {

310

return Qnil;

311

}

312

}

313

314

315

* call-seq:

316

* query_parser.tokenized_fields = fields -> self

317

318

* Set the list of tokenized_fields. These tokenized_fields are tokenized in

319

* the queries. If this is set to Qnil then all fields will be tokenized.

320

321

static VALUE

322

frt_qp_set_tkz_fields(VALUE self, VALUE rfields)

323

{

324

GET_QP;

325

if (qp->tokenized_fields) hs_destroy(qp->tokenized_fields);

326

qp->tokenized_fields = frt_get_fields(rfields);

327

return self;

328

}

329

330

/****************************************************************************

331

332

* Init function

333

334

****************************************************************************/

335

/* rdoc hack
extern VALUE mFerret = rb_define_module("Ferret");
extern VALUE cQueryParser = rb_define_module_under(mFerret, "QueryParser");
*/

340

341

342

* Document-class: Ferret::QueryParser::QueryParseException

343

344

* == Summary

345

346

* Exception raised when there is an error parsing the query string passed to

347

* QueryParser.

348

349

void

350

Init_QueryParseException(void)

351

{

352

cQueryParseException = rb_define_class_under(cQueryParser,

353

"QueryParseException",

354

rb_eStandardError);

355

}

356

357

358

* Document-class: Ferret::QueryParser

359

360

* == Summary

361

362

* The QueryParser is used to transform user submitted query strings into

363

* QueryObjects. Ferret uses its own Query Language, known from now on as

364

* Ferret Query Language or FQL.

365

366

* == Ferret Query Language

367

368

* === Preamble

369

370

* The following characters are special characters in FQL;

371

372

* :, (, ), [, ], {, }, !, +, ", ~, ^, -, |, <, >, =, *, ?, \

373

374

* If you want to use one of these characters in one of your terms you need

375

* to escape it with a \ character. \ escapes itself. The exception to this

376

* rule is within Phrases which are strings surrounded by double quotes (and

377

* will be explained further below in the section on PhraseQueries). In

378

* Phrases, only ", | and <> have special meaning and need to be escaped if

379

* you want the literal value. <> is escaped \<\>.

380

381

* In the following examples I have only written the query string. This would

382

* be parsed like;

383

384

* query = query_parser.parse("pet:(dog AND cat)")

385

* puts query # => "+pet:dog +pet:cat"

386

387

* === TermQuery

388

389

* A term query is the most basic query of all and is what most of the other

390

* queries are built upon. The term consists of a single word. eg;

391

392

* 'term'

393

394

* Note that the analyzer will be run on the term and if it splits the term

395

* in two then it will be turned into a phrase query. For example, with the

396

* plain Ferret::Analysis::Analyzer, the following;

397

398

* 'dave12balmain'

399

400

* is equivalent to;

401

402

* '"dave balmain"'

403

404

* Which we will explain now...

405

406

* === PhraseQuery

407

408

* A phrase query is a string of terms surrounded by double quotes. For

409

* example you could write;

410

411

* '"quick brown fox"'

412

413

* But if a "fast" fox is just as good as a quick one you could use the |

414

* character to specify alternate terms.

415

416

* '"quick|speedy|fast brown fox"'

417

418

* What if we don't care what colour the fox is. We can use the <> to specify

419

* a place setter. eg;

420

421

* '"quick|speedy|fast <> fox"'

422

423

* This will match any word in between quick and fox. Alternatively we could

424

* set the "slop" for the phrase which allows a certain variation in the

425

* match of the phrase. The slop for a phrase is an integer indicating how

426

* many positions you are allowed to move the terms to get a match. Read more

427

* about the slop factor in Ferret::Search::PhraseQuery. To set the slop

428

* factor for a phrase you can type;

429

430

* '"big house"~2'

431

432

* This would match "big house", "big red house", "big red brick house" and

433

* even "house big". That's right, you don't need to have the terms in order

434

* if you allow some slop in your phrases. (See Ferret::Search::Spans if you

435

* need a phrase type query with ordered terms.)

436

437

* These basic queries will be run on the default field which is set when you

438

* create the query_parser. But what if you want to search a different field.

439

* You'll be needing a ...

440

441

* === FieldQuery

442

443

* A field query is any field prefixed by <fieldname>:. For example, to

444

* search for all instances of the term "ski" in field "sport", you'd write;

445

446

* 'sport:ski'

447

* Or we can apply a field to phrase;

448

449

* 'sport:"skiing is fun"'

450

451

* Now we have a few types of queries, we'll be needing to glue them together

452

* with a ...

453

454

* === BooleanQuery

455

456

* There are a couple of ways of writing boolean queries. Firstly you can

457

* specify which terms are required, optional or required not to exist (not).

458

459

* * '+' or "REQ" can be used to indicate a required query. "REQ" must be

460

* surrounded by white space.

461

* * '-', '!' or "NOT" are used to indicate a query that is required to be

462

* false. "NOT" must be surrounded by white space.

463

* * all other queries are optional if the above symbols are used.

464

465

* Some examples;

466

467

* '+sport:ski -sport:snowboard sport:toboggan'

468

* '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'

469

470

* You may also use the boolean operators "AND", "&&", "OR" and "||". eg;

471

472

* 'sport:ski AND NOT sport:snowboard OR sport:toboggan'

473

* 'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'

474

475

* You can set the default operator when you create the query parser.

476

477

* === RangeQuery

478

479

* A range query finds all documents with terms between the two query terms.

480

* This can be very useful in particular for dates. eg;

481

482

* 'date:[20050725 20050905]' # all dates >= 20050725 and <= 20050905

483

* 'date:[20050725 20050905}' # all dates >= 20050725 and < 20050905

484

* 'date:{20050725 20050905]' # all dates > 20050725 and <= 20050905

485

* 'date:{20050725 20050905}' # all dates > 20050725 and < 20050905

486

487

* You can also do open ended queries like this;

488

489

* 'date:[20050725>' # all dates >= 20050725

490

* 'date:{20050725>' # all dates > 20050725

491

* 'date:<20050905]' # all dates <= 20050905

492

* 'date:<20050905}' # all dates < 20050905

493

494

* Or like this;

495

496

* 'date: >= 20050725'

497

* 'date: > 20050725'

498

* 'date: <= 20050905'

499

* 'date: < 20050905'

500

501

* If you prefer the above style you could use a boolean query but like this;

502

503

* 'date:( >= 20050725 AND <= 20050905)'

504

505

* But the RangeQuery-only solution shown first will be faster.

506

507

* === WildQuery

508

509

* A wild query is a query using the pattern matching characters * and ?. *

510

* matches 0 or more characters while ? matches a single character. This type

511

* of query can be really useful for matching hierarchical categories for

512

* example. Let's say we had this structure;

513

514

* /sport/skiing

515

* /sport/cycling

516

* /coding1/ruby

517

* /coding1/c

518

* /coding2/python

519

* /coding2/perl

520

521

* If you wanted all categories with programming languages you could use the

522

* query;

523

524

* 'category:/coding?/?*'

525

526

* Note that this query can be quite expensive if not used carefully. In the

527

* example above there would be no problem but you should be careful not to use

528

* the wild characters at the beginning of the query as it'll have to iterate

529

* through every term in that field. Having said that, some fields like the

530

* category field above will only have a small number of distinct fields so

531

* this could be okay.

532

533

* === FuzzyQuery

534

535

* This is like the sloppy phrase query above, except you are now adding slop

536

* to a term. Basically it measures the Levenshtein distance between two

537

* terms and if the value is below the slop threshold the term is a match.

538

* This time though the slop must be a float between 0 and 1.0, 1.0 being a

539

* perfect match and 0 being far from a match. The default is set to 0.5 so

540

* you don't need to give a slop value if you don't want to. You can set the

541

* default in the Ferret::Search::FuzzyQuery class. Here are a couple of

542

* examples;

543

544

* 'content:ferret~'

545

* 'content:Ostralya~0.4'

546

547

* Note that this query can be quite expensive. If you'd like to use this

548

* query, you may want to set a minimum prefix length in the FuzzyQuery

549

* class. This can substantially reduce the number of terms that the query

550

* will iterate over.

551

552

553

void

554

Init_QueryParser(void)

555

{

556

/* hash keys */

557

sym_wild_card_downcase = ID2SYM(rb_intern("wild_card_downcase"));

558

sym_fields = ID2SYM(rb_intern("fields"));

559

sym_all_fields = ID2SYM(rb_intern("all_fields"));

560

sym_tkz_fields = ID2SYM(rb_intern("tokenized_fields"));

561

sym_default_field = ID2SYM(rb_intern("default_field"));

562

sym_validate_fields = ID2SYM(rb_intern("validate_fields"));

563

sym_or_default = ID2SYM(rb_intern("or_default"));

564

sym_default_slop = ID2SYM(rb_intern("default_slop"));

565

sym_handle_parse_errors = ID2SYM(rb_intern("handle_parse_errors"));

566

sym_clean_string = ID2SYM(rb_intern("clean_string"));

567

sym_max_clauses = ID2SYM(rb_intern("max_clauses"));

568

sym_use_keywords = ID2SYM(rb_intern("use_keywords"));

569

570

/* QueryParser */

571

cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);

572

rb_define_alloc_func(cQueryParser, frt_data_alloc);

573

574

rb_define_method(cQueryParser, "initialize", frt_qp_init, -1);

575

rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);

576

rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);

577

rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);

578

rb_define_method(cQueryParser, "tokenized_fields",

579

frt_qp_get_tkz_fields, 0);

580

rb_define_method(cQueryParser, "tokenized_fields=",

581

frt_qp_set_tkz_fields, 1);

582

583

Init_QueryParseException();

584

}

585

Older »