~ubuntu-branches/ubuntu/wily/recoll/wily

Committer: Package Import Robot
Author(s): Kartik Mistry
Date: 2015-08-03 14:16:32 UTC
mfrom: (33.1.1 sid)
Revision ID: package-import@ubuntu.com-20150803141632-w5a1cr8ub2rkyvfe

Tags: 1.21.0-1

http://bugs.debian.org/793636

* New upstream release.
* debian/control:
  + Build-depend on python3-dev, python-dev, not python3-all-dev and
    python-all-dev. Thanks to Steve Langasek for patch. (Closes: #793636)
  + Added Build-depends on bison.

files added:
filters/rcl7z

filters/rclcheckneedretry.sh

index/checkretryfailed.cpp

index/checkretryfailed.h

qtgui/i18n/recoll_da.qm

qtgui/i18n/recoll_da.ts

qtgui/rclm_idx.cpp

qtgui/rclm_preview.cpp

qtgui/rclm_saveload.cpp

qtgui/rclm_view.cpp

qtgui/rclm_wins.cpp

qtgui/xmltosd.cpp

qtgui/xmltosd.h

query/wasaparse.cpp

query/wasaparse.y

query/wasaparserdriver.h

rcldb/searchdatatox.cpp

utils/md5ut.cpp

utils/md5ut.h

files removed:
query/wasastringtoquery.cpp

query/wasastringtoquery.h

query/wasatorcl.cpp

files modified:
.pc/fix-python-install.patch/recollinstall.in

INSTALL

Makefile.in

README

VERSION

common/autoconfig.h.in

common/rclconfig.cpp

common/rclconfig.h

common/rclinit.cpp

common/textsplit.h

configure

configure.ac

debian/changelog

debian/control

doc/man/recollindex.1

doc/man/recollq.1

doc/user/RCL.INDEXING.CONFIG.html

doc/user/RCL.INDEXING.EXTATTR.html

doc/user/RCL.INDEXING.MONITOR.html

doc/user/RCL.INDEXING.PERIODIC.html

doc/user/RCL.INDEXING.STORAGE.html

doc/user/RCL.INDEXING.html

doc/user/RCL.INSTALL.BUILDING.html

doc/user/RCL.INSTALL.CONFIG.html

doc/user/RCL.INSTALL.html

doc/user/RCL.PROGRAM.FIELDS.html

doc/user/RCL.PROGRAM.html

doc/user/RCL.SEARCH.LANG.html

doc/user/RCL.SEARCH.PTRANS.html

doc/user/RCL.SEARCH.html

doc/user/index.html

doc/user/usermanual.html

doc/user/usermanual.xml

filters/rcldoc

filters/rclokulnote

filters/rclpdf

filters/rclscribus

index/Makefile

index/beaglequeue.cpp

index/fsindexer.cpp

index/fsindexer.h

index/indexer.cpp

index/indexer.h

index/mimetype.cpp

index/recollindex.cpp

internfile/mh_exec.cpp

internfile/mh_execm.cpp

internfile/mh_html.cpp

internfile/mh_mail.cpp

internfile/mh_mbox.cpp

internfile/mh_text.cpp

internfile/mimehandler.cpp

internfile/uncomp.cpp

kde/kioslave/kio_recoll/dirif.cpp

kde/kioslave/kio_recoll/htmlif.cpp

kde/kioslave/kio_recoll/kio_recoll.cpp

kde/kioslave/kio_recoll/kio_recoll.h

lib/mkMake.in

php/recoll/recoll.cpp

python/recoll/pyrecoll.cpp

python/recoll/setup.py.in

qtgui/advsearch_w.cpp

qtgui/advsearch_w.h

qtgui/advshist.cpp

qtgui/advshist.h

qtgui/confgui/confgui.cpp

qtgui/confgui/confguiindex.cpp

qtgui/crontool.cpp

qtgui/fragbuts.cpp

qtgui/guiutils.cpp

qtgui/guiutils.h

qtgui/i18n/recoll_cs.qm

qtgui/i18n/recoll_cs.ts

qtgui/i18n/recoll_de.qm

qtgui/i18n/recoll_de.ts

qtgui/i18n/recoll_el.qm

qtgui/i18n/recoll_el.ts

qtgui/i18n/recoll_es.qm

qtgui/i18n/recoll_es.ts

qtgui/i18n/recoll_fr.qm

qtgui/i18n/recoll_fr.ts

qtgui/i18n/recoll_it.qm

qtgui/i18n/recoll_it.ts

qtgui/i18n/recoll_lt.qm

qtgui/i18n/recoll_lt.ts

qtgui/i18n/recoll_ru.qm

qtgui/i18n/recoll_ru.ts

qtgui/i18n/recoll_tr.qm

qtgui/i18n/recoll_tr.ts

qtgui/i18n/recoll_uk.qm

qtgui/i18n/recoll_uk.ts

qtgui/i18n/recoll_xx.ts

qtgui/i18n/recoll_zh.qm

qtgui/i18n/recoll_zh.ts

qtgui/i18n/recoll_zh_CN.qm

qtgui/i18n/recoll_zh_CN.ts

qtgui/main.cpp

qtgui/multisave.cpp

qtgui/preview_w.cpp

qtgui/preview_w.h

qtgui/rclmain.ui

qtgui/rclmain_w.cpp

qtgui/rclmain_w.h

qtgui/recoll.pro.in

qtgui/reslist.cpp

qtgui/reslist.h

qtgui/respopup.cpp

qtgui/restable.cpp

qtgui/snippets_w.cpp

qtgui/spell_w.cpp

qtgui/ssearch_w.cpp

qtgui/ssearch_w.h

qtgui/uiprefs.ui

qtgui/uiprefs_w.cpp

qtgui/viewaction_w.cpp

query/Makefile

query/docseqdb.h

query/docseqhist.h

query/recollq.cpp

query/reslistpager.cpp

query/wasatorcl.h

rcldb/rcldb.cpp

rcldb/rcldb.h

rcldb/rcldups.cpp

rcldb/searchdata.cpp

rcldb/searchdata.h

rcldb/termproc.h

recollinstall.in

sampleconf/mimeconf

sampleconf/mimemap

sampleconf/mimeview

sampleconf/mimeview.mac

sampleconf/recoll.conf.in

utils/Makefile

utils/circache.cpp

utils/closefrom.cpp

utils/closefrom.h

utils/conftree.h

utils/debuglog.cpp

utils/execmd.cpp

utils/execmd.h

utils/md5.cpp

utils/md5.h

utils/netcon.cpp

utils/netcon.h

utils/pathut.cpp

utils/pathut.h

utils/pxattr.cpp

utils/readfile.cpp

utils/refcntr.h

Show diffs side-by-side

added added

removed removed

rcldb/searchdata.cpp

#include <vector>

#include <algorithm>

#include <sstream>

#include <iostream>

using namespace std;

#include "xapian.h"

typedef vector<SearchDataClause *>::iterator qlist_it_t;

typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;

static const int original_term_wqf_booster = 10;

void SearchData::commoninit()

{

m_haveDates = false;

{

LOGDEB0(("SearchData::~SearchData\n"));

for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)

delete *it;

}

// Expand the file type specifications in tps, in place.
//
// Entries which the configuration recognizes as MIME categories are replaced
// by the category's MIME types. Other entries are lowercased and matched
// against the index as wildcard expressions on the "mtype" field; an entry
// with no match in the index is kept unchanged.
//
// @param db  index used for wildcard matching; also supplies the configuration
// @param tps input/output: replaced by the sorted, duplicate-free expansion
// @return false (tps untouched) if the db has no configuration, else true
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
{
    const RclConfig *cfg = db.getConf();
    if (!cfg) {
        LOGFATAL(("Db::expandFileTypes: null configuration!!\n"));
        return false;
    }

    vector<string> expanded;
    for (vector<string>::iterator tpit = tps.begin();
         tpit != tps.end(); ++tpit) {
        if (cfg->isMimeCategory(*tpit)) {
            // Category name: take its types from the configuration.
            vector<string> cattypes;
            cfg->getMimeCatTypes(*tpit, cattypes);
            expanded.insert(expanded.end(), cattypes.begin(), cattypes.end());
            continue;
        }

        TermMatchResult res;
        string mt = stringtolower((const string&)*tpit);
        // We set casesens|diacsens to get an equivalent of ixTermMatch()
        db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(),
                     mt, res, -1, "mtype");
        if (res.entries.empty()) {
            // Nothing in the index matches: keep the caller's value.
            expanded.push_back(tpit->c_str());
            continue;
        }
        for (vector<TermMatchEntry>::const_iterator rit = res.entries.begin();
             rit != res.entries.end(); ++rit) {
            expanded.push_back(strip_prefix(rit->term));
        }
    }

    // Sorting first lets unique() drop every duplicate.
    sort(expanded.begin(), expanded.end());
    expanded.erase(unique(expanded.begin(), expanded.end()), expanded.end());

    tps = expanded;
    return true;
}

116

117

// Error text used when the query being built reaches the clause limit.
static const char *maxXapClauseMsg =
    "Maximum Xapian query size exceeded. "
    "Increase maxXapianClauses in the configuration. ";

// Additional hint, appended to the message above when o_index_stripchars
// is not set.
static const char *maxXapClauseCaseDiacMsg =
    "Or try to use case (C) or diacritics (D) sensitivity qualifiers, "
    "or less wildcards ?";

124

125

bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,

126

vector<SearchDataClause*>& query,

127

string& reason, void *d)

128

{

129

Xapian::Query xq;

130

for (qlist_it_t it = query.begin(); it != query.end(); it++) {

131

Xapian::Query nq;

132

if (!(*it)->toNativeQuery(db, &nq)) {

133

LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",

134

(*it)->getReason().c_str()));

135

reason += (*it)->getReason() + " ";

136

return false;

137

}

138

if (nq.empty()) {

139

LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));

140

continue;

141

}

142

// If this structure is an AND list, must use AND_NOT for excl clauses.

143

// Else this is an OR list, and there can't be excl clauses (checked by

144

// addClause())

145

Xapian::Query::op op;

146

if (tp == SCLT_AND) {

147

if ((*it)->getexclude()) {

148

op = Xapian::Query::OP_AND_NOT;

149

} else {

150

op = Xapian::Query::OP_AND;

151

}

152

} else {

153

op = Xapian::Query::OP_OR;

154

}

155

if (xq.empty()) {

156

if (op == Xapian::Query::OP_AND_NOT)

157

xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);

158

else

159

xq = nq;

160

} else {

161

xq = Xapian::Query(op, xq, nq);

162

}

163

if (int(xq.get_length()) >= getMaxCl()) {

164

LOGERR(("%s\n", maxXapClauseMsg));

165

m_reason += maxXapClauseMsg;

166

if (!o_index_stripchars)

167

m_reason += maxXapClauseCaseDiacMsg;

168

return false;

169

}

170

}

171

172

LOGDEB0(("SearchData::clausesToQuery: got %d clauses\n", xq.get_length()));

173

174

if (xq.empty())

175

xq = Xapian::Query::MatchAll;

176

177

*((Xapian::Query *)d) = xq;

178

return true;

179

}

180

181

// Translate this whole search into a Xapian query.
//
// The clause list is translated first, then the optional parts are applied
// in this order: date range filter, size filter, autophrase, file type
// filter, negative file type filter.
//
// @param db index to translate against; also supplies the configuration
//           ("maxTermExpand" and "maxXapianClauses" are read from it)
// @param d  points to the Xapian::Query receiving the result (written only
//           on success)
// @return false if the db has no configuration or if the clause list can't
//         be translated (explanation in m_reason), else true
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
    m_reason.erase();

    // expandFileTypes() treats a null configuration as possible: do not
    // dereference it blindly here either.
    if (!db.getConf()) {
        LOGFATAL(("SearchData::toNativeQuery: null configuration!!\n"));
        m_reason = "Null configuration";
        return false;
    }
    db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
    db.getConf()->getConfParam("maxXapianClauses", &m_maxcl);

    // Walk the clause list translating each in turn and building the
    // Xapian query tree
    Xapian::Query xq;
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
        LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n",
                m_reason.c_str()));
        return false;
    }

    if (m_haveDates) {
        // If one of the extremities is unset, compute db extremas
        if (m_dates.y1 == 0 || m_dates.y2 == 0) {
            int minyear = 1970, maxyear = 2100;
            if (!db.maxYearSpan(&minyear, &maxyear)) {
                LOGERR(("Can't retrieve index min/max dates\n"));
                // Whatever, go on with the default span.
            }

            if (m_dates.y1 == 0) {
                m_dates.y1 = minyear;
                m_dates.m1 = 1;
                m_dates.d1 = 1;
            }
            if (m_dates.y2 == 0) {
                m_dates.y2 = maxyear;
                m_dates.m2 = 12;
                m_dates.d2 = 31;
            }
        }
        LOGDEB(("Db::toNativeQuery: date interval: %d-%d-%d/%d-%d-%d\n",
                m_dates.y1, m_dates.m1, m_dates.d1,
                m_dates.y2, m_dates.m2, m_dates.d2));
        Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
                                             m_dates.y2, m_dates.m2, m_dates.d2);
        if (dq.empty()) {
            LOGINFO(("Db::toNativeQuery: date filter is empty\n"));
        }
        // If no probabilistic query is provided then promote the daterange
        // filter to be THE query instead of filtering an empty query.
        if (xq.empty()) {
            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
            xq = dq;
        } else {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
        }
    }

    // size_t(-1) marks an unset size bound.
    if (m_minSize != size_t(-1) || m_maxSize != size_t(-1)) {
        Xapian::Query sq;
        char min[50], max[50];
        snprintf(min, sizeof(min), "%lld", (long long)m_minSize);
        snprintf(max, sizeof(max), "%lld", (long long)m_maxSize);
        if (m_minSize == size_t(-1)) {
            // Upper bound only
            string value(max);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
        } else if (m_maxSize == size_t(-1)) {
            // Lower bound only
            string value(min);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
        } else {
            string minvalue(min);
            leftzeropad(minvalue, 12);
            string maxvalue(max);
            leftzeropad(maxvalue, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
                               minvalue, maxvalue);
        }

        // If no probabilistic query is provided then promote the
        // filter to be THE query instead of filtering an empty query.
        if (xq.empty()) {
            LOGINFO(("Db::toNativeQuery: proba query is empty\n"));
            xq = sq;
        } else {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
        }
    }

    // Add the autophrase if any. A failure to translate it is ignored.
    if (m_autophrase.isNotNull()) {
        Xapian::Query apq;
        if (m_autophrase->toNativeQuery(db, &apq)) {
            xq = xq.empty() ? apq :
                Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
        }
    }

    // Add the file type filtering clause if any
    if (!m_filetypes.empty()) {
        expandFileTypes(db, m_filetypes);

        Xapian::Query tq;
        for (vector<string>::iterator it = m_filetypes.begin();
             it != m_filetypes.end(); it++) {
            string term = wrap_prefix(mimetype_prefix) + *it;
            LOGDEB0(("Adding file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) :
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
    }

    // Add the neg file type filtering clause if any
    if (!m_nfiletypes.empty()) {
        expandFileTypes(db, m_nfiletypes);

        Xapian::Query tq;
        for (vector<string>::iterator it = m_nfiletypes.begin();
             it != m_nfiletypes.end(); it++) {
            string term = wrap_prefix(mimetype_prefix) + *it;
            LOGDEB0(("Adding negative file type term: [%s]\n", term.c_str()));
            tq = tq.empty() ? Xapian::Query(term) :
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
    }

    *((Xapian::Query *)d) = xq;
    return true;
}

311

312

// This is called by the GUI simple search if the option is set: add

316

bool SearchData::maybeAddAutoPhrase(Rcl::Db& db, double freqThreshold)

317

{

318

LOGDEB0(("SearchData::maybeAddAutoPhrase()\n"));

// cerr << "BEFORE SIMPLIFY\n"; dump(cerr);

simplify();

// cerr << "AFTER SIMPLIFY\n"; dump(cerr);

319

if (!m_query.size()) {

320

LOGDEB2(("SearchData::maybeAddAutoPhrase: empty query\n"));

321

return false;

LOGDEB2(("SearchData::maybeAddAutoPhrase: empty query\n"));

return false;

322

}

323

324

string field;

326

// Walk the clause list. If we find any non simple clause or different

327

// field names, bail out.

328

for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {

329

SClType tp = (*it)->m_tp;

330

if (tp != SCLT_AND && tp != SCLT_OR) {

331

LOGDEB2(("SearchData::maybeAddAutoPhrase: rejected clause\n"));

332

return false;

333

}

334

SearchDataClauseSimple *clp =

335

dynamic_cast<SearchDataClauseSimple*>(*it);

336

if (clp == 0) {

337

LOGDEB2(("SearchData::maybeAddAutoPhrase: dyncast failed\n"));

338

return false;

339

}

340

if (it == m_query.begin()) {

341

field = clp->getfield();

342

} else {

343

if (clp->getfield().compare(field)) {

344

LOGDEB2(("SearchData::maybeAddAutoPhrase: diff. fields\n"));

345

return false;

346

}

347

}

348

349

// If there are wildcards or quotes in there, bail out

350

if (clp->gettext().find_first_of("\"*[?") != string::npos) {

351

LOGDEB2(("SearchData::maybeAddAutoPhrase: wildcards\n"));

352

return false;

353

}

354

// Do a simple word-split here, don't bother with the full-blown

355

// textsplit. The autophrase thing is just "best effort", it's

356

// normal that it won't work in strange cases.

357

vector<string> wl;

358

stringToStrings(clp->gettext(), wl);

359

words.insert(words.end(), wl.begin(), wl.end());

SClType tp = (*it)->m_tp;

if (tp != SCLT_AND && tp != SCLT_OR) {

LOGDEB2(("SearchData::maybeAddAutoPhrase: wrong tp %d\n", tp));

100

return false;

101

}

102

SearchDataClauseSimple *clp =

103

dynamic_cast<SearchDataClauseSimple*>(*it);

104

if (clp == 0) {

105

LOGDEB2(("SearchData::maybeAddAutoPhrase: dyncast failed\n"));

106

return false;

107

}

108

if (it == m_query.begin()) {

109

field = clp->getfield();

110

} else {

111

if (clp->getfield().compare(field)) {

112

LOGDEB2(("SearchData::maybeAddAutoPhrase: diff. fields\n"));

113

return false;

114

}

115

}

116

117

// If there are wildcards or quotes in there, bail out

118

if (clp->gettext().find_first_of("\"*[?") != string::npos) {

119

LOGDEB2(("SearchData::maybeAddAutoPhrase: wildcards\n"));

120

return false;

121

}

122

123

// Do a simple word-split here, not the full-blown

124

// textsplit. Spans of stopwords should not be trimmed later

125

// in this function, they will be properly split when the

126

// phrase gets processed by toNativeQuery() later on.

127

vector<string> wl;

128

stringToStrings(clp->gettext(), wl);

129

words.insert(words.end(), wl.begin(), wl.end());

360

130

}

361

131

362

132

365

135

int slack = 0;

366

136

int doccnt = db.docCnt();

367

137

if (!doccnt)

368

doccnt = 1;

138

doccnt = 1;

369

139

string swords;

370

140

for (vector<string>::iterator it = words.begin();

371

it != words.end(); it++) {

372

double freq = double(db.termDocCnt(*it)) / doccnt;

373

if (freq < freqThreshold) {

374

if (!swords.empty())

375

swords.append(1, ' ');

376

swords += *it;

377

} else {

378

LOGDEB0(("Autophrase: [%s] too frequent (%.2f %%)\n",

379

it->c_str(), 100 * freq));

380

slack++;

381

}

141

it != words.end(); it++) {

142

double freq = double(db.termDocCnt(*it)) / doccnt;

143

if (freq < freqThreshold) {

144

if (!swords.empty())

145

swords.append(1, ' ');

146

swords += *it;

147

} else {

148

LOGDEB0(("SearchData::Autophrase: [%s] too frequent (%.2f %%)\n",

149

it->c_str(), 100 * freq));

150

slack++;

151

}

382

152

}

383

153

384

154

// We can't make a phrase with a single word :)

385

155

int nwords = TextSplit::countWords(swords);

386

156

if (nwords <= 1) {

387

LOGDEB2(("SearchData::maybeAddAutoPhrase: ended with 1 word\n"));

388

return false;

157

LOGDEB2(("SearchData::maybeAddAutoPhrase: ended with 1 word\n"));

158

return false;

389

159

}

390

160

391

161

// Increase the slack: we want to be a little more laxist than for

393

163

slack += 1 + nwords / 3;

394

164

395

165

m_autophrase = RefCntr<SearchDataClauseDist>(

396

new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field));

166

new SearchDataClauseDist(SCLT_PHRASE, swords, slack, field));

397

167

return true;

398

168

}

399

169

401

171

bool SearchData::addClause(SearchDataClause* cl)

402

172

{

403

173

if (m_tp == SCLT_OR && cl->getexclude()) {

404

LOGERR(("SearchData::addClause: cant add EXCL to OR list\n"));

405

m_reason = "No Negative (AND_NOT) clauses allowed in OR queries";

406

return false;

174

LOGERR(("SearchData::addClause: cant add EXCL to OR list\n"));

175

m_reason = "No Negative (AND_NOT) clauses allowed in OR queries";

176

return false;

407

177

}

408

178

cl->setParent(this);

409

179

m_haveWildCards = m_haveWildCards || cl->m_haveWildCards;

415

185

bool SearchData::fileNameOnly()

416

186

{

417

187

for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)

418

if (!(*it)->isFileName())

419

return false;

188

if (!(*it)->isFileName())

189

return false;

190

return true;

191

}

192

193

void SearchData::simplify()

194

{

195

for (unsigned int i = 0; i < m_query.size(); i++) {

196

if (m_query[i]->m_tp != SCLT_SUB)

197

continue;

198

//C[est ce dyncast qui crashe??

199

SearchDataClauseSub *clsubp =

200

dynamic_cast<SearchDataClauseSub*>(m_query[i]);

201

if (clsubp == 0) {

202

// ??

203

continue;

204

}

205

if (clsubp->getSub()->m_tp != m_tp)

206

continue;

207

208

clsubp->getSub()->simplify();

209

210

// If this subquery has special attributes, it's not a

211

// candidate for collapsing

212

if (!clsubp->getSub()->m_filetypes.empty() ||

213

!clsubp->getSub()->m_nfiletypes.empty() ||

214

clsubp->getSub()->m_haveDates ||

215

clsubp->getSub()->m_maxSize != size_t(-1) ||

216

clsubp->getSub()->m_minSize != size_t(-1) ||

217

clsubp->getSub()->m_haveWildCards)

218

continue;

219

220

bool allsametp = true;

221

for (qlist_it_t it1 = clsubp->getSub()->m_query.begin();

222

it1 != clsubp->getSub()->m_query.end(); it1++) {

223

// We want all AND or OR clause, and same as our conjunction

224

if (((*it1)->getTp() != SCLT_AND && (*it1)->getTp() != SCLT_OR) ||

225

(*it1)->getTp() != m_tp) {

226

allsametp = false;

227

break;

228

}

229

}

230

if (!allsametp)

231

continue;

232

233

// All ok: delete the clause_sub, and insert the queries from

234

// its searchdata in its place

235

m_query.erase(m_query.begin() + i);

236

m_query.insert(m_query.begin() + i,

237

clsubp->getSub()->m_query.begin(),

238

clsubp->getSub()->m_query.end());

239

for (unsigned int j = i;

240

j < i + clsubp->getSub()->m_query.size(); j++) {

241

m_query[j]->setParent(this);

242

}

243

i += clsubp->getSub()->m_query.size() - 1;

244

245

// We don't want the clauses to be deleted when the parent is, as we

246

// know own them.

247

clsubp->getSub()->m_query.clear();

248

delete clsubp;

249

}

250

}

251

252

bool SearchData::singleSimple()

253

{

254

if (m_query.size() != 1 || !m_filetypes.empty() || !m_nfiletypes.empty() ||

255

m_haveDates || m_maxSize != size_t(-1) || m_minSize != size_t(-1) ||

256

m_haveWildCards)

257

return false;

258

SearchDataClause *clp = *m_query.begin();

259

if (clp->getTp() != SCLT_AND && clp->getTp() != SCLT_OR) {

260

return false;

261

}

420

262

return true;

421

263

}

422

264

424

266

// Have every clause of the list, in order, add its data to hld through its
// own getTerms().
void SearchData::getTerms(HighlightData &hld) const
{
    qlist_cit_t clit = m_query.begin();
    while (clit != m_query.end()) {
        (*clit)->getTerms(hld);
        clit++;
    }
}

430

272

431

// Splitter callback for breaking a user string into simple terms and

432

// phrases. This is for parts of the user entry which would appear as

433

// a single word because there is no white space inside, but are

434

// actually multiple terms to rcldb (ie term1,term2)

435

class TextSplitQ : public TextSplitP {

436

public:

437

TextSplitQ(Flags flags, const StopList &_stops, TermProc *prc)

438

: TextSplitP(prc, flags),

439

curnostemexp(false), stops(_stops), alltermcount(0), lastpos(0)

440

{}

441

442

bool takeword(const std::string &term, int pos, int bs, int be)

443

{

444

// Check if the first letter is a majuscule in which

445

// case we do not want to do stem expansion. Need to do this

446

// before unac of course...

447

curnostemexp = unaciscapital(term);

448

449

return TextSplitP::takeword(term, pos, bs, be);

450

}

451

452

bool curnostemexp;

453

vector<string> terms;

454

vector<bool> nostemexps;

455

const StopList &stops;

456

// Count of terms including stopwords: this is for adjusting

457

// phrase/near slack

458

int alltermcount;

459

int lastpos;

460

};

461

462

class TermProcQ : public TermProc {

463

public:

464

TermProcQ() : TermProc(0), m_ts(0) {}

465

void setTSQ(TextSplitQ *ts) {m_ts = ts;}

466

467

bool takeword(const std::string &term, int pos, int bs, int be)

468

{

469

m_ts->alltermcount++;

470

if (m_ts->lastpos < pos)

471

m_ts->lastpos = pos;

472

bool noexpand = be ? m_ts->curnostemexp : true;

473

LOGDEB1(("TermProcQ::takeword: pushing [%s] pos %d noexp %d\n",

474

term.c_str(), pos, noexpand));

475

if (m_terms[pos].size() < term.size()) {

476

m_terms[pos] = term;

477

m_nste[pos] = noexpand;

478

}

479

return true;

480

}

481

bool flush()

482

{

483

for (map<int, string>::const_iterator it = m_terms.begin();

484

it != m_terms.end(); it++) {

485

m_ts->terms.push_back(it->second);

486

m_ts->nostemexps.push_back(m_nste[it->first]);

487

}

488

return true;

489

}

490

private:

491

TextSplitQ *m_ts;

492

map<int, string> m_terms;

493

map<int, bool> m_nste;

494

};

495

496

497

#if 1

498

static void listVector(const string& what, const vector<string>&l)

499

{

500

string a;

501

for (vector<string>::const_iterator it = l.begin(); it != l.end(); it++) {

502

a = a + *it + " ";

503

}

504

LOGDEB0(("%s: %s\n", what.c_str(), a.c_str()));

505

}

506

#endif

507

508

/** Expand term into term list, using appropriate mode: stem, wildcards,

509

* diacritics...

510

511

* @param mods stem expansion, case and diacritics sensitivity control.

512

* @param term input single word

513

* @param oexp output expansion list

514

* @param sterm output original input term if there were no wildcards

515

* @param prefix field prefix in index. We could recompute it, but the caller

516

* has it already. Used in the simple case where there is nothing to expand,

517

* and we just return the prefixed term (else Db::termMatch deals with it).

518

519

bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,

520

string& ermsg, int mods,

521

const string& term,

522

vector<string>& oexp, string &sterm,

523

const string& prefix)

524

{

525

LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",

526

mods, m_field.c_str(), term.c_str(), getStemLang().c_str()));

527

sterm.clear();

528

oexp.clear();

529

if (term.empty())

530

return true;

531

532

bool maxexpissoft = false;

533

int maxexpand = getSoftMaxExp();

534

if (maxexpand != -1) {

535

maxexpissoft = true;

536

} else {

537

maxexpand = getMaxExp();

538

}

539

540

bool haswild = term.find_first_of(cstr_minwilds) != string::npos;

541

542

// If there are no wildcards, add term to the list of user-entered terms

543

if (!haswild) {

544

m_hldata.uterms.insert(term);

545

sterm = term;

546

}

547

// No stem expansion if there are wildcards or if prevented by caller

548

bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;

549

if (haswild || getStemLang().empty()) {

550

LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));

551

nostemexp = true;

552

}

553

554

// noexpansion can be modified further down by possible case/diac expansion

555

bool noexpansion = nostemexp && !haswild;

556

557

int termmatchsens = 0;

558

559

bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;

560

bool case_sensitive = (mods & SDCM_CASESENS) != 0;

561

562

if (o_index_stripchars) {

563

diac_sensitive = case_sensitive = false;

564

} else {

565

// If we are working with a raw index, apply the rules for case and

566

// diacritics sensitivity.

567

568

// If any character has a diacritic, we become

569

// diacritic-sensitive. Note that the way that the test is

570

// performed (conversion+comparison) will automatically ignore

571

// accented characters which are actually a separate letter

572

if (getAutoDiac() && unachasaccents(term)) {

573

LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));

574

diac_sensitive = true;

575

}

576

577

// If any character apart the first is uppercase, we become

578

// case-sensitive. The first character is reserved for

579

// turning off stemming. You need to use a query language

580

// modifier to search for Floor in a case-sensitive way.

581

Utf8Iter it(term);

582

it++;

583

if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {

584

LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));

585

case_sensitive = true;

586

}

587

588

// If we are sensitive to case or diacritics turn stemming off

589

if (diac_sensitive || case_sensitive) {

590

LOGDEB0(("expandTerm: diac or case sens set -> stemexpand off\n"));

591

nostemexp = true;

592

}

593

594

if (!case_sensitive || !diac_sensitive)

595

noexpansion = false;

596

}

597

598

if (case_sensitive)

599

termmatchsens |= Db::ET_CASESENS;

600

if (diac_sensitive)

601

termmatchsens |= Db::ET_DIACSENS;

602

603

if (noexpansion) {

604

oexp.push_back(prefix + term);

605

m_hldata.terms[term] = term;

606

LOGDEB(("ExpandTerm: noexpansion: final: %s\n", stringsToString(oexp).c_str()));

607

return true;

608

}

609

610

Db::MatchType mtyp = haswild ? Db::ET_WILD :

611

nostemexp ? Db::ET_NONE : Db::ET_STEM;

612

TermMatchResult res;

613

if (!db.termMatch(mtyp | termmatchsens, getStemLang(), term, res, maxexpand,

614

m_field)) {

615

// Let it go through

616

}

617

618

// Term match entries to vector of terms

619

if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {

620

ermsg = "Maximum term expansion size exceeded."

621

" Maybe use case/diacritics sensitivity or increase maxTermExpand.";

622

return false;

623

}

624

for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();

625

it != res.entries.end(); it++) {

626

oexp.push_back(it->term);

627

}

628

// If the term does not exist at all in the db, the return from

629

// termMatch() is going to be empty, which is not what we want (we

630

// would then compute an empty Xapian query)

631

if (oexp.empty())

632

oexp.push_back(prefix + term);

633

634

// Remember the uterm-to-expansion links

635

for (vector<string>::const_iterator it = oexp.begin();

636

it != oexp.end(); it++) {

637

m_hldata.terms[strip_prefix(*it)] = term;

638

}

639

LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));

640

return true;

641

}

642

643

// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
//
// @param vvit     first string vector to process
// @param vvend    end of the string vector range
// @param comb     work area holding the combination being built. It should
//                 be empty on the initial call, and is left as it was found.
// @param allcombs output: every complete combination is appended to it
//
// Nothing is appended if the range is empty, or if any of the string
// vectors is empty.
void multiply_groups(std::vector<std::vector<std::string> >::const_iterator vvit,
                     std::vector<std::vector<std::string> >::const_iterator vvend,
                     std::vector<std::string>& comb,
                     std::vector<std::vector<std::string> >& allcombs)
{
    // Empty range: there is no string vector to dereference below.
    if (vvit == vvend)
        return;

    // Remember my string vector and compute next, for recursive calls.
    std::vector<std::vector<std::string> >::const_iterator myvit = vvit++;

    // Walk the string vector I'm called upon and, for each string,
    // add it to current result, and call myself recursively on the
    // next string vector. The last call (last element of the vector of
    // vectors), adds the elementary result to the output
    for (std::vector<std::string>::const_iterator strit = myvit->begin();
         strit != myvit->end(); ++strit) {

        // Add my current value to the string vector we're building
        comb.push_back(*strit);

        if (vvit == vvend) {
            // Last call: store current result
            allcombs.push_back(comb);
        } else {
            // Call recursively on next string vector
            multiply_groups(vvit, vvend, comb, allcombs);
        }

        // Pop the value I just added (make room for the next element in my
        // vector)
        comb.pop_back();
    }
}

676

677

void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,

678

const string& span,

679

int mods, void * pq)

680

{

681

vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);

682

LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",

683

span.c_str(), (unsigned int)mods));

684

vector<string> exp;

685

string sterm; // dumb version of user term

686

687

string prefix;

688

const FieldTraits *ftp;

689

if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {

690

prefix = wrap_prefix(ftp->pfx);

691

}

692

693

if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix))

694

return;

695

696

// Set up the highlight data. No prefix should go in there

697

for (vector<string>::const_iterator it = exp.begin();

698

it != exp.end(); it++) {

699

m_hldata.groups.push_back(vector<string>(1, it->substr(prefix.size())));

700

m_hldata.slacks.push_back(0);

701

m_hldata.grpsugidx.push_back(m_hldata.ugroups.size() - 1);

702

}

703

704

// Push either term or OR of stem-expanded set

705

Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());

706

m_curcl += exp.size();

707

708

// If sterm (simplified original user term) is not null, give it a

709

// relevance boost. We do this even if no expansion occurred (else

710

// the non-expanded terms in a term list would end-up with even

711

// less wqf). This does not happen if there are wildcards anywhere

712

// in the search.

713

// We normally boost the original term in the stem expansion list. Don't

714

// do it if there are wildcards anywhere, this would skew the results.

715

bool doBoostUserTerm =

716

(m_parentSearch && !m_parentSearch->haveWildCards()) ||

717

(m_parentSearch == 0 && !m_haveWildCards);

718

if (doBoostUserTerm && !sterm.empty()) {

719

xq = Xapian::Query(Xapian::Query::OP_OR, xq,

720

Xapian::Query(prefix+sterm,

721

original_term_wqf_booster));

722

}

723

pqueries.push_back(xq);

724

}

725

726

// User entry element had several terms: transform into a PHRASE or

727

// NEAR xapian query, the elements of which can themselves be OR

728

// queries if the terms get expanded by stemming or wildcards (we

729

// don't do stemming for PHRASE though)

730

void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,

731

TextSplitQ *splitData,

732

int mods, void *pq,

733

bool useNear, int slack)

734

{

735

vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);

736

Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :

737

Xapian::Query::OP_PHRASE;

738

vector<Xapian::Query> orqueries;

739

#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF

740

bool hadmultiple = false;

741

#endif

742

vector<vector<string> >groups;

743

744

string prefix;

745

const FieldTraits *ftp;

746

if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {

747

prefix = wrap_prefix(ftp->pfx);

748

}

749

750

if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {

751

orqueries.push_back(Xapian::Query(prefix + start_of_field_term));

752

slack++;

753

}

754

755

// Go through the list and perform stem/wildcard expansion for each element

756

vector<bool>::iterator nxit = splitData->nostemexps.begin();

757

for (vector<string>::iterator it = splitData->terms.begin();

758

it != splitData->terms.end(); it++, nxit++) {

759

LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));

760

// Adjust when we do stem expansion. Not if disabled by

761

// caller, not inside phrases, and some versions of xapian

762

// will accept only one OR clause inside NEAR.

763

bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)

764

#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF

765

|| hadmultiple

766

#endif // single OR inside NEAR

767

;

768

int lmods = mods;

769

if (nostemexp)

770

lmods |= SearchDataClause::SDCM_NOSTEMMING;

771

string sterm;

772

vector<string> exp;

773

if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))

774

return;

775

LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));

776

listVector("", exp);

777

// groups is used for highlighting, we don't want prefixes in there.

778

vector<string> noprefs;

779

for (vector<string>::const_iterator it = exp.begin();

780

it != exp.end(); it++) {

781

noprefs.push_back(it->substr(prefix.size()));

782

}

783

groups.push_back(noprefs);

784

orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,

785

exp.begin(), exp.end()));

786

m_curcl += exp.size();

787

if (m_curcl >= getMaxCl())

788

return;

789

#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF

790

if (exp.size() > 1)

791

hadmultiple = true;

792

#endif

793

}

794

795

if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {

796

orqueries.push_back(Xapian::Query(prefix + end_of_field_term));

797

slack++;

798

}

799

800

// Generate an appropriate PHRASE/NEAR query with adjusted slack

801

// For phrases, give a relevance boost like we do for original terms

802

LOGDEB2(("PHRASE/NEAR: alltermcount %d lastpos %d\n",

803

splitData->alltermcount, splitData->lastpos));

804

Xapian::Query xq(op, orqueries.begin(), orqueries.end(),

805

splitData->lastpos + 1 + slack);

806

if (op == Xapian::Query::OP_PHRASE)

807

xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,

808

original_term_wqf_booster);

809

pqueries.push_back(xq);

810

811

// Add all combinations of NEAR/PHRASE groups to the highlighting data.

812

vector<vector<string> > allcombs;

813

vector<string> comb;

814

multiply_groups(groups.begin(), groups.end(), comb, allcombs);

815

816

// Insert the search groups and slacks in the highlight data, with

817

// a reference to the user entry that generated them:

818

m_hldata.groups.insert(m_hldata.groups.end(),

819

allcombs.begin(), allcombs.end());

820

m_hldata.slacks.insert(m_hldata.slacks.end(), allcombs.size(), slack);

821

m_hldata.grpsugidx.insert(m_hldata.grpsugidx.end(), allcombs.size(),

822

m_hldata.ugroups.size() - 1);

823

}

824

825

// Trim string beginning with ^ or ending with $ and convert to flags

826

static int stringToMods(string& s)

827

{

828

int mods = 0;

829

// Check for an anchored search

830

trimstring(s);

831

if (s.length() > 0 && s[0] == '^') {

832

mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;

833

s.erase(0, 1);

834

}

835

if (s.length() > 0 && s[s.length()-1] == '$') {

836

mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;

837

s.erase(s.length()-1);

838

}

839

return mods;

840

}

841

842

/**

843

* Turn user entry string (NOT query language) into a list of xapian queries.

844

* We just separate words and phrases, and do wildcard and stem expansion,

845

846

* This is used to process data entered into an OR/AND/NEAR/PHRASE field of

847

* the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user

848

* entry).

849

850

* This appears awful, and it would seem that the split into

851

* terms/phrases should be performed in the upper layer so that we

852

* only receive pure term or near/phrase pure elements here, but in

853

* fact there are things that would appear like terms to naive code,

854

* and which will actually may be turned into phrases (ie: tom:jerry),

855

* in a manner which intimately depends on the index implementation,

856

* so that it makes sense to process this here.

857

858

* The final list contains one query for each term or phrase

859

* - Elements corresponding to a stem-expanded part are an OP_OR

860

* composition of the stem-expanded terms (or a single term query).

861

* - Elements corresponding to phrase/near are an OP_PHRASE/NEAR

862

* composition of the phrase terms (no stem expansion in this case)

863

* @return the subquery count (either or'd stem-expanded terms or phrase word

864

* count)

865

866

bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,

867

string &ermsg, void *pq,

868

int slack, bool useNear)

869

{

870

vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);

871

int mods = m_modifiers;

872

873

LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "

874

"slack %d near %d\n",

875

iq.c_str(), m_field.c_str(), mods, slack, useNear));

876

ermsg.erase();

877

m_curcl = 0;

878

const StopList stops = db.getStopList();

879

880

// Simple whitespace-split input into user-level words and

881

// double-quoted phrases: word1 word2 "this is a phrase".

882

883

// The text splitter may further still decide that the resulting

884

// "words" are really phrases, this depends on separators:

885

// [paul@dom.net] would still be a word (span), but [about:me]

886

// will probably be handled as a phrase.

887

vector<string> phrases;

888

TextSplit::stringToStrings(iq, phrases);

889

890

// Process each element: textsplit into terms, handle stem/wildcard

891

// expansion and transform into an appropriate Xapian::Query

892

try {

893

for (vector<string>::iterator it = phrases.begin();

894

it != phrases.end(); it++) {

895

LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));

896

// Anchoring modifiers

897

int amods = stringToMods(*it);

898

int terminc = amods != 0 ? 1 : 0;

899

mods |= amods;

900

// If there are multiple spans in this element, including

901

// at least one composite, we have to increase the slack

902

// else a phrase query including a span would fail.

903

// Ex: "term0@term1 term2" is onlyspans-split as:

904

// 0 term0@term1 0 12

905

// 2 term2 13 18

906

// The position of term2 is 2, not 1, so a phrase search

907

// would fail.

908

// We used to do word split, searching for

909

// "term0 term1 term2" instead, which may have worse

910

// performance, but will succeed.

911

// We now adjust the phrase/near slack by comparing the term count

912

// and the last position

913

914

// The term processing pipeline:

915

TermProcQ tpq;

916

TermProc *nxt = &tpq;

917

TermProcStop tpstop(nxt, stops); nxt = &tpstop;

918

//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;

919

//tpcommon.onlygrams(true);

920

TermProcPrep tpprep(nxt);

921

if (o_index_stripchars)

922

nxt = &tpprep;

923

924

TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |

925

TextSplit::TXTS_KEEPWILD),

926

stops, nxt);

927

tpq.setTSQ(&splitter);

928

splitter.text_to_words(*it);

929

930

slack += splitter.lastpos - splitter.terms.size() + 1;

931

932

LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));

933

switch (splitter.terms.size() + terminc) {

934

case 0:

935

continue;// ??

936

case 1: {

937

int lmods = mods;

938

if (splitter.nostemexps.front())

939

lmods |= SearchDataClause::SDCM_NOSTEMMING;

940

m_hldata.ugroups.push_back(splitter.terms);

941

processSimpleSpan(db, ermsg, splitter.terms.front(),

942

lmods, &pqueries);

943

}

944

break;

945

default:

946

m_hldata.ugroups.push_back(splitter.terms);

947

processPhraseOrNear(db, ermsg, &splitter, mods, &pqueries,

948

useNear, slack);

949

}

950

if (m_curcl >= getMaxCl()) {

951

ermsg = maxXapClauseMsg;

952

if (!o_index_stripchars)

953

ermsg += maxXapClauseCaseDiacMsg;

954

break;

955

}

956

}

957

} catch (const Xapian::Error &e) {

958

ermsg = e.get_msg();

959

} catch (const string &s) {

960

ermsg = s;

961

} catch (const char *s) {

962

ermsg = s;

963

} catch (...) {

964

ermsg = "Caught unknown exception";

965

}

966

if (!ermsg.empty()) {

967

LOGERR(("stringToXapianQueries: %s\n", ermsg.c_str()));

968

return false;

969

}

970

return true;

971

}

972

973

// Translate a simple OR or AND search clause.

974

bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)

975

{

976

LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",

977

getStemLang().c_str()));

978

979

Xapian::Query *qp = (Xapian::Query *)p;

980

*qp = Xapian::Query();

981

982

Xapian::Query::op op;

983

switch (m_tp) {

984

case SCLT_AND: op = Xapian::Query::OP_AND; break;

985

case SCLT_OR: op = Xapian::Query::OP_OR; break;

986

default:

987

LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));

988

return false;

989

}

990

991

vector<Xapian::Query> pqueries;

992

if (!processUserString(db, m_text, m_reason, &pqueries))

993

return false;

994

if (pqueries.empty()) {

995

LOGERR(("SearchDataClauseSimple: resolved to null query\n"));

996

return true;

997

}

998

999

*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());

1000

if (m_weight != 1.0) {

1001

*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);

1002

}

1003

return true;

1004

}

1005

1006

// Translate a FILENAME search clause. This always comes

1007

// from a "filename" search from the gui or recollq. A query language

1008

// "filename:"-prefixed field will not go through here, but through

1009

// the generic field-processing code.

1010

1011

// We do not split the entry any more (used to do some crazy thing

1012

// about expanding multiple fragments in the past). We just take the

1013

// value blanks and all and expand this against the indexed unsplit

1014

// file names

1015

bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)

1016

{

1017

Xapian::Query *qp = (Xapian::Query *)p;

1018

*qp = Xapian::Query();

1019

1020

int maxexp = getSoftMaxExp();

1021

if (maxexp == -1)

1022

maxexp = getMaxExp();

1023

1024

vector<string> names;

1025

db.filenameWildExp(m_text, names, maxexp);

1026

*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());

1027

1028

if (m_weight != 1.0) {

1029

*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);

1030

}

1031

return true;

1032

}

1033

1034

// Translate a dir: path filtering clause. See comments in .h

1035

bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)

1036

{

1037

LOGDEB(("SearchDataClausePath::toNativeQuery: [%s]\n", m_text.c_str()));

1038

Xapian::Query *qp = (Xapian::Query *)p;

1039

*qp = Xapian::Query();

1040

1041

if (m_text.empty()) {

1042

LOGERR(("SearchDataClausePath: empty path??\n"));

1043

m_reason = "Empty path ?";

1044

return false;

1045

}

1046

1047

vector<Xapian::Query> orqueries;

1048

1049

if (m_text[0] == '/')

1050

orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));

273

// Write a textual description of the search to the stream: one summary
// line (clause count, file type filter sizes, date/size/wildcard state),
// then one line per clause, then an empty line.
void SearchData::dump(ostream& o) const
{
    // Summary line
    o << "SearchData: " << " qs " << int(m_query.size());
    o << " ft " << m_filetypes.size();
    o << " nft " << m_nfiletypes.size();
    o << " hd " << m_haveDates;
    o << " maxs " << int(m_maxSize);
    o << " mins " << int(m_minSize);
    o << " wc " << m_haveWildCards << "\n";

    // Each clause describes itself
    std::vector<SearchDataClause*>::const_iterator clause = m_query.begin();
    while (clause != m_query.end()) {
        (*clause)->dump(o);
        o << "\n";
        ++clause;
    }
    o << "\n";
}

286

287

// Base class version: only outputs a placeholder tag.
void SearchDataClause::dump(ostream& o) const
{
    o << "SearchDataClause??";
}

291

292

// Output the clause as: ClauseSimple: [- ][field : text]
// The "- " marker is only present for exclusion clauses, and the field
// part only when a field name is set.
void SearchDataClauseSimple::dump(ostream& o) const
{
    o << "ClauseSimple: " << (m_exclude ? "- " : "") << "[";
    if (!m_field.empty()) {
        o << m_field << " : ";
    }
    o << m_text << "]";
}

302

303

// Output the clause as: ClauseFN: [ - ][text]
// The " - " marker is only present for exclusion clauses.
void SearchDataClauseFilename::dump(ostream& o) const
{
    o << "ClauseFN: " << (m_exclude ? " - " : "");
    o << "[" << m_text << "]";
}

310

311

// Output the clause as: ClausePath: [ - ][text]
// The " - " marker is only present for exclusion clauses.
void SearchDataClausePath::dump(ostream& o) const
{
    o << "ClausePath: " << (m_exclude ? " - " : "");
    o << "[" << m_text << "]";
}

318

319

void SearchDataClauseDist::dump(ostream& o) const

320

{

321

if (m_tp == SCLT_NEAR)

322

o << "ClauseDist: NEAR: ";

1051

323

else

1052

m_text = path_tildexpand(m_text);

1053

1054

vector<string> vpath;

1055

stringToTokens(m_text, vpath, "/");

1056

1057

for (vector<string>::const_iterator pit = vpath.begin();

1058

pit != vpath.end(); pit++){

1059

1060

string sterm;

1061

vector<string> exp;

1062

if (!expandTerm(db, m_reason,

1063

SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS,

1064

*pit, exp, sterm, wrap_prefix(pathelt_prefix))) {

1065

return false;

1066

}

1067

LOGDEB0(("SDataPath::toNative: exp size %d\n", exp.size()));

1068

listVector("", exp);

1069

if (exp.size() == 1)

1070

orqueries.push_back(Xapian::Query(exp[0]));

1071

else

1072

orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,

1073

exp.begin(), exp.end()));

1074

m_curcl += exp.size();

1075

if (m_curcl >= getMaxCl())

1076

return false;

1077

}

1078

1079

*qp = Xapian::Query(Xapian::Query::OP_PHRASE,

1080

orqueries.begin(), orqueries.end());

1081

1082

if (m_weight != 1.0) {

1083

*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);

1084

}

1085

return true;

324

o << "ClauseDist: PHRA: ";

325

326

if (m_exclude)

327

o << " - ";

328

o << "[";

329

if (!m_field.empty())

330

o << m_field << " : ";

331

o << m_text << "]";

1086

332

}

1087

333

1088

// Translate NEAR or PHRASE clause.

1089

bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)

334

void SearchDataClauseSub::dump(ostream& o) const

1090

335

{

1091

LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));

1092

1093

Xapian::Query *qp = (Xapian::Query *)p;

1094

*qp = Xapian::Query();

1095

1096

vector<Xapian::Query> pqueries;

1097

Xapian::Query nq;

1098

1099

// We produce a single phrase out of the user entry then use

1100

// stringToXapianQueries() to lowercase and simplify the phrase

1101

// terms etc. This will result into a single (complex)

1102

// Xapian::Query.

1103

if (m_text.find('\"') != string::npos) {

1104

m_text = neutchars(m_text, "\"");

1105

}

1106

string s = cstr_dquote + m_text + cstr_dquote;

1107

bool useNear = (m_tp == SCLT_NEAR);

1108

if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))

1109

return false;

1110

if (pqueries.empty()) {

1111

LOGERR(("SearchDataClauseDist: resolved to null query\n"));

1112

return true;

1113

}

1114

1115

*qp = *pqueries.begin();

1116

if (m_weight != 1.0) {

1117

*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);

1118

}

1119

return true;

336

o << "ClauseSub {\n";

337

m_sub.getconstptr()->dump(o);

338

o << "}";

1120

339

}

1121

340

1122

341

} // Namespace Rcl

Older »