3
# The author disclaims copyright to this source code.
5
#*************************************************************************
6
# This file implements tests for prefix-searching in the fts2
7
# component of the SQLite library.
9
# $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $
12
# Resolve the directory containing this script from the path in $argv0.
set testdir [file dirname $argv0]
13
# Load tester.tcl from that same directory (presumably the shared test
# harness providing helpers such as execsql -- confirm against tester.tcl).
source $testdir/tester.tcl
15
# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
21
# A large string to prime the pump with.
23
Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
24
iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
25
sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
26
aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
27
ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
28
at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
29
ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
30
luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
31
lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
32
potenti. Cum sociis natoque penatibus et magnis dis parturient
33
montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
34
suscipit nec, consequat quis, risus.
38
CREATE VIRTUAL TABLE t1 USING fts2(c);
40
INSERT INTO t1(rowid, c) VALUES(1, $text);
41
INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
46
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
51
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
54
# Prefix includes exact match
56
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
59
# Make certain everything isn't considered a prefix!
61
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
64
# Prefix across multiple rows.
66
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
69
# Likewise, with multiple hits in one document.
71
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
74
# Prefix which should only hit one document.
76
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
79
# * not at end is dropped.
81
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
84
# Stand-alone * is dropped.
86
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
89
# Phrase-query prefix.
91
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
94
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
97
# Phrase query with multiple prefix matches.
99
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
102
# Phrase query with multiple prefix matches followed by an exact term.
104
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
110
# Test across updates (and, by implication, deletes).
112
# Version of text without "lorem".
113
# NOTE(review): Tcl does not treat single quotes as quoting, so every
# match of [Ll]orem is replaced with the two literal characters ''
# rather than being deleted. The substituted string is stored in ntext.
regsub -all {[Ll]orem} $text '' ntext
116
CREATE VIRTUAL TABLE t2 USING fts2(c);
118
INSERT INTO t2(rowid, c) VALUES(1, $text);
119
INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
120
UPDATE t2 SET c = $ntext WHERE rowid = 1;
123
# Can't see lorem as an exact match.
125
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
128
# Can't see a prefix of lorem, either.
130
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
133
# Can see lovely in the other document.
135
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
138
# Can still see other hits.
140
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
143
# Prefix which should only hit one document.
145
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
150
# Test with a segment which will have multiple levels in the tree.
152
# Build a big document with lots of unique terms.
154
foreach c {a b c d e} {
155
regsub -all {[A-Za-z]+} $bigtext "&$c" t
159
# Populate a table with many copies of the big document, so that we
160
# can test the number of hits found. Populate $ret with the expected
161
# hit counts for each row. offsets() returns 4 elements for every
162
# hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
167
CREATE VIRTUAL TABLE t3 USING fts2(c);
169
INSERT INTO t3(rowid, c) VALUES(1, $text);
170
INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
172
for {set i 0} {$i<100} {incr i} {
173
db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
178
# Test that we get the expected number of hits.
181
db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
183
lappend t [expr {$l/4}]
188
# TODO(shess) It would be useful to test a couple edge cases, but I
189
# don't know if we have the precision to manage it from here at this
190
# time. Prefix hits can cross leaves, which the code above _should_
191
# hit by virtue of size. There are two variations on this. If the
192
# tree is 2 levels high, the code will find the leaf-node extent
193
# directly, but if it's higher, the code will have to follow two
194
# separate interior branches down the tree. Both should be tested.