1
package org.apache.lucene.util.fst;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.util.ArrayUtil;
21
import org.apache.lucene.util.RamUsageEstimator;
23
import java.io.IOException;
25
/** Can next() and advance() through the terms in an FST
27
* @lucene.experimental
30
abstract class FSTEnum<T> {
31
protected final FST<T> fst;
33
@SuppressWarnings("unchecked") protected FST.Arc<T>[] arcs = new FST.Arc[10];
34
// outputs are cumulative
35
@SuppressWarnings("unchecked") protected T[] output = (T[]) new Object[10];
37
protected final T NO_OUTPUT;
38
protected final FST.Arc<T> scratchArc = new FST.Arc<T>();
41
protected int targetLength;
43
/** doFloor controls the behavior of advance: if it's true
44
* doFloor is true, advance positions to the biggest
45
* term before target. */
46
protected FSTEnum(FST<T> fst) {
48
NO_OUTPUT = fst.outputs.getNoOutput();
49
fst.getFirstArc(getArc(0));
50
output[0] = NO_OUTPUT;
53
protected abstract int getTargetLabel();
54
protected abstract int getCurrentLabel();
56
protected abstract void setCurrentLabel(int label);
57
protected abstract void grow();
59
/** Rewinds enum state to match the shared prefix between
60
* current term and target term */
61
protected final void rewindPrefix() throws IOException {
63
//System.out.println(" init");
65
fst.readFirstTargetArc(getArc(0), getArc(1));
68
//System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength);
70
final int currentLimit = upto;
72
while (upto < currentLimit && upto <= targetLength+1) {
73
final int cmp = getCurrentLabel() - getTargetLabel();
78
// seek backwards -- reset this arc to the first arc
79
final FST.Arc<T> arc = getArc(upto);
80
fst.readFirstTargetArc(getArc(upto-1), arc);
81
//System.out.println(" seek first arc");
88
protected void doNext() throws IOException {
89
//System.out.println("FE: next upto=" + upto);
91
//System.out.println(" init");
93
fst.readFirstTargetArc(getArc(0), getArc(1));
96
//System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast());
97
while (arcs[upto].isLast()) {
100
//System.out.println(" eof");
104
fst.readNextArc(arcs[upto]);
110
// TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
111
// SEEK_END)? saves the eq check above?
113
/** Seeks to smallest term that's >= target. */
114
protected void doSeekCeil() throws IOException {
116
//System.out.println(" advance len=" + target.length + " curlen=" + current.length);
118
// TODO: possibly caller could/should provide common
119
// prefix length? ie this work may be redundant if
120
// caller is in fact intersecting against its own
123
//System.out.println("FE.seekCeil upto=" + upto);
125
// Save time by starting at the end of the shared prefix
126
// b/w our current term & the target:
128
//System.out.println(" after rewind upto=" + upto);
130
FST.Arc<T> arc = getArc(upto);
131
int targetLabel = getTargetLabel();
132
//System.out.println(" init targetLabel=" + targetLabel);
134
// Now scan forward, matching the new suffix of the target
137
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);
139
if (arc.bytesPerArc != 0 && arc.label != -1) {
141
// Arcs are fixed array -- use binary search to find
144
final FST<T>.BytesReader in = fst.getBytesReader(0);
145
int low = arc.arcIdx;
146
int high = arc.numArcs-1;
148
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
149
boolean found = false;
150
while (low <= high) {
151
mid = (low + high) >>> 1;
152
in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
153
final int midLabel = fst.readLabel(in);
154
final int cmp = midLabel - targetLabel;
155
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
166
// NOTE: this code is dup'd w/ the code below (in
167
// the outer else clause):
171
fst.readNextRealArc(arc, in);
172
assert arc.arcIdx == mid;
173
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
174
output[upto] = fst.outputs.add(output[upto-1], arc.output);
175
if (targetLabel == FST.END_LABEL) {
178
setCurrentLabel(arc.label);
180
arc = fst.readFirstTargetArc(arc, getArc(upto));
181
targetLabel = getTargetLabel();
183
} else if (low == arc.numArcs) {
185
arc.arcIdx = arc.numArcs-2;
186
fst.readNextRealArc(arc, in);
188
// Dead end (target is after the last arc);
189
// rollback to last fork then push
195
final FST.Arc<T> prevArc = getArc(upto);
196
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
197
if (!prevArc.isLast()) {
198
fst.readNextArc(prevArc);
205
arc.arcIdx = (low > high ? low : high)-1;
206
fst.readNextRealArc(arc, in);
207
assert arc.label > targetLabel;
212
// Arcs are not array'd -- must do linear scan:
213
if (arc.label == targetLabel) {
215
output[upto] = fst.outputs.add(output[upto-1], arc.output);
216
if (targetLabel == FST.END_LABEL) {
219
setCurrentLabel(arc.label);
221
arc = fst.readFirstTargetArc(arc, getArc(upto));
222
targetLabel = getTargetLabel();
223
} else if (arc.label > targetLabel) {
226
} else if (arc.isLast()) {
227
// Dead end (target is after the last arc);
228
// rollback to last fork then push
234
final FST.Arc<T> prevArc = getArc(upto);
235
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
236
if (!prevArc.isLast()) {
237
fst.readNextArc(prevArc);
245
//System.out.println(" next scan");
246
fst.readNextArc(arc);
252
// TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
253
// SEEK_END)? saves the eq check above?
254
/** Seeks to largest term that's <= target. */
255
protected void doSeekFloor() throws IOException {
257
// TODO: possibly caller could/should provide common
258
// prefix length? ie this work may be redundant if
259
// caller is in fact intersecting against its own
261
//System.out.println("FE: seek floor upto=" + upto);
263
// Save CPU by starting at the end of the shared prefix
264
// b/w our current term & the target:
267
//System.out.println("FE: after rewind upto=" + upto);
269
FST.Arc<T> arc = getArc(upto);
270
int targetLabel = getTargetLabel();
272
//System.out.println("FE: init targetLabel=" + targetLabel);
274
// Now scan forward, matching the new suffix of the target
276
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
278
if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
279
// Arcs are fixed array -- use binary search to find
282
final FST<T>.BytesReader in = fst.getBytesReader(0);
283
int low = arc.arcIdx;
284
int high = arc.numArcs-1;
286
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
287
boolean found = false;
288
while (low <= high) {
289
mid = (low + high) >>> 1;
290
in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
291
final int midLabel = fst.readLabel(in);
292
final int cmp = midLabel - targetLabel;
293
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
304
// NOTE: this code is dup'd w/ the code below (in
305
// the outer else clause):
308
//System.out.println(" match! arcIdx=" + mid);
310
fst.readNextRealArc(arc, in);
311
assert arc.arcIdx == mid;
312
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
313
output[upto] = fst.outputs.add(output[upto-1], arc.output);
314
if (targetLabel == FST.END_LABEL) {
317
setCurrentLabel(arc.label);
319
arc = fst.readFirstTargetArc(arc, getArc(upto));
320
targetLabel = getTargetLabel();
322
} else if (high == -1) {
323
//System.out.println(" before first");
324
// Very first arc is after our target
325
// TODO: if each arc could somehow read the arc just
326
// before, we can save this re-scan. The ceil case
327
// doesn't need this because it reads the next arc
330
// First, walk backwards until we find a first arc
331
// that's before our target label:
332
fst.readFirstTargetArc(getArc(upto-1), arc);
333
if (arc.label < targetLabel) {
334
// Then, scan forwards to the arc just before
336
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
337
fst.readNextArc(arc);
346
targetLabel = getTargetLabel();
350
// There is a floor arc:
351
arc.arcIdx = (low > high ? high : low)-1;
352
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
353
fst.readNextRealArc(arc, in);
354
assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
355
assert arc.label < targetLabel;
361
if (arc.label == targetLabel) {
363
output[upto] = fst.outputs.add(output[upto-1], arc.output);
364
if (targetLabel == FST.END_LABEL) {
367
setCurrentLabel(arc.label);
369
arc = fst.readFirstTargetArc(arc, getArc(upto));
370
targetLabel = getTargetLabel();
371
} else if (arc.label > targetLabel) {
372
// TODO: if each arc could somehow read the arc just
373
// before, we can save this re-scan. The ceil case
374
// doesn't need this because it reads the next arc
377
// First, walk backwards until we find a first arc
378
// that's before our target label:
379
fst.readFirstTargetArc(getArc(upto-1), arc);
380
if (arc.label < targetLabel) {
381
// Then, scan forwards to the arc just before
383
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
384
fst.readNextArc(arc);
393
targetLabel = getTargetLabel();
396
} else if (!arc.isLast()) {
397
//System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
398
if (fst.readNextArcLabel(arc) > targetLabel) {
403
fst.readNextArc(arc);
413
private void incr() {
416
if (arcs.length <= upto) {
417
@SuppressWarnings("unchecked") final FST.Arc<T>[] newArcs =
418
new FST.Arc[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
419
System.arraycopy(arcs, 0, newArcs, 0, arcs.length);
422
if (output.length <= upto) {
423
@SuppressWarnings("unchecked") final T[] newOutput =
424
(T[]) new Object[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
425
System.arraycopy(output, 0, newOutput, 0, output.length);
430
// Appends current arc, and then recurses from its target,
431
// appending first arc all the way to the final node
432
private void pushFirst() throws IOException {
434
FST.Arc<T> arc = arcs[upto];
438
output[upto] = fst.outputs.add(output[upto-1], arc.output);
439
if (arc.label == FST.END_LABEL) {
443
//System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto]));
444
setCurrentLabel(arc.label);
447
final FST.Arc<T> nextArc = getArc(upto);
448
fst.readFirstTargetArc(arc, nextArc);
453
// Recurses from current arc, appending last arc all the
454
// way to the first final node
455
private void pushLast() throws IOException {
457
FST.Arc<T> arc = arcs[upto];
461
setCurrentLabel(arc.label);
462
output[upto] = fst.outputs.add(output[upto-1], arc.output);
463
if (arc.label == FST.END_LABEL) {
469
arc = fst.readLastTargetArc(arc, getArc(upto));
473
private FST.Arc<T> getArc(int idx) {
474
if (arcs[idx] == null) {
475
arcs[idx] = new FST.Arc<T>();