~slub.team/goobi-indexserver/3.x

« back to all changes in this revision

Viewing changes to solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java

  • Committer: Sebastian Meyer
  • Date: 2012-08-03 09:12:40 UTC
  • Revision ID: sebastian.meyer@slub-dresden.de-20120803091240-x6861b0vabq1xror
Remove Lucene and Solr source code and add patches instead
Fix Bug #985487: Auto-suggestion for the search interface

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
package org.apache.solr.spelling;
2
 
/**
3
 
 * Licensed to the Apache Software Foundation (ASF) under one or more
4
 
 * contributor license agreements.  See the NOTICE file distributed with
5
 
 * this work for additional information regarding copyright ownership.
6
 
 * The ASF licenses this file to You under the Apache License, Version 2.0
7
 
 * (the "License"); you may not use this file except in compliance with
8
 
 * the License.  You may obtain a copy of the License at
9
 
 *
10
 
 *     http://www.apache.org/licenses/LICENSE-2.0
11
 
 *
12
 
 * Unless required by applicable law or agreed to in writing, software
13
 
 * distributed under the License is distributed on an "AS IS" BASIS,
14
 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 
 * See the License for the specific language governing permissions and
16
 
 * limitations under the License.
17
 
 */
18
 
 
19
 
import java.util.ArrayList;
20
 
import java.util.Arrays;
21
 
import java.util.Iterator;
22
 
import java.util.LinkedHashMap;
23
 
import java.util.List;
24
 
import java.util.Map;
25
 
import java.util.NoSuchElementException;
26
 
import java.util.PriorityQueue;
27
 
 
28
 
import org.apache.lucene.analysis.Token;
29
 
 
30
 
/**
31
 
 * <p>
32
 
 * Given a list of possible Spelling Corrections for multiple mis-spelled words
33
 
 * in a query, This iterator returns Possible Correction combinations ordered by
34
 
 * reasonable probability that such a combination will return actual hits if
35
 
 * re-queried. This implementation simply ranks the Possible Combinations by the
36
 
 * sum of their component ranks.
37
 
 * </p>
38
 
 * 
39
 
 */
40
 
public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
41
 
        private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
42
 
        private Iterator<RankedSpellPossibility> rankedPossibilityIterator = null;
43
 
        private int correctionIndex[];
44
 
        private boolean done = false;
45
 
 
46
 
        @SuppressWarnings("unused")
47
 
        private PossibilityIterator() {
48
 
                throw new AssertionError("You shan't go here.");
49
 
        }
50
 
 
51
 
        /**
52
 
         * <p>
53
 
         * We assume here that the passed-in inner LinkedHashMaps are already sorted
54
 
         * in order of "Best Possible Correction".
55
 
         * </p>
56
 
         * 
57
 
         * @param suggestions
58
 
         */
59
 
        public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
60
 
                for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
61
 
                        Token token = entry.getKey();
62
 
                        List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
63
 
                        for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
64
 
                                SpellCheckCorrection correction = new SpellCheckCorrection();
65
 
                                correction.setOriginal(token);
66
 
                                correction.setCorrection(entry1.getKey());
67
 
                                correction.setNumberOfOccurences(entry1.getValue());
68
 
                                possibleCorrections.add(correction);
69
 
                        }
70
 
                        possibilityList.add(possibleCorrections);
71
 
                }
72
 
 
73
 
                int wrapSize = possibilityList.size();
74
 
                if (wrapSize == 0) {
75
 
                        done = true;
76
 
                } else {
77
 
                        correctionIndex = new int[wrapSize];
78
 
                        for (int i = 0; i < wrapSize; i++) {
79
 
                                int suggestSize = possibilityList.get(i).size();
80
 
                                if (suggestSize == 0) {
81
 
                                        done = true;
82
 
                                        break;
83
 
                                }
84
 
                                correctionIndex[i] = 0;
85
 
                        }
86
 
                }
87
 
                
88
 
                long count = 0;
89
 
                PriorityQueue<RankedSpellPossibility> rankedPossibilities = new PriorityQueue<RankedSpellPossibility>();                
90
 
                while (count < maxEvaluations && internalHasNext()) {
91
 
                        RankedSpellPossibility rsp = internalNext();
92
 
                        count++;                        
93
 
                        
94
 
                        if(rankedPossibilities.size() >= maximumRequiredSuggestions && rsp.getRank() >= rankedPossibilities.peek().getRank()) {
95
 
                                continue;
96
 
                        }
97
 
                        rankedPossibilities.offer(rsp);
98
 
                        if(rankedPossibilities.size() > maximumRequiredSuggestions) {
99
 
                                rankedPossibilities.poll();
100
 
                        }
101
 
                }
102
 
                
103
 
                RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities.size()];
104
 
                for(int i=rankedPossibilities.size() - 1  ; i>=0 ; i--) {
105
 
                        rpArr[i] = rankedPossibilities.remove();
106
 
                }
107
 
                rankedPossibilityIterator = Arrays.asList(rpArr).iterator();            
108
 
        }
109
 
 
110
 
        private boolean internalHasNext() {
111
 
                return !done;
112
 
        }
113
 
 
114
 
        /**
115
 
         * <p>
116
 
         * This method is converting the independent LinkHashMaps containing various
117
 
         * (silo'ed) suggestions for each mis-spelled word into individual
118
 
         * "holistic query corrections", aka. "Spell Check Possibility"
119
 
         * </p>
120
 
         * <p>
121
 
         * Rank here is the sum of each selected term's position in its respective
122
 
         * LinkedHashMap.
123
 
         * </p>
124
 
         * 
125
 
         * @return
126
 
         */
127
 
        private RankedSpellPossibility internalNext() {
128
 
                if (done) {
129
 
                        throw new NoSuchElementException();
130
 
                }
131
 
 
132
 
                List<SpellCheckCorrection> possibleCorrection = new ArrayList<SpellCheckCorrection>();
133
 
                int rank = 0;
134
 
                for (int i = 0; i < correctionIndex.length; i++) {
135
 
                        List<SpellCheckCorrection> singleWordPossibilities = possibilityList.get(i);
136
 
                        SpellCheckCorrection singleWordPossibility = singleWordPossibilities.get(correctionIndex[i]);
137
 
                        rank += correctionIndex[i];
138
 
 
139
 
                        if (i == correctionIndex.length - 1) {
140
 
                                correctionIndex[i]++;
141
 
                                if (correctionIndex[i] == singleWordPossibilities.size()) {
142
 
                                        correctionIndex[i] = 0;
143
 
                                        if (correctionIndex.length == 1) {
144
 
                                                done = true;
145
 
                                        }
146
 
                                        for (int ii = i - 1; ii >= 0; ii--) {
147
 
                                                correctionIndex[ii]++;
148
 
                                                if (correctionIndex[ii] >= possibilityList.get(ii).size() && ii > 0) {
149
 
                                                        correctionIndex[ii] = 0;
150
 
                                                } else {
151
 
                                                        break;
152
 
                                                }
153
 
                                        }
154
 
                                }
155
 
                        }
156
 
                        possibleCorrection.add(singleWordPossibility);
157
 
                }
158
 
                
159
 
                if(correctionIndex[0] == possibilityList.get(0).size())
160
 
                {
161
 
                        done = true;
162
 
                }
163
 
 
164
 
                RankedSpellPossibility rsl = new RankedSpellPossibility();
165
 
                rsl.setCorrections(possibleCorrection);
166
 
                rsl.setRank(rank);
167
 
                return rsl;
168
 
        }
169
 
 
170
 
        public boolean hasNext() {
171
 
                return rankedPossibilityIterator.hasNext();
172
 
        }
173
 
 
174
 
        public RankedSpellPossibility next() {
175
 
                return rankedPossibilityIterator.next();
176
 
        }
177
 
 
178
 
        public void remove() {
179
 
                throw new UnsupportedOperationException();
180
 
        }
181
 
 
182
 
}