1
package org.apache.lucene.facet.search;
4
import java.io.IOException;
5
import java.util.Iterator;
6
import java.util.LinkedHashMap;
7
import java.util.concurrent.ConcurrentHashMap;
8
import java.util.concurrent.ConcurrentLinkedQueue;
10
import org.apache.lucene.index.IndexReader;
12
import org.apache.lucene.facet.index.params.CategoryListParams;
13
import org.apache.lucene.facet.index.params.FacetIndexingParams;
14
import org.apache.lucene.facet.search.cache.CategoryListCache;
15
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
18
* Licensed to the Apache Software Foundation (ASF) under one or more
19
* contributor license agreements. See the NOTICE file distributed with
20
* this work for additional information regarding copyright ownership.
21
* The ASF licenses this file to You under the Apache License, Version 2.0
22
* (the "License"); you may not use this file except in compliance with
23
* the License. You may obtain a copy of the License at
25
* http://www.apache.org/licenses/LICENSE-2.0
27
* Unless required by applicable law or agreed to in writing, software
28
* distributed under the License is distributed on an "AS IS" BASIS,
29
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
30
* See the License for the specific language governing permissions and
31
* limitations under the License.
35
* Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
36
* facet indexing params.
38
* @lucene.experimental
40
public final class TotalFacetCountsCache {
43
* Default size of in memory cache for computed total facet counts.
44
* Set to 2 for the case when an application reopened a reader and
45
* the original one is still in use (Otherwise there will be
46
* switching again and again between the two.)
48
public static final int DEFAULT_CACHE_SIZE = 2;
50
private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
53
* Get the single instance of this cache
55
public static TotalFacetCountsCache getSingleton() {
60
* In-memory cache of TFCs.
62
* <li>Its size is kept within limits through {@link #trimCache()}.
63
* <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
64
* <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
66
* @see #markRecentlyUsed(TFCKey)
68
private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
71
* A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
72
* @see #markRecentlyUsed(TFCKey)
74
private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
76
private int maxCacheSize = DEFAULT_CACHE_SIZE;
78
/** Private constructor, enforcing the singleton pattern. */
private TotalFacetCountsCache() {
}
83
* Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
84
* If not in cache, computed here and added to the cache for later use.
85
* @param indexReader the documents index
86
* @param taxonomy the taxonomy index
87
* @param facetIndexingParams facet indexing parameters
88
* @param clCache category list cache for faster computation, can be null
89
* @return the total facet counts.
91
public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
92
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
94
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
95
// it is important that this call is not synchronized, so that available TFC
96
// would not wait for one that needs to be computed.
97
TotalFacetCounts tfc = cache.get(key);
99
markRecentlyUsed(key);
102
return computeAndCache(key, clCache);
106
* Mark key as it as recently used.
108
* <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
110
* <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
111
* But unfortunately, Java does not provide one.
112
* Instead, we combine two concurrent objects:
114
* <li>{@link ConcurrentHashMap} for the cached TFCs.
115
* <li>{@link ConcurrentLinkedQueue} for active keys
117
* <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
118
* <li>Checks for a cached item through getTotalCounts() are not synchronized.
119
* Therefore, the case that a needed TFC is in the cache is very fast:
120
* it does not wait for the computation of other TFCs.
121
* <li>computeAndCache() is synchronized, and, has a (double) check of the required
122
* TFC, to avoid computing the same TFC twice.
123
* <li>A race condition in this method (markRecentlyUsed) might result in two copies
124
* of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
125
* where an attempt to remove the same key twice is a no-op.
128
private void markRecentlyUsed(TFCKey key) {
133
private synchronized void trimCache() {
134
// loop until cache is of desired size.
135
while (cache.size()>maxCacheSize ) {
136
TFCKey key = lruKeys.poll();
137
if (key==null) { //defensive
138
// it is defensive since lruKeys presumably covers the cache keys
139
key = cache.keys().nextElement();
141
// remove this element. Note that an attempt to remove with the same key again is a no-op,
142
// which gracefully handles the possible race in markRecentlyUsed().
148
* compute TFC and cache it, after verifying it was not just added - for this
149
* matter this method is synchronized, which is not too bad, because there is
150
* lots of work done in the computations.
152
private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
153
TotalFacetCounts tfc = cache.get(key);
155
tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
164
* Load {@link TotalFacetCounts} matching input parameters from the provided outputFile
165
* and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
166
* If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
167
* replaced by the loaded one.
168
* @param inputFile file from which to read the data
169
* @param indexReader the documents index
170
* @param taxonomy the taxonomy index
171
* @param facetIndexingParams the facet indexing parameters
172
* @throws IOException on error
173
* @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
175
public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
176
FacetIndexingParams facetIndexingParams) throws IOException {
177
if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
178
throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
180
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
181
TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
184
markRecentlyUsed(key);
188
* Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
189
* making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
190
* If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
191
* not in the cache, this call will first compute them (which will also add them to the cache).
192
* @param outputFile file to store in.
193
* @param indexReader the documents index
194
* @param taxonomy the taxonomy index
195
* @param facetIndexingParams the facet indexing parameters
196
* @param clCache category list cache for faster computation, can be null
197
* @throws IOException on error
198
* @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
199
* @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
201
public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
202
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
203
File parentFile = outputFile.getParentFile();
205
( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
206
(!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
208
throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
210
TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
211
TotalFacetCounts.storeToFile(outputFile, tfc);
214
private static class TFCKey {
215
final IndexReader indexReader;
216
final TaxonomyReader taxonomy;
217
private final Iterable<CategoryListParams> clps;
218
private final int hashCode;
219
private final int nDels; // needed when a reader used for faceted search was just used for deletion.
220
final FacetIndexingParams facetIndexingParams;
222
public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
223
FacetIndexingParams facetIndexingParams) {
224
this.indexReader = indexReader;
225
this.taxonomy = taxonomy;
226
this.facetIndexingParams = facetIndexingParams;
227
this.clps = facetIndexingParams.getAllCategoryListParams();
228
this.nDels = indexReader.numDeletedDocs();
229
hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
233
public int hashCode() {
238
public boolean equals(Object other) {
239
TFCKey o = (TFCKey) other;
240
if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
243
Iterator<CategoryListParams> it1 = clps.iterator();
244
Iterator<CategoryListParams> it2 = o.clps.iterator();
245
while (it1.hasNext() && it2.hasNext()) {
246
if (!it1.next().equals(it2.next())) {
250
return it1.hasNext() == it2.hasNext();
257
public synchronized void clear() {
263
* @return the maximal cache size
265
public int getCacheSize() {
270
* Set the number of TotalFacetCounts arrays that will remain in memory cache.
272
* If new size is smaller than current size, the cache is appropriately trimmed.
274
* Minimal size is 1, so passing zero or negative size would result in size of 1.
275
* @param size new size to set
277
public void setCacheSize(int size) {
278
if (size < 1) size = 1;
279
int origSize = maxCacheSize;
281
if (maxCacheSize < origSize) { // need to trim only if the cache was reduced