1
package org.apache.lucene.facet.search.sampling;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
21
* Parameters for sampling, dictating whether sampling is to take place and how.
23
* @lucene.experimental
25
public class SamplingParams {
28
* Default factor by which more results are requested over the sample set.
29
* @see SamplingParams#getOversampleFactor()
31
public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
34
* Default ratio between size of sample to original size of document set.
35
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
37
public static final double DEFAULT_SAMPLE_RATIO = 0.01;
40
* Default maximum size of sample.
41
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
43
public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;
46
* Default minimum size of sample.
47
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
49
public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;
52
* Default sampling threshold: if the number of results is less than this number, no sampling will take place.
53
* @see SamplingParams#getSamplingThreshold()
55
public static final int DEFAULT_SAMPLING_THRESHOLD = 75000;
57
// Current sampling settings. Each one starts at its corresponding DEFAULT_*
// constant and can be changed through the matching setter of this class.
private int maxSampleSize = DEFAULT_MAX_SAMPLE_SIZE;
58
private int minSampleSize = DEFAULT_MIN_SAMPLE_SIZE;
59
private double sampleRatio = DEFAULT_SAMPLE_RATIO;
60
private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
61
private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
64
* Return the maxSampleSize.
65
* In no case should the resulting sample size exceed this value.
66
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
68
public final int getMaxSampleSize() {
73
* Return the minSampleSize.
74
* In no case should the resulting sample size be smaller than this value.
75
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
77
public final int getMinSampleSize() {
82
* @return the sampleRatio
83
* @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
85
public final double getSampleRatio() {
90
* Return the samplingThreshold.
91
* Sampling would be performed only for document sets larger than this.
93
public final int getSamplingThreshold() {
94
return samplingThreshold;
98
* @param maxSampleSize
99
* the maxSampleSize to set
100
* @see #getMaxSampleSize()
102
public void setMaxSampleSize(int maxSampleSize) {
103
this.maxSampleSize = maxSampleSize;
107
* @param minSampleSize
108
* the minSampleSize to set
109
* @see #getMinSampleSize()
111
public void setMinSampleSize(int minSampleSize) {
112
this.minSampleSize = minSampleSize;
117
* the sampleRatio to set
118
* @see #getSampleRatio()
120
public void setSampleRatio(double sampleRatio) {
121
this.sampleRatio = sampleRatio;
125
* Set the sampling threshold; sampling is performed only for document sets larger than this value.
126
* @see #getSamplingThreshold()
128
public void setSampingThreshold(int sampingThreshold) {
129
this.samplingThreshold = sampingThreshold;
133
* Check validity of sampling settings, making sure that
135
* <li> <code>minSampleSize &lt;= maxSampleSize &lt;= samplingThreshold</code></li>
136
* <li> <code>0 &lt; sampleRatio &lt;= 1</code></li>
139
* @return true if valid, false otherwise
141
public boolean validate() {
143
samplingThreshold >= maxSampleSize &&
144
maxSampleSize >= minSampleSize &&
150
* Return the oversampleFactor. When sampling, we would collect that many more
151
* results, so that later, when selecting top out of these, chances are higher
152
* to get actual best results. Note that having this value larger than 1 only
153
* makes sense when using a SampleFixer which finds accurate results, such as
154
* <code>TakmiSampleFixer</code>. When this value is smaller than 1, it is
155
* ignored and no oversampling takes place.
157
public final double getOversampleFactor() {
158
return oversampleFactor;
162
* @param oversampleFactor the oversampleFactor to set
163
* @see #getOversampleFactor()
165
public void setOversampleFactor(double oversampleFactor) {
166
this.oversampleFactor = oversampleFactor;
b'\\ No newline at end of file'