2
* The Apache Software License, Version 1.1
5
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in
17
* the documentation and/or other materials provided with the
20
* 3. The end-user documentation included with the redistribution,
21
* if any, must include the following acknowledgment:
22
* "This product includes software developed by the
23
* Apache Software Foundation (http://www.apache.org/)."
24
* Alternately, this acknowledgment may appear in the software itself,
25
* if and wherever such third-party acknowledgments normally appear.
27
* 4. The names "Xerces" and "Apache Software Foundation" must
28
* not be used to endorse or promote products derived from this
29
* software without prior written permission. For written
30
* permission, please contact apache@apache.org.
32
* 5. Products derived from this software may not be called "Apache",
33
* nor may "Apache" appear in their name, without prior written
34
* permission of the Apache Software Foundation.
36
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48
* ====================================================================
50
* This software consists of voluntary contributions made by many
51
* individuals on behalf of the Apache Software Foundation and was
52
* originally based on software copyright (c) 1999, International
53
* Business Machines, Inc., http://www.apache.org. For more
54
* information on the Apache Software Foundation, please see
55
* <http://www.apache.org/>.
2
* Copyright 2000-2002,2004,2005 The Apache Software Foundation.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
58
17
package org.apache.xerces.util;
79
38
* characters are especially prone to this poor hashing behavior.
42
* An instance of <code>SymbolTable</code> has two parameters that affect its
43
* performance: <i>initial capacity</i> and <i>load factor</i>. The
44
* <i>capacity</i> is the number of <i>buckets</i> in the SymbolTable, and the
45
* <i>initial capacity</i> is simply the capacity at the time the SymbolTable
46
* is created. Note that the SymbolTable is <i>open</i>: in the case of a "hash
47
* collision", a single bucket stores multiple entries, which must be searched
48
* sequentially. The <i>load factor</i> is a measure of how full the SymbolTable
49
* is allowed to get before its capacity is automatically increased.
50
* When the number of entries in the SymbolTable exceeds the product of the load
51
* factor and the current capacity, the capacity is increased by calling the
52
* <code>rehash</code> method.<p>
54
* Generally, the default load factor (.75) offers a good tradeoff between
55
* time and space costs. Higher values decrease the space overhead but
56
* increase the time cost to look up an entry (which is reflected in most
57
* <tt>SymbolTable</tt> operations, including <tt>addSymbol</tt> and <tt>containsSymbol</tt>).<p>
59
* The initial capacity controls a tradeoff between wasted space and the
60
* need for <code>rehash</code> operations, which are time-consuming.
61
* No <code>rehash</code> operations will <i>ever</i> occur if the initial
62
* capacity is greater than the maximum number of entries the
63
* <tt>Hashtable</tt> will contain divided by its load factor. However,
64
* setting the initial capacity too high can waste space.<p>
66
* If many entries are to be made into a <code>SymbolTable</code>,
67
* creating it with a sufficiently large capacity may allow the
68
* entries to be inserted more efficiently than letting it perform
69
* automatic rehashing as needed to grow the table. <p>
85
73
* @author Andy Clark
74
* @author John Kim, IBM
87
* @version $Id: SymbolTable.java,v 1.7 2002/02/07 22:15:09 neilg Exp $
76
* @version $Id: SymbolTable.java 320244 2005-03-14 23:09:35Z mrglavas $
89
78
public class SymbolTable {
103
92
protected Entry[] fBuckets = null;
94
/** actual table size **/
106
95
protected int fTableSize;
97
/** The total number of entries in the hash table. */
98
protected transient int fCount;
100
/** The table is rehashed when its size exceeds this threshold. (The
101
* value of this field is (int)(capacity * loadFactor).) */
102
protected int fThreshold;
104
/** The load factor for the SymbolTable. */
105
protected float fLoadFactor;
112
* Constructs a new, empty SymbolTable with the specified initial
113
* capacity and the specified load factor.
115
* @param initialCapacity the initial capacity of the SymbolTable.
116
* @param loadFactor the load factor of the SymbolTable.
117
* @throws IllegalArgumentException if the initial capacity is less
118
* than zero, or if the load factor is nonpositive.
120
public SymbolTable(int initialCapacity, float loadFactor) {
122
if (initialCapacity < 0) {
123
throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity);
126
if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
127
throw new IllegalArgumentException("Illegal Load: " + loadFactor);
130
if (initialCapacity == 0) {
134
fLoadFactor = loadFactor;
135
fTableSize = initialCapacity;
136
fBuckets = new Entry[fTableSize];
137
fThreshold = (int)(fTableSize * loadFactor);
112
/** Constructs a symbol table with a default number of buckets. */
142
* Constructs a new, empty SymbolTable with the specified initial capacity
143
* and default load factor, which is <tt>0.75</tt>.
145
* @param initialCapacity the initial capacity of the hashtable.
146
* @throws IllegalArgumentException if the initial capacity is less
149
public SymbolTable(int initialCapacity) {
150
this(initialCapacity, 0.75f);
154
* Constructs a new, empty SymbolTable with a default initial capacity (101)
155
* and load factor, which is <tt>0.75</tt>.
113
157
public SymbolTable() {
117
/** Constructs a symbol table with a specified number of buckets. */
118
public SymbolTable(int tableSize) {
119
fTableSize = tableSize;
120
fBuckets = new Entry[fTableSize];
158
this(TABLE_SIZE, 0.75f);
133
171
* @param symbol The new symbol.
135
173
public String addSymbol(String symbol) {
137
175
// search for identical symbol
138
176
int bucket = hash(symbol) % fTableSize;
139
int length = symbol.length();
140
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
141
if (length == entry.characters.length) {
142
for (int i = 0; i < length; i++) {
143
if (symbol.charAt(i) != entry.characters[i]) {
177
for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
178
if (entry.symbol.equals(symbol)) {
147
179
return entry.symbol;
183
if (fCount >= fThreshold) {
184
// Rehash the table if the threshold is exceeded
186
bucket = hash(symbol) % fTableSize;
151
189
// create new entry
152
190
Entry entry = new Entry(symbol, fBuckets[bucket]);
153
191
fBuckets[bucket] = entry;
154
193
return entry.symbol;
156
195
} // addSymbol(String):String
220
259
public int hash(char[] buffer, int offset, int length) {
223
for (int i = 0; i < length; i++) {
224
code = code * 37 + buffer[offset + i];
262
for (int i = 0; i < length; ++i) {
263
code = code * 31 + buffer[offset + i];
226
265
return code & 0x7FFFFFF;
228
267
} // hash(char[],int,int):int
270
* Increases the capacity of and internally reorganizes this
271
* SymbolTable, in order to accommodate and access its entries more
272
* efficiently. This method is called automatically when the
273
* number of keys in the SymbolTable exceeds this hashtable's capacity
276
protected void rehash() {
278
int oldCapacity = fBuckets.length;
279
Entry[] oldTable = fBuckets;
281
int newCapacity = oldCapacity * 2 + 1;
282
Entry[] newTable = new Entry[newCapacity];
284
fThreshold = (int)(newCapacity * fLoadFactor);
286
fTableSize = fBuckets.length;
288
for (int i = oldCapacity ; i-- > 0 ;) {
289
for (Entry old = oldTable[i] ; old != null ; ) {
293
int index = hash(e.characters, 0, e.characters.length) % newCapacity;
294
e.next = newTable[index];
231
301
* Returns true if the symbol table already contains the specified