1
package com.lddubeau.ddb;
3
import java.io.BufferedReader;
4
import java.io.IOException;
5
import java.io.InputStreamReader;
6
import java.net.HttpURLConnection;
7
import java.net.MalformedURLException;
9
import java.util.HashSet;
10
import java.util.Hashtable;
12
import java.util.regex.Pattern;
13
import java.util.zip.GZIPInputStream;
16
* This class is designed to model simple indices that some web dictionaries
17
* export. These indices are expected to contain only a list of the words
18
* present in the dictionary. Internally, the class caches the indices so if two
19
* Index objects are created with the same URL, only one data structure exists
20
* internally. The index object itself merely knows what its URL is.
24
public final class Index
26
// URL of the index this object refers to; the constructor assigns it from the
// URL string it receives.
private final URL url;
29
* This constructor creates a new Index object which allows checking whether
30
* a word exists in an index.
33
* The URL where the index is located on the web.
34
* @throws MalformedURLException
35
* When the URL is incorrect.
37
public Index(String url) throws MalformedURLException
41
// NOTE(review): the condition guarding this throw is not visible in this
// listing - presumably a null check on url; confirm against the complete file.
throw new NullPointerException("url is null");
43
// Parsing the string with new URL(String) is what can raise the declared
// MalformedURLException.
this.url = new URL(url);
47
* This method verifies whether a term exists in the index.
51
* @return True if the term is present in the index, false if not.
53
public boolean exists(String term)
57
// NOTE(review): the condition guarding this throw is not visible in this
// listing - presumably a null check on term; confirm against the complete file.
throw new NullPointerException("term is null");
59
// Delegates to the static overload, passing the URL stored on this object.
return exists(this.url, term);
63
* This method returns the length of the longest term in the index.
66
public int getLongestTermLength()
68
// Delegates to the static overload, passing the URL stored on this object.
return getLongestTermLength(this.url);
71
/**
 * Holder that groups a set of strings with a date value and a length value.
 * All three fields are public and final. loadIndex creates instances from the
 * values it gathers while reading an index.
 *
 * NOTE(review): only the assignment of "longest" is visible in the
 * constructor in this listing; the assignments of "date" and "set" are
 * presumably on lines that were lost - confirm against the complete file.
 */
private static final class DatedSet
73
// Date value; loadIndex fills it from the connection's getDate().
public final long date;
75
// The set handed over by loadIndex (the HashSet it creates while reading).
public final Set<String> set;
77
// Greatest line length loadIndex recorded while reading.
public final int longest;
79
public DatedSet(long date, Set<String> set, int longest)
83
this.longest = longest;
87
// Shared, class-wide table of DatedSet objects keyed by the interned string
// form of the index URL. getDatedSet reads and writes it while holding this
// table's monitor.
private static Hashtable<String, DatedSet> indices = new Hashtable<String, DatedSet>();
89
/**
 * Returns true when the set held by the DatedSet that getDatedSet returns for
 * url contains term.
 */
private static boolean exists(URL url, String term)
91
return getDatedSet(url).set.contains(term);
94
/**
 * Returns the "longest" value of the DatedSet that getDatedSet returns for
 * url.
 */
private static int getLongestTermLength(URL url)
96
return getDatedSet(url).longest;
99
/**
 * Returns the DatedSet associated with url, going through the shared
 * "indices" table.
 *
 * NOTE(review): the lines between the table lookup and the table insertion,
 * and the return statement, are missing from this listing. Presumably the
 * index is loaded through loadIndex when the lookup misses - confirm against
 * the complete file before changing anything here.
 */
private static DatedSet getDatedSet(URL url)
101
// The string form of the URL is the table key; it is interned before use.
String url_str = url.toString().intern();
103
* Although there is no support for concurrent access of Index objects,
104
* we need to synchronize at this point. This is required because
105
* multiple libraries which do not talk to each other could be using
106
* this code simultaneously. If library A access an Index object with
107
* URL U at the same time library B access its own Index object with URL
108
* U then, because the two URLs are the same, there is a risk of
109
* concurrent access here.
111
// The lookup and the insertion below run while holding the table's monitor.
synchronized (indices)
113
DatedSet ds = indices.get(url_str);
118
indices.put(url_str, ds);
125
// Matches a "<" followed by the shortest run of characters up to the next
// ">". loadIndex replaces every match in a line with the empty string.
private static final Pattern head_clean_re = Pattern.compile("<.*?>");
127
/**
 * Opens an HTTP connection to url and builds a DatedSet from the response.
 * When the response code is HTTP_OK, the body is read line by line through a
 * GZIPInputStream; matches of head_clean_re are removed from each line, and
 * the greatest line length is tracked in "longest". Any other response code
 * raises an IOException carrying that code.
 *
 * NOTE(review): this listing has lost several lines of the method - the
 * declarations of "date" and "longest", the loop header, the statement that
 * presumably adds each line to "ret", and the body of the catch clause. What
 * happens after an IOException therefore cannot be determined from here.
 *
 * NOTE(review): no close() of the reader or disconnect of the connection
 * appears in the visible lines - confirm whether the stream is released.
 *
 * NOTE(review): the InputStreamReader is created without an explicit charset,
 * so decoding uses the platform default charset.
 *
 * NOTE(review): the exception text reads "bas response code" and has no space
 * before "when" - "bas" looks like a typo for "bad".
 */
private static DatedSet loadIndex(URL url)
131
// Stays null unless the HTTP_OK branch below replaces it with a HashSet.
Set<String> ret = null;
133
HttpURLConnection conn;
136
conn = (HttpURLConnection) url.openConnection();
140
if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
142
// Value of the response's date header field, as reported by the connection.
date = conn.getDate();
143
// The response body is always wrapped in a GZIPInputStream here.
BufferedReader reader = new BufferedReader(
144
new InputStreamReader(new GZIPInputStream(conn
145
.getInputStream())));
146
String line = reader.readLine();
147
ret = new HashSet<String>();
150
// Remove every head_clean_re match from the line, then intern the result.
line = head_clean_re.matcher(line).replaceAll("").intern();
152
// Keep track of the greatest cleaned-line length seen so far.
if (line.length() > longest)
154
longest = line.length();
156
line = reader.readLine();
161
// Reached for a response code other than HTTP_OK (the "else" line itself is
// not visible in this listing).
throw new IOException("bas response code ("
162
+ conn.getResponseCode() + ")when trying to contact: "
166
catch (IOException e)
171
return new DatedSet(date, ret, longest);
174
/**
 * Returns the value reported by LastBuild.getVersion(). LastBuild is declared
 * outside this listing.
 */
public static String getVersion()
176
return LastBuild.getVersion();
179
/**
 * Returns the value reported by LastBuild.getTime(). LastBuild is declared
 * outside this listing.
 */
public static String getBuildTime()
181
return LastBuild.getTime();
184
/**
 * Returns the value reported by LastBuild.getMajor(). LastBuild is declared
 * outside this listing.
 */
public static int getMajor()
186
return LastBuild.getMajor();
189
/**
 * Returns the value reported by LastBuild.getMinor(). LastBuild is declared
 * outside this listing.
 */
public static int getMinor()
191
return LastBuild.getMinor();
194
/**
 * Returns the fixed prefix "ddb-lib version: " followed by the result of
 * getVersion().
 */
public static String getVersions()
196
return "ddb-lib version: " + getVersion();
199
public static void main(String [] argv)
203
loadIndex(new URL(argv[0]));