~ldd/java-webdict-library/trunk

« back to all changes in this revision

Viewing changes to jsrc/com/lddubeau/ddb/Index.java

  • Committer: ldd at lddubeau
  • Date: 2008-06-28 00:04:54 UTC
  • Revision ID: ldd@lddubeau.com-20080628000454-mr97fwz910m65jnv
Initial.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
package com.lddubeau.ddb;
 
2
 
 
3
import java.io.BufferedReader;
 
4
import java.io.IOException;
 
5
import java.io.InputStreamReader;
 
6
import java.net.HttpURLConnection;
 
7
import java.net.MalformedURLException;
 
8
import java.net.URL;
 
9
import java.util.HashSet;
 
10
import java.util.Hashtable;
 
11
import java.util.Set;
 
12
import java.util.regex.Pattern;
 
13
import java.util.zip.GZIPInputStream;
 
14
 
 
15
/**
 
16
 * This class is designed to model simple indices that some web dictionaries
 
17
 * export. These indices are expected to contain only a list of the words
 
18
 * present in the dictionary. Internally, the class caches the indices so if two
 
19
 * Index objects are created with the same URL, only one data structure exists
 
20
 * internally. The index object itself merely knows what its URL is.?
 
21
 * 
 
22
 * @author ldd
 
23
 */
 
24
public final class Index
 
25
{
 
26
        private final URL url;
 
27
 
 
28
        /**
 
29
         * This constructor creates a new Index object which allows to check whether
 
30
         * a word exists in an index.
 
31
         * 
 
32
         * @param url
 
33
         *            The URL where the index is located on the web.
 
34
         * @throws MalformedURLException
 
35
         *             When the URL is incorrect.
 
36
         */
 
37
        public Index(String url) throws MalformedURLException
 
38
        {
 
39
                if (url == null)
 
40
                {
 
41
                        throw new NullPointerException("url is null");
 
42
                }
 
43
                this.url = new URL(url);
 
44
        }
 
45
 
 
46
        /**
 
47
         * This method verifies whether a term exists in the index.
 
48
         * 
 
49
         * @param term
 
50
         *            The term to lookup.
 
51
         * @return True if the term is present in the index, false if not.
 
52
         */
 
53
        public boolean exists(String term)
 
54
        {
 
55
                if (term == null)
 
56
                {
 
57
                        throw new NullPointerException("term is null");
 
58
                }
 
59
                return exists(this.url, term);
 
60
        }
 
61
        
 
62
        /**
 
63
         * This method returns the length of the longest term in the index.
 
64
         * @return The length.
 
65
         */
 
66
        public int getLongestTermLength()
 
67
        {
 
68
                return getLongestTermLength(this.url);
 
69
        }
 
70
 
 
71
        private static final class DatedSet
 
72
        {
 
73
                public final long date;
 
74
 
 
75
                public final Set<String> set;
 
76
                
 
77
                public final int longest;
 
78
 
 
79
                public DatedSet(long date, Set<String> set, int longest)
 
80
                {
 
81
                        this.date = date;
 
82
                        this.set = set;
 
83
                        this.longest = longest;
 
84
                }
 
85
        }
 
86
 
 
87
        private static Hashtable<String, DatedSet> indices = new Hashtable<String, DatedSet>();
 
88
 
 
89
        private static boolean exists(URL url, String term)
 
90
        {
 
91
                return getDatedSet(url).set.contains(term);
 
92
        }
 
93
        
 
94
        private static int getLongestTermLength(URL url)
 
95
        {
 
96
                return getDatedSet(url).longest;
 
97
        }
 
98
 
 
99
        private static DatedSet getDatedSet(URL url)
 
100
        {
 
101
                String url_str = url.toString().intern();
 
102
                /*
 
103
                 * Although there is no support for concurrent access of Index objects,
 
104
                 * we need to synchronize at this point. This is required because
 
105
                 * multiple libraries which do not talk to each other could be using
 
106
                 * this code simultaneously. If library A access an Index object with
 
107
                 * URL U at the same time library B access its own Index object with URL
 
108
                 * U then, because the two URLs are the same, there is a risk of
 
109
                 * concurrent access here.
 
110
                 */
 
111
                synchronized (indices)
 
112
                {
 
113
                        DatedSet ds = indices.get(url_str);
 
114
 
 
115
                        if (ds == null)
 
116
                        {
 
117
                                ds = loadIndex(url);
 
118
                                indices.put(url_str, ds);
 
119
                        }
 
120
 
 
121
                        return ds;
 
122
                }
 
123
        }
 
124
        
 
125
        private static final Pattern head_clean_re = Pattern.compile("<.*?>");
 
126
 
 
127
        private static DatedSet loadIndex(URL url)
 
128
        {
 
129
                int longest = 0;
 
130
                long date;
 
131
                Set<String> ret = null;
 
132
 
 
133
                HttpURLConnection conn;
 
134
                try
 
135
                {
 
136
                        conn = (HttpURLConnection) url.openConnection();
 
137
 
 
138
                        conn.connect();
 
139
 
 
140
                        if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
 
141
                        {
 
142
                                date = conn.getDate();
 
143
                                BufferedReader reader = new BufferedReader(
 
144
                                                new InputStreamReader(new GZIPInputStream(conn
 
145
                                                                .getInputStream())));
 
146
                                String line = reader.readLine();
 
147
                                ret = new HashSet<String>();
 
148
                                while (line != null)
 
149
                                {
 
150
                                        line = head_clean_re.matcher(line).replaceAll("").intern();
 
151
                                        ret.add(line);
 
152
                                        if (line.length() > longest)
 
153
                                        {
 
154
                                                longest = line.length();
 
155
                                        }
 
156
                                        line = reader.readLine();
 
157
                                }
 
158
                        }
 
159
                        else
 
160
                        {
 
161
                                throw new IOException("bas response code ("
 
162
                                                + conn.getResponseCode() + ")when trying to contact: "
 
163
                                                + url);
 
164
                        }
 
165
                }
 
166
                catch (IOException e)
 
167
                {
 
168
                        throw new Error(e);
 
169
                }
 
170
 
 
171
                return new DatedSet(date, ret, longest);
 
172
        }
 
173
        
 
174
        public static String getVersion()
 
175
        {
 
176
                return LastBuild.getVersion();
 
177
        }
 
178
        
 
179
        public static String getBuildTime()
 
180
        {
 
181
                return LastBuild.getTime();
 
182
        }
 
183
        
 
184
        public static int getMajor()
 
185
        {
 
186
                return LastBuild.getMajor();
 
187
        }
 
188
        
 
189
        public static int getMinor()
 
190
        {
 
191
                return LastBuild.getMinor();
 
192
        }
 
193
        
 
194
        public static String getVersions()
 
195
        {
 
196
                return "ddb-lib version: " + getVersion();
 
197
        }
 
198
 
 
199
        public static void main(String [] argv)
 
200
        {
 
201
                try
 
202
                {
 
203
                        loadIndex(new URL(argv[0]));
 
204
                }
 
205
                catch (Exception e)
 
206
                {
 
207
                        throw new Error(e);
 
208
                }
 
209
        }
 
210
}