1
package org.apache.lucene.analysis.synonym;
4
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.IOException;
21
import java.io.LineNumberReader;
22
import java.io.Reader;
23
import java.text.ParseException;
25
import org.apache.lucene.analysis.Analyzer;
26
import org.apache.lucene.util.CharsRef;
29
* Parser for wordnet prolog format
31
* See http://wordnet.princeton.edu/man/prologdb.5WN.html for a description of the format.
32
* @lucene.experimental
34
// TODO: allow you to specify syntactic categories (e.g. just nouns, etc)
35
public class WordnetSynonymParser extends SynonymMap.Builder {
36
private final boolean expand;
37
private final Analyzer analyzer;
39
public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
42
this.analyzer = analyzer;
45
public void add(Reader in) throws IOException, ParseException {
46
LineNumberReader br = new LineNumberReader(in);
49
String lastSynSetID = "";
50
CharsRef synset[] = new CharsRef[8];
53
while ((line = br.readLine()) != null) {
54
String synSetID = line.substring(2, 11);
56
if (!synSetID.equals(lastSynSetID)) {
57
addInternal(synset, synsetSize);
61
if (synset.length <= synsetSize+1) {
62
CharsRef larger[] = new CharsRef[synset.length * 2];
63
System.arraycopy(synset, 0, larger, 0, synsetSize);
67
synset[synsetSize] = parseSynonym(line, synset[synsetSize]);
69
lastSynSetID = synSetID;
72
// final synset in the file
73
addInternal(synset, synsetSize);
74
} catch (IllegalArgumentException e) {
75
ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
83
private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
85
reuse = new CharsRef(8);
88
int start = line.indexOf('\'')+1;
89
int end = line.lastIndexOf('\'');
91
String text = line.substring(start, end).replace("''", "'");
92
return analyze(analyzer, text, reuse);
95
private void addInternal(CharsRef synset[], int size) throws IOException {
97
return; // nothing to do
101
for (int i = 0; i < size; i++) {
102
for (int j = 0; j < size; j++) {
103
add(synset[i], synset[j], false);
107
for (int i = 0; i < size; i++) {
108
add(synset[i], synset[0], false);