~slub.team/goobi-indexserver/3.x

* <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>

* <filter class="solr.LowerCaseFilterFactory"/>

* <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>

* </analyzer>

* <analyzer type="query">

* <tokenizer class="solr.WhitespaceTokenizerFactory"/>

* <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

* <filter class="solr.StopFilterFactory" ignoreCase="true"/>

* <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>

* <filter class="solr.LowerCaseFilterFactory"/>

* <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>

* </analyzer>

* </fieldtype>

* </pre>
*/

public final class HyphenatedWordsFilter extends TokenFilter {

private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);

private final StringBuilder hyphenated = new StringBuilder();

private State savedState;

private boolean exhausted = false;

/**

* Creates a new HyphenatedWordsFilter

* @param in TokenStream that will be filtered

public HyphenatedWordsFilter(TokenStream in) {

super(in);

}

/**

* {@inheritDoc}

@Override

public boolean incrementToken() throws IOException {

while (!exhausted && input.incrementToken()) {

char[] term = termAttribute.buffer();

int termLength = termAttribute.length();

if (termLength > 0 && term[termLength - 1] == '-') {

// a hyphenated word

// capture the state of the first token only

if (savedState == null) {

savedState = captureState();

}

hyphenated.append(term, 0, termLength - 1);

} else if (savedState == null) {

// not part of a hyphenated word.

return true;

} else {

// the final portion of a hyphenated word

hyphenated.append(term, 0, termLength);

unhyphenate();

return true;

}

100

exhausted = true;

101

102

if (savedState != null) {

103

// the final term ends with a hyphen

104

// add back the hyphen, for backwards compatibility.

105

hyphenated.append('-');

106

unhyphenate();

107

return true;

108

}

109

110

return false;

111

}

112

113

/**

114

* {@inheritDoc}

115

116

@Override

117

public void reset() throws IOException {

118

super.reset();

119

hyphenated.setLength(0);

120

savedState = null;

121

exhausted = false;

122

}

123

124

// ================================================= Helper Methods ================================================

125

126

/**

127

* Writes the joined unhyphenated term

128

129

private void unhyphenate() {

130

int endOffset = offsetAttribute.endOffset();

131

132

restoreState(savedState);

133

savedState = null;

134

135

char term[] = termAttribute.buffer();

136

int length = hyphenated.length();

137

if (length > termAttribute.length()) {

138

term = termAttribute.resizeBuffer(length);

139

}

140

141

hyphenated.getChars(0, length, term, 0);

142

termAttribute.setLength(length);

143

offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);

144

hyphenated.setLength(0);

145

}

146

}

Older »