2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.solr.analysis;
21
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.MappingCharFilter;
import org.apache.lucene.analysis.NormalizeCharMap;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
35
/**
 * Factory for {@link MappingCharFilter}.
 * <pre class="prettyprint" >
 * &lt;fieldType name="text_map" class="solr.TextField" positionIncrementGap="100"&gt;
 *   &lt;analyzer&gt;
 *     &lt;charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/&gt;
 *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;</pre>
 *
 * @version $Id: MappingCharFilterFactory.java 1073810 2011-02-23 16:27:55Z koji $
 */
48
public class MappingCharFilterFactory extends BaseCharFilterFactory implements
51
protected NormalizeCharMap normMap;
52
private String mapping;
54
public void inform(ResourceLoader loader) {
55
mapping = args.get( "mapping" );
57
if( mapping != null ){
58
List<String> wlist = null;
60
File mappingFile = new File( mapping );
61
if( mappingFile.exists() ){
62
wlist = loader.getLines( mapping );
65
List<String> files = StrUtils.splitFileNames( mapping );
66
wlist = new ArrayList<String>();
67
for( String file : files ){
68
List<String> lines = loader.getLines( file.trim() );
69
wlist.addAll( lines );
73
catch( IOException e ){
74
throw new RuntimeException( e );
76
normMap = new NormalizeCharMap();
77
parseRules( wlist, normMap );
81
public CharStream create(CharStream input) {
82
return new MappingCharFilter(normMap,input);
85
// "source" => "target"
86
static Pattern p = Pattern.compile( "\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$" );
88
protected void parseRules( List<String> rules, NormalizeCharMap normMap ){
89
for( String rule : rules ){
90
Matcher m = p.matcher( rule );
92
throw new RuntimeException( "Invalid Mapping Rule : [" + rule + "], file = " + mapping );
93
normMap.add( parseString( m.group( 1 ) ), parseString( m.group( 2 ) ) );
97
char[] out = new char[256];
99
protected String parseString( String s ){
101
int len = s.length();
103
while( readPos < len ){
104
char c = s.charAt( readPos++ );
107
throw new RuntimeException( "Invalid escaped char in [" + s + "]" );
108
c = s.charAt( readPos++ );
110
case '\\' : c = '\\'; break;
111
case '"' : c = '"'; break;
112
case 'n' : c = '\n'; break;
113
case 't' : c = '\t'; break;
114
case 'r' : c = '\r'; break;
115
case 'b' : c = '\b'; break;
116
case 'f' : c = '\f'; break;
118
if( readPos + 3 >= len )
119
throw new RuntimeException( "Invalid escaped char in [" + s + "]" );
120
c = (char)Integer.parseInt( s.substring( readPos, readPos + 4 ), 16 );
127
return new String( out, 0, writePos );