提交 a542bbc2 编写于 作者: weixin_43283383's avatar weixin_43283383

lowercase the input before checking the dictionaries

上级 6480991b
...@@ -35,7 +35,7 @@ https://github.com/medcl/elasticsearch-analysis-ik/blob/master/config/ik/IKAnaly ...@@ -35,7 +35,7 @@ https://github.com/medcl/elasticsearch-analysis-ik/blob/master/config/ik/IKAnaly
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties> <properties>
<comment>IK Analyzer 扩展配置</comment> <comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 --> <!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry> <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
<!--用户可以在这里配置自己的扩展停止词字典--> <!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords">custom/ext_stopword.dic</entry> <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
...@@ -197,4 +197,9 @@ here is the query result ...@@ -197,4 +197,9 @@ here is the query result
</pre> </pre>
have fun. have fun.
\ No newline at end of file
常见问题:
1.自定义词典为什么没有生效?
请确保你的扩展词典的文本格式为UTF8编码
\ No newline at end of file
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch</groupId> <groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-ik</artifactId> <artifactId>elasticsearch-analysis-ik</artifactId>
<version>1.2.3</version> <version>1.2.4</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<description>IK Analyzer for ElasticSearch</description> <description>IK Analyzer for ElasticSearch</description>
<inceptionYear>2009</inceptionYear> <inceptionYear>2009</inceptionYear>
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
</parent> </parent>
<properties> <properties>
<elasticsearch.version>0.90.6</elasticsearch.version> <elasticsearch.version>0.90.2</elasticsearch.version>
</properties> </properties>
<repositories> <repositories>
......
...@@ -25,14 +25,14 @@ ...@@ -25,14 +25,14 @@
*/ */
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;
import java.io.*;
import java.util.Collection;
import java.util.List;
import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.cfg.Configuration;
import java.io.*;
import java.util.Collection;
import java.util.List;
/** /**
* 词典管理类,单例模式 * 词典管理类,单例模式
*/ */
...@@ -152,7 +152,7 @@ public class Dictionary { ...@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInMainDict(char[] charArray , int begin, int length){ public Hit matchInMainDict(char[] charArray , int begin, int length){
return singleton._MainDict.match(charArray, begin, length); return singleton._MainDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
} }
/** /**
...@@ -160,7 +160,7 @@ public class Dictionary { ...@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
return singleton._QuantifierDict.match(charArray, begin, length); return singleton._QuantifierDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
} }
...@@ -179,7 +179,7 @@ public class Dictionary { ...@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean * @return boolean
*/ */
public boolean isStopWord(char[] charArray , int begin, int length){ public boolean isStopWord(char[] charArray , int begin, int length){
return singleton._StopWords.match(charArray, begin, length).isMatch(); return singleton._StopWords.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length).isMatch();
} }
/** /**
...@@ -253,7 +253,7 @@ public class Dictionary { ...@@ -253,7 +253,7 @@ public class Dictionary {
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 //加载扩展词典数据到主内存词典中
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册