diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 78a0aaa1993b2a7926f3cf57b41dadc1195ed587..76af79c19fedd87afdbeab69ad80f1530700e3ce 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -1,7 +1,7 @@ /** * IK 中文分词 版本 5.0 * IK Analyzer release 5.0 - * + * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,8 +20,8 @@ * 源代码由林良益(linliangyi2005@gmail.com)提供 * 版权声明 2012,乌龙茶工作室 * provided by Linliangyi and copyright 2012 by Oolong studio - * - * + * + * */ package org.wltea.analyzer.dic; @@ -62,37 +62,37 @@ public class Dictionary { */ private static Dictionary singleton; - private DictSegment _MainDict; + private DictSegment _MainDict; - private DictSegment _SurnameDict; + private DictSegment _SurnameDict; - private DictSegment _QuantifierDict; + private DictSegment _QuantifierDict; - private DictSegment _SuffixDict; + private DictSegment _SuffixDict; - private DictSegment _PrepDict; + private DictSegment _PrepDict; + + private DictSegment _StopWords; - private DictSegment _StopWords; - /** * 配置对象 */ private Configuration configuration; - public static final ESLogger logger=Loggers.getLogger("ik-analyzer"); - - private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); - - public static final String PATH_DIC_MAIN = "ik/main.dic"; - public static final String PATH_DIC_SURNAME = "ik/surname.dic"; - public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic"; - public static final String PATH_DIC_SUFFIX = "ik/suffix.dic"; - public static final String PATH_DIC_PREP = "ik/preposition.dic"; - public static final String PATH_DIC_STOP = "ik/stopword.dic"; - - private Dictionary(){ - - } + public static ESLogger logger=Loggers.getLogger("ik-analyzer"); + + private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); + + public static final String PATH_DIC_MAIN = "ik/main.dic"; + public static final String PATH_DIC_SURNAME = "ik/surname.dic"; + public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic"; + public static final String PATH_DIC_SUFFIX = "ik/suffix.dic"; + public static final String PATH_DIC_PREP = "ik/preposition.dic"; + public static final String PATH_DIC_STOP = "ik/stopword.dic"; + + private Dictionary(){ + + } /** * 词典初始化 @@ -103,33 +103,34 @@ public class Dictionary { * @return Dictionary */ public static synchronized Dictionary initial(Configuration cfg){ - - synchronized(Dictionary.class){ - if(singleton == null){ - singleton = new Dictionary(); - singleton.configuration=cfg; - singleton.loadMainDict(); - singleton.loadSurnameDict(); - singleton.loadQuantifierDict(); - singleton.loadSuffixDict(); - singleton.loadPrepDict(); - singleton.loadStopWordDict(); - - //建立监控线程 - for(String location:cfg.getRemoteExtDictionarys()){ - //10 秒是初始延迟可以修改的 60是间隔时间 单位秒 - pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); - } - for(String location:cfg.getRemoteExtStopWordDictionarys()){ - pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); + if(singleton == null){ + synchronized(Dictionary.class){ + if(singleton == null){ + singleton = new Dictionary(); + singleton.configuration=cfg; + singleton.loadMainDict(); + singleton.loadSurnameDict(); + singleton.loadQuantifierDict(); + singleton.loadSuffixDict(); + singleton.loadPrepDict(); + singleton.loadStopWordDict(); + + //建立监控线程 + for(String location:cfg.getRemoteExtDictionarys()){ + //10 秒是初始延迟可以修改的 60是间隔时间 单位秒 + pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); + } + for(String location:cfg.getRemoteExtStopWordDictionarys()){ + pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); + } + + return singleton; } - - return singleton; } } return singleton; } - + /** * 获取词典单子实例 * @return Dictionary 单例对象 @@ -140,7 +141,7 @@ public class Dictionary { } return singleton; } - + /** * 批量加载新词条 * @param words Collection词条列表 @@ -155,7 +156,7 @@ public class Dictionary { } } } - + /** * 批量移除(屏蔽)词条 */ @@ -169,7 +170,7 @@ public class Dictionary { } } } - + /** * 检索匹配主词典 * @return Hit 匹配结果描述 @@ -177,15 +178,15 @@ public class Dictionary { public Hit matchInMainDict(char[] charArray){ return singleton._MainDict.match(charArray); } - + /** * 检索匹配主词典 * @return Hit 匹配结果描述 */ public Hit matchInMainDict(char[] charArray , int begin, int length){ - return singleton._MainDict.match(charArray, begin, length); + return singleton._MainDict.match(charArray, begin, length); } - + /** * 检索匹配量词词典 * @return Hit 匹配结果描述 @@ -193,8 +194,8 @@ public class Dictionary { public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ return singleton._QuantifierDict.match(charArray, begin, length); } - - + + /** * 从已匹配的Hit中直接取出DictSegment,继续向下匹配 * @return Hit @@ -203,16 +204,16 @@ public class Dictionary { DictSegment ds = matchedHit.getMatchedDictSegment(); return ds.match(charArray, currentIndex, 1 , matchedHit); } - - + + /** * 判断是否是停止词 * @return boolean */ - public boolean isStopWord(char[] charArray , int begin, int length){ + public boolean isStopWord(char[] charArray , int begin, int length){ return singleton._StopWords.match(charArray, begin, length).isMatch(); - } - + } + /** * 加载主词典及扩展词典 */ @@ -223,13 +224,13 @@ public class Dictionary { //读取主词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error(e.getMessage(), e); + } + try { BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); String theWord = null; @@ -239,26 +240,26 @@ public class Dictionary { _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); - + } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer",e); - }finally{ + }finally{ try { if(is != null){ - is.close(); - is = null; + is.close(); + is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer",e); } } //加载扩展词典 this.loadExtDict(); //加载远程自定义词库 this.loadRemoteExtDict(); - } - + } + /** * 加载用户配置的扩展词典到主词库表 */ @@ -269,13 +270,13 @@ public class Dictionary { InputStream is = null; for(String extDictName : extDictFiles){ //读取扩展词典文件 - logger.info("[Dict Loading] " + extDictName); + logger.info("[Dict Loading] " + extDictName); Path file = PathUtils.get(configuration.getDictRoot(), extDictName); - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } //如果找不到扩展的字典,则忽略 if(is == null){ @@ -286,27 +287,29 @@ public class Dictionary { String theWord = null; do { theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { + if (theWord != null && !"".equals(theWord.trim())) { //加载扩展词典数据到主内存词典中 _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); - + } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ + logger.error("ik-analyzer",e); + }finally{ try { - is.close(); - is = null; + if(is != null){ + is.close(); + is = null; + } } catch (IOException e) { - logger.error("ik-analyzer",e); - } + logger.error("ik-analyzer",e); + } } } - } + } } - - + + /** * 加载远程扩展词典到主词库表 */ @@ -315,14 +318,11 @@ public class Dictionary { for(String location:remoteExtDictFiles){ logger.info("[Dict Loading] " + location); List lists = getRemoteWords(location); - - /** Redundant Nullcheck as the list is initialized in the getRemoteWords method //如果找不到扩展的字典,则忽略 if(lists == null){ logger.error("[Dict Loading] "+location+"加载失败"); continue; - }*/ - + } for(String theWord:lists){ if (theWord != null && !"".equals(theWord.trim())) { //加载扩展词典数据到主内存词典中 @@ -331,14 +331,14 @@ public class Dictionary { } } } - + } - + /** * 从远程服务器上下载自定义词条 */ private static List getRemoteWords(String location){ - + List buffer = new ArrayList(); RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000) .setConnectTimeout(10*1000).setSocketTimeout(60*1000).build(); @@ -350,7 +350,7 @@ public class Dictionary { try { response = httpclient.execute(get); if(response.getStatusLine().getStatusCode()==200){ - + String charset = "UTF-8"; //获取编码,默认为utf-8 if(response.getEntity().getContentType().getValue().contains("charset=")){ @@ -376,49 +376,49 @@ public class Dictionary { } return buffer; } - - - + + + /** * 加载用户扩展的停止词词典 */ private void loadStopWordDict(){ //建立主词典实例 - _StopWords = new DictSegment((char)0); + _StopWords = new DictSegment((char)0); - //读取主词典文件 + //读取主词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - - try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); - String theWord = null; - do { - theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { - _StopWords.fillSegment(theWord.trim().toCharArray()); - } - } while (theWord != null); - - } catch (IOException e) { - logger.error("ik-analyzer",e); - - }finally{ - try { - if(is != null){ - is.close(); - is = null; - } - } catch (IOException e) { - logger.error("ik-analyzer",e); - } - } + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error(e.getMessage(), e); + } + + try { + BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + String theWord = null; + do { + theWord = br.readLine(); + if (theWord != null && !"".equals(theWord.trim())) { + _StopWords.fillSegment(theWord.trim().toCharArray()); + } + } while (theWord != null); + + } catch (IOException e) { + logger.error("ik-analyzer",e); + + }finally{ + try { + if(is != null){ + is.close(); + is = null; + } + } catch (IOException e) { + logger.error("ik-analyzer",e); + } + } //加载扩展停止词典 @@ -426,15 +426,15 @@ public class Dictionary { if(extStopWordDictFiles != null){ is = null; for(String extStopWordDictName : extStopWordDictFiles){ - logger.info("[Dict Loading] " + extStopWordDictName); - - //读取扩展词典文件 - file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName); - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } + logger.info("[Dict Loading] " + extStopWordDictName); + + //读取扩展词典文件 + file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName); + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } //如果找不到扩展的字典,则忽略 if(is == null){ continue; @@ -446,37 +446,36 @@ public class Dictionary { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { //加载扩展停止词典数据到内存中 - _StopWords.fillSegment(theWord.trim().toCharArray()); + _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); - + } catch (IOException e) { - logger.error("ik-analyzer",e); - + logger.error("ik-analyzer",e); + }finally{ try { - is.close(); - is = null; + if(is != null){ + is.close(); + is = null; + } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer",e); } } } } - + //加载远程停用词典 List remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys(); for(String location:remoteExtStopWordDictFiles){ logger.info("[Dict Loading] " + location); List lists = getRemoteWords(location); - - /** Redundant Nullcheck as the list is initialized in the getRemoteWords method //如果找不到扩展的字典,则忽略 if(lists == null){ logger.error("[Dict Loading] "+location+"加载失败"); continue; - }*/ - + } for(String theWord:lists){ if (theWord != null && !"".equals(theWord.trim())) { //加载远程词典数据到主内存中 @@ -485,10 +484,10 @@ public class Dictionary { } } } - - + + } - + /** * 加载量词词典 */ @@ -497,12 +496,12 @@ public class Dictionary { _QuantifierDict = new DictSegment((char)0); //读取量词词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } try { BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); String theWord = null; @@ -512,132 +511,134 @@ public class Dictionary { _QuantifierDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); - + } catch (IOException ioe) { logger.error("Quantifier Dictionary loading exception."); - + }finally{ try { if(is != null){ - is.close(); - is = null; + is.close(); + is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer",e); } } } - private void loadSurnameDict(){ + private void loadSurnameDict(){ - _SurnameDict = new DictSegment((char)0); + _SurnameDict = new DictSegment((char)0); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } - if(is == null){ - throw new RuntimeException("Surname Dictionary not found!!!"); - } - try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); - String theWord; - do { - theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { - _SurnameDict.fillSegment(theWord.trim().toCharArray()); - } - } while (theWord != null); - } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ - try { + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } + if(is == null){ + throw new RuntimeException("Surname Dictionary not found!!!"); + } + try { + BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + String theWord; + do { + theWord = br.readLine(); + if (theWord != null && !"".equals(theWord.trim())) { + _SurnameDict.fillSegment(theWord.trim().toCharArray()); + } + } while (theWord != null); + } catch (IOException e) { + logger.error("ik-analyzer",e); + }finally{ + try { + if(is != null){ + is.close(); + is = null; + } + } catch (IOException e) { + logger.error("ik-analyzer",e); + } + } + } + + + private void loadSuffixDict(){ + + _SuffixDict = new DictSegment((char)0); + Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX); + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } + if(is == null){ + throw new RuntimeException("Suffix Dictionary not found!!!"); + } + try { + + BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + String theWord; + do { + theWord = br.readLine(); + if (theWord != null && !"".equals(theWord.trim())) { + _SuffixDict.fillSegment(theWord.trim().toCharArray()); + } + } while (theWord != null); + } catch (IOException e) { + logger.error("ik-analyzer",e); + }finally{ + try { is.close(); is = null; - } catch (IOException e) { - logger.error("ik-analyzer",e); - } - } - } + } catch (IOException e) { + logger.error("ik-analyzer",e); + } + } + } - private void loadSuffixDict(){ + private void loadPrepDict(){ - _SuffixDict = new DictSegment((char)0); - Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } - if(is == null){ - throw new RuntimeException("Suffix Dictionary not found!!!"); - } - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); - String theWord; - do { - theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { - _SuffixDict.fillSegment(theWord.trim().toCharArray()); - } - } while (theWord != null); - } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ - try { - is.close(); - is = null; - } catch (IOException e) { - logger.error("ik-analyzer",e); - } - } - } - - - private void loadPrepDict(){ - - _PrepDict = new DictSegment((char)0); + _PrepDict = new DictSegment((char)0); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP); - InputStream is = null; - try { - is = new FileInputStream(file.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } - if(is == null){ - throw new RuntimeException("Preposition Dictionary not found!!!"); - } - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); - String theWord; - do { - theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { - - _PrepDict.fillSegment(theWord.trim().toCharArray()); - } - } while (theWord != null); - } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ - try { - is.close(); - is = null; - } catch (IOException e) { - logger.error("ik-analyzer",e); - } - } - } - - public void reLoadMainDict(){ - logger.info("重新加载词典..."); + InputStream is = null; + try { + is = new FileInputStream(file.toFile()); + } catch (FileNotFoundException e) { + logger.error("ik-analyzer",e); + } + if(is == null){ + throw new RuntimeException("Preposition Dictionary not found!!!"); + } + try { + + BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + String theWord; + do { + theWord = br.readLine(); + if (theWord != null && !"".equals(theWord.trim())) { + + _PrepDict.fillSegment(theWord.trim().toCharArray()); + } + } while (theWord != null); + } catch (IOException e) { + logger.error("ik-analyzer",e); + }finally{ + try { + is.close(); + is = null; + } catch (IOException e) { + logger.error("ik-analyzer",e); + } + } + } + + public void reLoadMainDict(){ + logger.info("重新加载词典..."); // 新开一个实例加载词典,减少加载过程对当前词典使用的影响 Dictionary tmpDict = new Dictionary(); tmpDict.configuration = getSingleton().configuration; @@ -646,6 +647,6 @@ public class Dictionary { _MainDict = tmpDict._MainDict; _StopWords = tmpDict._StopWords; logger.info("重新加载词典完毕..."); - } - -} + } + +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/dic/Monitor.java b/src/main/java/org/wltea/analyzer/dic/Monitor.java index 64a90302dc7e6afb4dac57619647c82ce5ea03a9..4e0163f798a89e081f57b0bb02cf5dce37cc7357 100644 --- a/src/main/java/org/wltea/analyzer/dic/Monitor.java +++ b/src/main/java/org/wltea/analyzer/dic/Monitor.java @@ -7,9 +7,13 @@ import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpHead; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; public class Monitor implements Runnable { + public static ESLogger logger= Loggers.getLogger("ik-analyzer"); + private static CloseableHttpClient httpclient = HttpClients.createDefault(); /* * 上次更改时间 @@ -19,12 +23,12 @@ public class Monitor implements Runnable { * 资源属性 */ private String eTags; - + /* * 请求地址 */ - private String location; - + private String location; + public Monitor(String location) { this.location = location; this.last_modified = null; @@ -38,16 +42,16 @@ public class Monitor implements Runnable { * ④如果有变化,重新加载词典 * ⑤休眠1min,返回第①步 */ - + public void run() { //超时设置 RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000) .setConnectTimeout(10*1000).setSocketTimeout(15*1000).build(); - + HttpHead head = new HttpHead(location); head.setConfig(rc); - + //设置请求头 if (last_modified != null) { head.setHeader("If-Modified-Since", last_modified); @@ -55,17 +59,17 @@ public class Monitor implements Runnable { if (eTags != null) { head.setHeader("If-None-Match", eTags); } - + CloseableHttpResponse response = null; try { - + response = httpclient.execute(head); - + //返回200 才做操作 if(response.getStatusLine().getStatusCode()==200){ - + if (!response.getLastHeader("Last-Modified").getValue().equalsIgnoreCase(last_modified) - ||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) { + ||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) { // 远程词库有更新,需要重新加载词典,并修改last_modified,eTags Dictionary.getSingleton().reLoadMainDict(); @@ -87,9 +91,9 @@ public class Monitor implements Runnable { response.close(); } } catch (IOException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } } - -} + +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/help/Sleep.java b/src/main/java/org/wltea/analyzer/help/Sleep.java index 06ac4602bd234c19fb4ebad0f666e37d38236d0c..0954c15cc2f8bd0dd50b6e223055a3017755e683 100644 --- a/src/main/java/org/wltea/analyzer/help/Sleep.java +++ b/src/main/java/org/wltea/analyzer/help/Sleep.java @@ -5,8 +5,8 @@ import org.elasticsearch.common.logging.Loggers; public class Sleep { - public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); - + public static ESLogger logger= Loggers.getLogger("ik-analyzer"); + public enum Type{MSEC,SEC,MIN,HOUR}; public static void sleep(Type type,int num){ try { @@ -15,22 +15,22 @@ public class Sleep { Thread.sleep(num); return; case SEC: - Thread.sleep(num*1000L); + Thread.sleep(num*1000); return; case MIN: - Thread.sleep(num*60*1000L); + Thread.sleep(num*60*1000); return; case HOUR: - Thread.sleep(num*60*60*1000L); + Thread.sleep(num*60*60*1000); return; default: - logger.error("输入类型错误,应为MSEC,SEC,MIN,HOUR之一"); + System.err.println("输入类型错误,应为MSEC,SEC,MIN,HOUR之一"); return; } } catch (InterruptedException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } - - -} + + +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java b/src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java index 98f5e3bf44aada1018adc9cdbeb74aedcb8b2726..9d730a9889980ed88ded45d41b862d9be6dfd19e 100644 --- a/src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java +++ b/src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java @@ -1,7 +1,7 @@ /** * IK 中文分词 版本 5.0 * IK Analyzer release 5.0 - * + * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,7 +20,7 @@ * 源代码由林良益(linliangyi2005@gmail.com)提供 * 版权声明 2012,乌龙茶工作室 * provided by Linliangyi and copyright 2012 by Oolong studio - * + * */ package org.wltea.analyzer.query; @@ -34,6 +34,8 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.util.Version; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.Lexeme; @@ -45,6 +47,8 @@ import org.wltea.analyzer.core.Lexeme; */ public class SWMCQueryBuilder { + public static ESLogger logger= Loggers.getLogger("ik-analyzer"); + /** * 生成SWMCQuery * @param fieldName @@ -62,7 +66,7 @@ public class SWMCQueryBuilder { Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode); return _SWMCQuery; } - + /** * 分词切分,并返回结链表 * @param keywords @@ -78,16 +82,16 @@ public class SWMCQueryBuilder { lexemes.add(l); } }catch(IOException e){ - e.printStackTrace(); + logger.error(e.getMessage(), e); } return lexemes; } - - + + /** * 根据分词结果生成SWMC搜索 * @param fieldName -// * @param pathOption + // * @param pathOption * @param quickMode * @return */ @@ -100,7 +104,7 @@ public class SWMCQueryBuilder { int lastLexemeLength = 0; //记录最后词元结束位置 int lastLexemeEnd = -1; - + int shortCount = 0; int totalCount = 0; for(Lexeme l : lexemes){ @@ -110,15 +114,15 @@ public class SWMCQueryBuilder { keywordBuffer_Short.append(' ').append(l.getLexemeText()); shortCount += l.getLength(); } - + if(lastLexemeLength == 0){ - keywordBuffer.append(l.getLexemeText()); + keywordBuffer.append(l.getLexemeText()); }else if(lastLexemeLength == 1 && l.getLength() == 1 && lastLexemeEnd == l.getBeginPosition()){//单字位置相邻,长度为一,合并) keywordBuffer.append(l.getLexemeText()); }else{ keywordBuffer.append(' ').append(l.getLexemeText()); - + } lastLexemeLength = l.getLength(); lastLexemeEnd = l.getEndPosition(); @@ -128,16 +132,16 @@ public class SWMCQueryBuilder { QueryParser qp = new QueryParser(fieldName, new StandardAnalyzer()); qp.setDefaultOperator(QueryParser.AND_OPERATOR); qp.setAutoGeneratePhraseQueries(true); - + if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){ try { //System.out.println(keywordBuffer.toString()); Query q = qp.parse(keywordBuffer_Short.toString()); return q; } catch (ParseException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } - + }else{ if(keywordBuffer.length() > 0){ try { @@ -145,10 +149,10 @@ public class SWMCQueryBuilder { Query q = qp.parse(keywordBuffer.toString()); return q; } catch (ParseException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } } return null; } -} +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java b/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java index d9e23e8dec1e7ada3803e7f1f1aeb292e2854f32..d22fe3cff53d7dbd2fc6b8ed51ac2835548ce766 100644 --- a/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java +++ b/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java @@ -1,7 +1,7 @@ /** * IK 中文分词 版本 5.0.1 * IK Analyzer release 5.0.1 - * + * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,8 +20,8 @@ * 源代码由林良益(linliangyi2005@gmail.com)提供 * 版权声明 2012,乌龙茶工作室 * provided by Linliangyi and copyright 2012 by Oolong studio - * - * + * + * */ package org.wltea.analyzer.sample; @@ -44,47 +44,47 @@ import org.wltea.analyzer.lucene.IKAnalyzer; */ public class IKAnalzyerDemo { - public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); - + public static ESLogger logger= Loggers.getLogger("ik-analyzer"); + public static void main(String[] args){ //构建IK分词器,使用smart分词模式 Analyzer analyzer = new IKAnalyzer(true); - + //获取Lucene的TokenStream对象 - TokenStream ts = null; + TokenStream ts = null; try { ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATAHELLO")); // ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too")); //获取词元位置属性 - OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); - //获取词元文本属性 - CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); - //获取词元文本属性 - TypeAttribute type = ts.addAttribute(TypeAttribute.class); - - - //重置TokenStream(重置StringReader) - ts.reset(); + OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); + //获取词元文本属性 + CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); + //获取词元文本属性 + TypeAttribute type = ts.addAttribute(TypeAttribute.class); + + + //重置TokenStream(重置StringReader) + ts.reset(); //迭代获取分词结果 while (ts.incrementToken()) { - logger.info(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); + System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); } //关闭TokenStream(关闭StringReader) ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } finally { //释放TokenStream的所有资源 if(ts != null){ - try { - ts.close(); - } catch (IOException e) { - e.printStackTrace(); - } + try { + ts.close(); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } } - } - + } + } -} +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java b/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java index b4a5f602aeb85b91b46efaa5a9f3a8c17955aa88..70bd7a534a9916862e6be23996c6f21b4c2b854d 100644 --- a/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java +++ b/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java @@ -1,7 +1,7 @@ /** * IK 中文分词 版本 5.0 * IK Analyzer release 5.0 - * + * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,8 +20,8 @@ * 源代码由林良益(linliangyi2005@gmail.com)提供 * 版权声明 2012,乌龙茶工作室 * provided by Linliangyi and copyright 2012 by Oolong studio - * - * + * + * */ package org.wltea.analyzer.sample; @@ -58,14 +58,14 @@ import org.wltea.analyzer.lucene.IKAnalyzer; /** * 使用IKAnalyzer进行Lucene索引和查询的演示 * 2012-3-2 - * + * * 以下是结合Lucene4.0 API的写法 * */ public class LuceneIndexAndSearchDemo { - public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); - + public static ESLogger logger= Loggers.getLogger("ik-analyzer"); + /** * 模拟: * 创建一个单条记录的索引,并对其进行搜索 @@ -74,20 +74,20 @@ public class LuceneIndexAndSearchDemo { public static void main(String[] args){ //Lucene Document的域名 String fieldName = "text"; - //检索内容 + //检索内容 String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。"; - + //实例化IKAnalyzer分词器 Analyzer analyzer = new IKAnalyzer(true); - + Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { //建立内存索引对象 - directory = new RAMDirectory(); - + directory = new RAMDirectory(); + //配置IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); @@ -98,53 +98,53 @@ public class LuceneIndexAndSearchDemo { doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); - - + + //搜索过程********************************** - //实例化搜索器 + //实例化搜索器 ireader = DirectoryReader.open(directory); - isearcher = new IndexSearcher(ireader); - - String keyword = "中文分词工具包"; + isearcher = new IndexSearcher(ireader); + + String keyword = "中文分词工具包"; //使用QueryParser查询分析器构造Query对象 QueryParser qp = new QueryParser(fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); - logger.info("Query = " + query); - + System.out.println("Query = " + query); + //搜索相似度最高的5条记录 TopDocs topDocs = isearcher.search(query , 5); - logger.info("命中:" + topDocs.totalHits); + System.out.println("命中:" + topDocs.totalHits); //输出结果 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++){ Document targetDoc = isearcher.doc(scoreDocs[i].doc); - logger.info("内容:" + targetDoc.toString()); - } - + System.out.println("内容:" + targetDoc.toString()); + } + } catch (CorruptIndexException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } catch (LockObtainFailedException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } catch (IOException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } catch (ParseException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } finally{ if(ireader != null){ try { ireader.close(); } catch (IOException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } if(directory != null){ try { directory.close(); } catch (IOException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } } } -} +} \ No newline at end of file