提交全局搜索引擎工程，采用Apache Lucene搭建

6c8b826d · nicky · 2cd8b481 · 6c8b826d · 6c8b826d · 6c8b826d
6 changed file
--- a/src/project-search/ReadMe.txt
+++ b/src/project-search/ReadMe.txt
+全局搜索引擎
+
+
+### 数据库索引和Lucene检索对比
+
+|比较项   |Lucene检索| 数据库检索|
+|:--------|:--------|:--------|
+|数据检索 |	从Lucene的索引文件中检出 |	由数据库索引检索记录|
+|索引结构 |	Document（文档）|	Record（记录）|
+|全文检索 |	支持 |	不支持|
+|模糊查询 |	支持 |	不支持|
+|结果排序 |	支持按相关度排序 |	不支持按相关度排序|
+
+Lucene搜索的API类主要有4个：IndexSearcher，Query，QueryParser，TopDocs（Lucene 3.0之前的版本为Hits）
+
+### Lucene搜索过程
+Lucene的索引结构是文档(Document)形式的，下面简单介绍一下Lucene搜索的过程
+1. 将文档传给分词组件(Tokenizer)，分词组件根据标点符号和停词将文档分成词元(Token)，并将标点符号和停词去掉。
+
+停词（stop word）是指没有实际含义的词，例如英语中的a、the等单词
+
+文章1内容：Tom favorite fruit is apple.
+
+经过分词处理后，变成[Tom][favorite][fruit][apple]
+
+
+2. 再将词元传给语言处理组件(Linguistic Processor)
+
+英语的单词经过语言处理组件处理后，字母变为小写，词元会变成最基本的词根形式，比如likes变成like
+
+经过语言处理后，变成[tom][favorite][fruit][apple]
+
+3. 然后将得到的词元传给索引组件(Indexer)，索引组件处理后得到索引结构，其中关键字、出现频率、出现位置分别作为词典文件（Term Dictionary）、频率文件（frequencies）和位置文件（positions）保存起来，检索时通过二分查找算法快速定位关键字
+
+|关键字   |文章号[出现频率]| 出现位置|
+|:--------|:--------|:--------|
+|tom | 1[1] | 1 |
+|favorite| 1[1] | 2 |
+|fruit| 1[1] | 3 |
+|apple| 1[1] | 4 |
+
--- a/src/project-search/pom.xml
+++ b/src/project-search/pom.xml
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>taoshop</artifactId>
+        <groupId>org.muses</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>project-search</artifactId>
+    <packaging>jar</packaging>
+
+    <name>project-search Maven Webapp</name>
+    <!-- FIXME change it to the project's website -->
+    <url>http://www.example.com</url>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.7</maven.compiler.source>
+        <maven.compiler.target>1.7</maven.compiler.target>
+        <!-- Single source of truth for the version shared by every Lucene artifact below;
+             Lucene modules must be kept on the same release. -->
+        <lucene.version>5.3.1</lucene.version>
+    </properties>
+
+    <!-- NOTE(review): LuceneIndexer imports org.springframework.stereotype.Component, but no Spring
+         artifact is declared here; confirm it is inherited from the parent POM. -->
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-core</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-queryparser</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-smartcn</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-highlighter</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+    </dependencies>
+
+   <!-- <build>
+        <finalName>project-search</finalName>
+        <pluginManagement>&lt;!&ndash; lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) &ndash;&gt;
+            <plugins>
+                <plugin>
+                    <artifactId>maven-clean-plugin</artifactId>
+                    <version>3.0.0</version>
+                </plugin>
+                &lt;!&ndash; see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_war_packaging &ndash;&gt;
+                <plugin>
+                    <artifactId>maven-resources-plugin</artifactId>
+                    <version>3.0.2</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <version>3.7.0</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-surefire-plugin</artifactId>
+                    <version>2.20.1</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-war-plugin</artifactId>
+                    <version>3.2.0</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-install-plugin</artifactId>
+                    <version>2.5.2</version>
+                </plugin>
+                <plugin>
+                    <artifactId>maven-deploy-plugin</artifactId>
+                    <version>2.8.2</version>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>-->
+</project>
--- a/src/project-search/src/test/java/com/test/lucene/LuceneConstant.java
+++ b/src/project-search/src/test/java/com/test/lucene/LuceneConstant.java
+package com.test.lucene;
+
+/**
+ * Filesystem locations shared by the Lucene examples in this package.
+ *
+ * <p>The constants are public so that other classes can actually read them;
+ * as private members of a class without methods they were unreachable.
+ */
+public class LuceneConstant {
+	
+	/** Root directory; NOTE(review): presumably where the Lucene index is written — confirm against callers. */
+	public final static String INDEX_DIR = "D:\\lucene";
+	
+	/** Sub-directory of {@link #INDEX_DIR}; NOTE(review): presumably holds the files to be indexed — confirm. */
+	public final static String INDEX_DATA_DIR = "D:\\lucene\\data";
+
+}
--- a/src/project-search/src/test/java/com/test/lucene/LuceneIndexer.java
+++ b/src/project-search/src/test/java/com/test/lucene/LuceneIndexer.java
+package com.test.lucene;
+
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.file.Paths;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.springframework.stereotype.Component;
+
+
+/**
+ * <pre>
+ * 	Lucene创建索引、全局搜索服务类
+ * </pre>
+ * 
+ * @author nicky
+ * @version 1.00.00
+ * 
+ *          <pre>
+ * 修改记录
+ *    修改后版本:     修改人：  修改日期:2018年04月18日     修改内容:
+ *          </pre>
+ */
+@Component
+public class LuceneIndexer {
+	
+	private volatile static LuceneIndexer instance;
+//	
+//	private LuceneIndexer(){}
+	
+	/**
+	 * 双检锁/双重校验锁（DCL，即 double-checked locking）
+	 * @return instance
+	 */
+//	public static LuceneIndexer getInstance(){
+//		if(instance == null){
+//			synchronized (LuceneIndexer.class) {
+//				if(instance == null){
+//					instance = new LuceneIndexer();
+//				}
+//			}
+//		}
+//		return instance;
+//	}
+	
+//	private static Analyzer analyzer;
+//	private static Directory directory;
+	private IndexWriter indexWriter;
+//	private static IndexWriterConfig config;
+	
+	private final static String INDEX_DIR = "D:\\lucene";
+	
+	private final static String DATA_DIR = "D:\\lucene\\data";
+	
+	private static class SingletonHolder{  
+	      private final static LuceneIndexer instance=new LuceneIndexer();  
+	}  
+	
+	public static LuceneIndexer getInstance(){  
+	      return SingletonHolder.instance;  
+	}  
+	
+	public static boolean createIndex(String indexDir , String dataDir) throws IOException{
+		long startTime = System.currentTimeMillis();//记录索引开始时间
+		
+		Analyzer analyzer = new SmartChineseAnalyzer();
+		Directory directory = FSDirectory.open(Paths.get(indexDir));
+		IndexWriterConfig config = new IndexWriterConfig(analyzer);
+		
+		IndexWriter indexWriter = new IndexWriter(directory, config);
+		
+		File[] files = new File(dataDir).listFiles();
+		for(File file : files){
+			Document doc = new Document();
+			//添加字段
+	        doc.add(new TextField("contents", new FileReader(file))); //添加内容
+	        doc.add(new TextField("fileName", file.getName(), Field.Store.YES)); //添加文件名，并把这个字段存到索引文件里
+	        doc.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES)); //添加文件路径
+	       
+	        indexWriter.addDocument(doc);
+		}
+		
+		
+		
+		System.out.println("共索引了"+indexWriter.numDocs()+"个文件");
+		
+		indexWriter.commit();
+		indexWriter.close();
+		System.out.println("创建索引所用时间："+(System.currentTimeMillis()-startTime));
+		
+		return true;
+	}
+	
+	
+	
+	private void addDocument(File file) throws IOException{
+		Document doc = new Document();
+		//添加字段
+        doc.add(new TextField("contents", new FileReader(file))); //添加内容
+        doc.add(new TextField("fileName", file.getName(), Field.Store.YES)); //添加文件名，并把这个字段存到索引文件里
+        doc.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES)); //添加文件路径
+       
+        indexWriter.addDocument(doc);
+	}
+	
+	private void closeWriter() throws IOException{
+		 if (indexWriter != null) {
+	            indexWriter.close();
+	        }
+	}
+	
+	public static void main(String[] args) {
+		try {
+			boolean r = LuceneIndexer.getInstance().createIndex(INDEX_DIR,DATA_DIR);
+			if(r){
+				System.out.println("创建成功!");
+			}else{
+				System.out.println("创建失败!");
+			}
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+		
+	}
+
+
+	
+}
--- a/src/project-search/src/test/java/com/test/lucene/LuceneTest.java
+++ b/src/project-search/src/test/java/com/test/lucene/LuceneTest.java
+package com.test.lucene;
+
+/**
+ * Empty placeholder class; it declares no fields or methods yet.
+ * NOTE(review): presumably reserved for Lucene test cases — confirm or remove.
+ */
+public class LuceneTest {
+
+}
--- a/src/project-search/src/test/java/com/test/lucene/SearchBuilder.java
+++ b/src/project-search/src/test/java/com/test/lucene/SearchBuilder.java
+package com.test.lucene;
+
+/**
+ * Empty placeholder class; it declares no fields or methods yet.
+ * NOTE(review): presumably intended to build Lucene search queries — confirm or remove.
+ */
+public class SearchBuilder {
+
+}