diff --git a/simhash/.idea/.gitignore b/simhash/.idea/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..35410cacdc5e87f985c93a96520f5e11a5c822e4
--- /dev/null
+++ b/simhash/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/simhash/.idea/compiler.xml b/simhash/.idea/compiler.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e5fa1de71db3fef18d34e22b1774d73fadcc8e67
--- /dev/null
+++ b/simhash/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/encodings.xml b/simhash/.idea/encodings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..aa00ffab7828f4818589659c804ec2cfd99baed3
--- /dev/null
+++ b/simhash/.idea/encodings.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/jarRepositories.xml b/simhash/.idea/jarRepositories.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5a2f139ce25c6f225e0cb5fb199704f51273de00
--- /dev/null
+++ b/simhash/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/misc.xml b/simhash/.idea/misc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..82dbec8ad28463aed32007a93ffc07865ae98968
--- /dev/null
+++ b/simhash/.idea/misc.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/pom.xml b/simhash/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e30812d890a7125eee67dfe6fbbecda91f1e637a
--- /dev/null
+++ b/simhash/pom.xml
@@ -0,0 +1,50 @@
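+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.example</groupId>
+    <artifactId>simhash</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.5</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.hutool</groupId>
+            <artifactId>hutool-all</artifactId>
+            <version>5.7.13</version>
+        </dependency>
+        <dependency>
+            <groupId>com.hankcs.nlp</groupId>
+            <artifactId>hanlp-lucene-plugin</artifactId>
+            <version>1.1.7</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.hutool</groupId>
+            <artifactId>hutool-http</artifactId>
+            <version>5.8.14</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-api</artifactId>
+            <version>5.8.2</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+</project>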
\ No newline at end of file
diff --git a/simhash/src/main/java/Main.java b/simhash/src/main/java/Main.java
new file mode 100644
index 0000000000000000000000000000000000000000..1355d7ea27337ca49554cf36a7f6f5d7ca28d941
--- /dev/null
+++ b/simhash/src/main/java/Main.java
@@ -0,0 +1,42 @@
+import cn.hutool.core.date.DateUtil;
+import exceptions.FileAnalyseException;
+import exceptions.NotExistFileException;
+import utils.CalculationUtils;
+import utils.CommonUtils;
+import java.util.Map;
+
+/**
+ * Command-line entry point of the SimHash text-similarity checker.
+ *
+ * <p>Takes three arguments: the path of the original text, the path of the text compared
+ * against it, and the path of the report file to which the result is appended.
+ */
+public class Main {
+    /** Required number of command-line arguments. */
+    static final int ARGS_NUM = 3;
+
+    /**
+     * Reads and analyses both texts, computes their SimHash values, prints the similarity and
+     * appends a short report to the output file.
+     *
+     * <p>If either file cannot be read or analysed, the stack trace is printed and the method
+     * returns without writing a report.
+     *
+     * @param args {@code [originalPath, comparePath, reportPath]}
+     * @throws IllegalArgumentException if {@code args} is {@code null} or does not contain
+     *     exactly {@link #ARGS_NUM} entries
+     */
+    public static void main(String[] args) {
+        if (args == null || args.length != ARGS_NUM) {
+            throw new IllegalArgumentException("参数个数不正确");
+        }
+        String originPath = args[0];
+        String comparePath = args[1];
+        String reportPath = args[2];
+
+        // Keyword -> frequency maps for the original and the compared text.
+        Map<String, Integer> originWordCount;
+        Map<String, Integer> compareWordCount;
+        try {
+            originWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(originPath));
+            compareWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(comparePath));
+        } catch (FileAnalyseException | NotExistFileException e) {
+            // Nothing can be compared without both maps. Stop here instead of carrying null
+            // maps into the hash computation, where they would surface as a NullPointerException.
+            e.printStackTrace();
+            return;
+        }
+
+        String simHash1 = CalculationUtils.calculateSimHash(originWordCount);
+        String simHash2 = CalculationUtils.calculateSimHash(compareWordCount);
+
+        // Similarity is reported with two decimal places.
+        double result = CalculationUtils.getSimilarity(simHash1, simHash2);
+        String format = String.format("相似度为:%.2f", result);
+        System.out.println(format);
+
+        String writeFileContent = String.join("\n",
+                "---------------------------------------",
+                "原文件:" + originPath,
+                "对比文件:" + comparePath,
+                format,
+                "比较时间为:" + DateUtil.now()) + "\n";
+        CommonUtils.writeFile(reportPath, writeFileContent);
+    }
+}
diff --git a/simhash/src/main/java/exceptions/FileAnalyseException.java b/simhash/src/main/java/exceptions/FileAnalyseException.java
new file mode 100644
index 0000000000000000000000000000000000000000..31a1e648ff33e22e5ff05814350a2cd7cdfe7352
--- /dev/null
+++ b/simhash/src/main/java/exceptions/FileAnalyseException.java
@@ -0,0 +1,10 @@
+package exceptions;
+
+/**
+ * Checked exception raised when a text cannot be analysed.
+ */
+public class FileAnalyseException extends Exception {
+    /**
+     * Creates the exception with a description only.
+     *
+     * @param message description of the analysis failure
+     */
+    public FileAnalyseException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates the exception while keeping the underlying failure attached.
+     *
+     * @param message description of the analysis failure
+     * @param cause the exception that triggered this one; may be {@code null}
+     */
+    public FileAnalyseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/exceptions/HashException.java b/simhash/src/main/java/exceptions/HashException.java
new file mode 100644
index 0000000000000000000000000000000000000000..42596619817b9114b60c3cfbb13d6e44c67c3d82
--- /dev/null
+++ b/simhash/src/main/java/exceptions/HashException.java
@@ -0,0 +1,12 @@
+package exceptions;
+
+import java.security.NoSuchAlgorithmException;
+
+/**
+ * Exception raised when hashing a word with MD5 fails.
+ *
+ * <p>Because this type extends {@link NoSuchAlgorithmException}, a
+ * {@code catch (NoSuchAlgorithmException e)} clause also catches it.
+ */
+public class HashException extends NoSuchAlgorithmException {
+    /**
+     * Creates the exception with a description only.
+     *
+     * @param message description of the hashing failure
+     */
+    public HashException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates the exception while keeping the underlying failure attached.
+     *
+     * @param message description of the hashing failure
+     * @param cause the exception that triggered this one; may be {@code null}
+     */
+    public HashException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/exceptions/NotExistFileException.java b/simhash/src/main/java/exceptions/NotExistFileException.java
new file mode 100644
index 0000000000000000000000000000000000000000..377c24e69bf7e1f6a71d67201c29f33526a7a832
--- /dev/null
+++ b/simhash/src/main/java/exceptions/NotExistFileException.java
@@ -0,0 +1,11 @@
+package exceptions;
+
+import java.io.FileNotFoundException;
+/**
+ * Exception raised when a file to be analysed cannot be found.
+ */
+public class NotExistFileException extends FileNotFoundException {
+    /**
+     * Creates the exception with a description only.
+     *
+     * @param message description of the failure
+     */
+    public NotExistFileException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates the exception while keeping the underlying failure attached.
+     *
+     * @param message description of the failure
+     * @param cause the exception that triggered this one; may be {@code null}
+     */
+    public NotExistFileException(String message, Throwable cause) {
+        super(message);
+        // FileNotFoundException offers no (message, cause) constructor, so attach it afterwards.
+        if (cause != null) {
+            initCause(cause);
+        }
+    }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/utils/CalculationUtils.java b/simhash/src/main/java/utils/CalculationUtils.java
new file mode 100644
index 0000000000000000000000000000000000000000..da8a6c34059c14d0be0903162da27a2f5341e5d9
--- /dev/null
+++ b/simhash/src/main/java/utils/CalculationUtils.java
@@ -0,0 +1,141 @@
+package utils;
+
+import cn.hutool.core.util.StrUtil;
+import exceptions.HashException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Map;
+
+/**
+ * Static helpers for the numeric part of the SimHash pipeline: hashing words, weighting and
+ * merging bit vectors, reducing them to a SimHash, and scoring two SimHash values.
+ */
+public class CalculationUtils {
+    /** Length, in bits, of every word hash and of the resulting SimHash. */
+    static final int HASH_BIT = 128;
+
+    /**
+     * Hashes a word with MD5 and renders the digest as a string of {@code '0'}/{@code '1'}
+     * characters, most significant bit of each byte first.
+     *
+     * @param word the word to hash
+     * @return the {@value #HASH_BIT}-character binary representation of the MD5 digest
+     * @throws HashException if {@code word} is {@code null}, empty or blank, if MD5 is not
+     *     available, or if the digest does not yield exactly {@value #HASH_BIT} bits
+     */
+    public static String wordHash(String word) throws HashException {
+        // StrUtil.isBlank already treats null and "" as blank.
+        if (StrUtil.isBlank(word)) {
+            throw new HashException("词语为空");
+        }
+        byte[] md5;
+        try {
+            md5 = MessageDigest.getInstance("MD5").digest(word.getBytes(StandardCharsets.UTF_8));
+        } catch (NoSuchAlgorithmException e) {
+            HashException wrapped = new HashException("MD5算法异常");
+            wrapped.initCause(e);
+            throw wrapped;
+        }
+        // Expand every byte into its 8 bits, high bit first (same order as hex -> binary).
+        StringBuilder bits = new StringBuilder(HASH_BIT);
+        for (byte b : md5) {
+            for (int shift = 7; shift >= 0; shift--) {
+                bits.append(((b >> shift) & 1) == 1 ? '1' : '0');
+            }
+        }
+        // Checked outside the try block: HashException is itself a NoSuchAlgorithmException, so
+        // throwing it inside would be caught above and reported as an MD5 failure.
+        if (bits.length() != HASH_BIT) {
+            throw new HashException("hash值长度不为128");
+        }
+        return bits.toString();
+    }
+
+    /**
+     * Applies a weight to a binary hash: a {@code '1'} becomes {@code +weight}, any other
+     * character becomes {@code -weight}.
+     *
+     * @param hash binary hash string of at most {@value #HASH_BIT} characters
+     * @param weight weight of the word the hash belongs to
+     * @return an array of length {@value #HASH_BIT}; positions beyond {@code hash.length()}
+     *     stay 0
+     * @throws IllegalArgumentException if {@code hash} is {@code null} or longer than
+     *     {@value #HASH_BIT} characters
+     */
+    public static int[] hashWeight(String hash, int weight) {
+        if (hash == null || hash.length() > HASH_BIT) {
+            throw new IllegalArgumentException(
+                    "hash must be a non-null binary string of at most " + HASH_BIT + " characters");
+        }
+        int[] weighted = new int[HASH_BIT];
+        for (int i = 0; i < hash.length(); i++) {
+            weighted[i] = hash.charAt(i) == '1' ? weight : -weight;
+        }
+        return weighted;
+    }
+
+    /**
+     * Reduces a merged weight vector to a SimHash: positive entries become {@code '1'}, zero
+     * and negative entries become {@code '0'}.
+     *
+     * @param mergeHash merged weight vector
+     * @return the SimHash as a binary string with one character per entry
+     */
+    public static String getSimHash(int[] mergeHash) {
+        StringBuilder simHash = new StringBuilder(mergeHash.length);
+        for (int sum : mergeHash) {
+            simHash.append(sum > 0 ? '1' : '0');
+        }
+        return simHash.toString();
+    }
+
+    /**
+     * Computes the SimHash of a text from its words and their frequencies.
+     *
+     * <p>Each word is hashed, weighted by its frequency, and added position-wise into a merged
+     * vector, which is then reduced with {@link #getSimHash(int[])}. A word that cannot be
+     * hashed is reported on standard error and skipped.
+     *
+     * @param wordCount words mapped to their number of occurrences
+     * @return the {@value #HASH_BIT}-character SimHash
+     * @throws IllegalArgumentException if {@code wordCount} is {@code null}
+     */
+    public static String calculateSimHash(Map<String, Integer> wordCount) {
+        if (wordCount == null) {
+            throw new IllegalArgumentException("wordCount must not be null");
+        }
+        // A new int[] is already zero-filled.
+        int[] mergeHash = new int[HASH_BIT];
+        for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
+            try {
+                int[] weighted = hashWeight(wordHash(entry.getKey()), entry.getValue());
+                for (int i = 0; i < weighted.length; i++) {
+                    mergeHash[i] += weighted[i];
+                }
+            } catch (HashException e) {
+                // Best effort: one unhashable word must not abort the whole fingerprint.
+                e.printStackTrace();
+            }
+        }
+        return getSimHash(mergeHash);
+    }
+
+    /**
+     * Scores two SimHash values with the Jaccard coefficient over their {@code '1'} positions:
+     * positions where both are {@code '1'}, divided by that count plus the Hamming distance.
+     * The Hamming distance is also printed to standard output.
+     *
+     * @param simHash1 first SimHash
+     * @param simHash2 second SimHash, same length as the first
+     * @return a value in {@code [0, 1]}; {@code 1.0} when the hashes are identical and contain
+     *     no {@code '1'} at all (the ratio would otherwise be 0/0)
+     * @throws IllegalArgumentException if either argument is {@code null} or the lengths differ
+     */
+    public static double getSimilarity(String simHash1, String simHash2) {
+        if (simHash1 == null || simHash2 == null || simHash1.length() != simHash2.length()) {
+            throw new IllegalArgumentException("simHash values must be non-null and of equal length");
+        }
+        int hammingDistance = 0;
+        int sharedOnes = 0;
+        for (int i = 0; i < simHash1.length(); i++) {
+            char a = simHash1.charAt(i);
+            char b = simHash2.charAt(i);
+            if (a != b) {
+                hammingDistance++;
+            } else if (a == '1') {
+                sharedOnes++;
+            }
+        }
+        System.out.println("两个simHash的汉明距离为:" + hammingDistance);
+        int union = hammingDistance + sharedOnes;
+        // union == 0 means no differing bit and no shared '1': the hashes are identical.
+        return union == 0 ? 1.0 : (double) sharedOnes / union;
+    }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/utils/CommonUtils.java b/simhash/src/main/java/utils/CommonUtils.java
new file mode 100644
index 0000000000000000000000000000000000000000..e78d1ae0f3b89e90a31e5061e55185c6f7319404
--- /dev/null
+++ b/simhash/src/main/java/utils/CommonUtils.java
@@ -0,0 +1,71 @@
+package utils;
+
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.util.StrUtil;
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.seg.common.Term;
+import exceptions.FileAnalyseException;
+import exceptions.NotExistFileException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * Non-numeric helpers: reading a text file, turning a text into a keyword/frequency map, and
+ * appending a result to a file.
+ */
+public class CommonUtils {
+    /**
+     * Keyword-count threshold: a text yielding this many keywords or fewer is rejected by
+     * {@link #analyseText(String)}.
+     */
+    // NOTE(review): the original comment called 3 the minimum keyword count, yet the check
+    // below rejects exactly 3 as well. Confirm whether `<` was intended.
+    public static final int SHORT_WORD_LENGTH = 3;
+
+    /**
+     * Reads a whole file as UTF-8 text.
+     *
+     * @param filePath path of the file to read
+     * @return the file content
+     * @throws NotExistFileException if reading fails for any reason; the underlying exception
+     *     is attached as the cause
+     */
+    public static String readFileToStr(String filePath) throws NotExistFileException {
+        try {
+            return FileUtil.readUtf8String(filePath);
+        } catch (Exception e) {
+            NotExistFileException wrapped = new NotExistFileException("该绝对路径的文件不存在");
+            // Keep the real reason (missing file, permissions, null path, ...) for diagnosis.
+            wrapped.initCause(e);
+            throw wrapped;
+        }
+    }
+
+    /**
+     * Extracts the keywords of a text and counts how often each occurs among the text's
+     * segmented words.
+     *
+     * @param text the text to analyse
+     * @return a map from each extracted keyword to its number of occurrences in the segmented
+     *     text (0 if the keyword never appears as a segmented word)
+     * @throws FileAnalyseException if {@code text} is {@code null}, empty or blank, or if it
+     *     yields {@link #SHORT_WORD_LENGTH} keywords or fewer
+     */
+    public static Map<String, Integer> analyseText(String text) throws FileAnalyseException {
+        // StrUtil.isBlank already treats null and "" as blank.
+        if (StrUtil.isBlank(text)) {
+            throw new FileAnalyseException("文件解析异常,解析内容为空");
+        }
+        List<String> keyList = HanLP.extractKeyword(text, text.length());
+        if (keyList.size() <= SHORT_WORD_LENGTH) {
+            throw new FileAnalyseException("文件解析异常,关键词太少");
+        }
+        // Count every segmented word once up front, rather than rescanning the whole word list
+        // for each keyword.
+        Map<String, Integer> frequency = new HashMap<>();
+        for (Term term : HanLP.segment(text)) {
+            frequency.merge(term.word, 1, Integer::sum);
+        }
+        Map<String, Integer> wordCount = new HashMap<>(keyList.size());
+        for (String keyword : keyList) {
+            wordCount.put(keyword, frequency.getOrDefault(keyword, 0));
+        }
+        return wordCount;
+    }
+
+    /**
+     * Appends the comparison result to the given file using UTF-8.
+     *
+     * @param filePath path of the report file
+     * @param content text to append
+     */
+    public static void writeFile(String filePath, String content) {
+        FileUtil.appendString(content, filePath, "utf-8");
+    }
+}
\ No newline at end of file
diff --git a/simhash/src/test/java/MainTest.java b/simhash/src/test/java/MainTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..71146b6297a103f417d71695b2d001dd2fee0a3d
--- /dev/null
+++ b/simhash/src/test/java/MainTest.java
@@ -0,0 +1,252 @@
+import com.hankcs.hanlp.HanLP;
+import exceptions.HashException;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import exceptions.FileAnalyseException;
+import exceptions.NotExistFileException;
+import utils.CalculationUtils;
+import utils.CommonUtils;
+import java.util.Arrays;
+import java.util.Map;
+
+
+/**
+ * Tests for the SimHash similarity checker: file I/O, text analysis, hashing, weighting,
+ * SimHash computation, similarity scoring and the command-line entry point.
+ *
+ * <p>Several tests read and write the absolute paths declared below and therefore need those
+ * files to exist on the machine running them.
+ */
+public class MainTest {
+    /** Text most recently read from the original file by {@link #testReadFile()}. */
+    static String analyseStr;
+    /** Two sample sentences used for in-memory tests. */
+    static final String originSentence = "今天是星期天,天气晴,今天晚上我要去看电影。";
+    static final String compareSentence = "今天是周天,天气晴朗,我晚上要去看电影。";
+    /** File the comparison results are appended to. */
+    static final String writeFilePath = "E:\\测试文本\\write.txt";
+    /** The original text. */
+    static final String OrigFilePath = "E:\\测试文本\\orig.txt";
+    /** Five texts compared against the original. */
+    static final String CopyFilePath1 = "E:\\测试文本\\orig_0.8_add.txt";
+    static final String CopyFilePath2 = "E:\\测试文本\\orig_0.8_del.txt";
+    static final String CopyFilePath3 = "E:\\测试文本\\orig_0.8_dis_1.txt";
+    static final String CopyFilePath4 = "E:\\测试文本\\orig_0.8_dis_10.txt";
+    static final String CopyFilePath5 = "E:\\测试文本\\orig_0.8_dis_15.txt";
+
+    /**
+     * Appended content must be readable back from the report file.
+     */
+    @Test
+    void testWriteFile() {
+        String marker = "------successfully content entry------";
+        CommonUtils.writeFile(writeFilePath, marker);
+        try {
+            String written = CommonUtils.readFileToStr(writeFilePath);
+            Assertions.assertTrue(written.contains(marker), "写入文件失败");
+        } catch (NotExistFileException e) {
+            Assertions.fail("写入文件失败", e);
+        }
+    }
+
+    /**
+     * Reading a file that does not exist must raise {@link NotExistFileException}.
+     */
+    @Test
+    void testReadFileNotExist() {
+        Assertions.assertThrows(NotExistFileException.class,
+                () -> CommonUtils.readFileToStr("E:\\not existing.txt"), "没有抛出异常");
+    }
+
+    /**
+     * Analysing {@code null}, an empty string or a blank string must raise
+     * {@link FileAnalyseException}.
+     */
+    @Test
+    void testFileAnalyseException() {
+        Assertions.assertThrows(FileAnalyseException.class,
+                () -> CommonUtils.analyseText(null), "没有抛出异常");
+        Assertions.assertThrows(FileAnalyseException.class,
+                () -> CommonUtils.analyseText(""), "没有抛出异常");
+        Assertions.assertThrows(FileAnalyseException.class,
+                () -> CommonUtils.analyseText(" "), "没有抛出异常");
+    }
+
+    /**
+     * Analyses a sample sentence and the original file, printing both results.
+     */
+    @Test
+    void testReadFile() {
+        try {
+            System.out.println("分词结果为:" + CommonUtils.analyseText(originSentence));
+            analyseStr = CommonUtils.readFileToStr(OrigFilePath);
+            System.out.println("分词结果为:" + CommonUtils.analyseText(analyseStr));
+        } catch (Exception e) {
+            Assertions.fail("分词结果有误", e);
+        }
+    }
+
+    /**
+     * Every keyword of the sample sentence must hash to a 128-character binary string.
+     */
+    @Test
+    void testWordHash() {
+        for (String word : HanLP.extractKeyword(originSentence, originSentence.length())) {
+            try {
+                String hash = CalculationUtils.wordHash(word);
+                System.out.println(word + " : " + hash);
+                Assertions.assertEquals(128, hash.length(), "hash值长度不是128");
+            } catch (HashException e) {
+                Assertions.fail("哈希出错", e);
+            }
+        }
+    }
+
+    /**
+     * Hashing an empty, {@code null} or blank word must raise {@link HashException}.
+     */
+    @Test
+    void testHashException() {
+        Assertions.assertThrows(HashException.class,
+                () -> CalculationUtils.wordHash(""), "没有抛出异常");
+        Assertions.assertThrows(HashException.class,
+                () -> CalculationUtils.wordHash(null), "没有抛出异常");
+        Assertions.assertThrows(HashException.class,
+                () -> CalculationUtils.wordHash(" "), "没有抛出异常");
+    }
+
+    /**
+     * Weighting the hash of every keyword must yield a vector of length 128.
+     */
+    @Test
+    void testHashWeight() {
+        Map<String, Integer> map = Assertions.assertDoesNotThrow(
+                () -> CommonUtils.analyseText(originSentence), "解析错误");
+        for (Map.Entry<String, Integer> entry : map.entrySet()) {
+            try {
+                String hash = CalculationUtils.wordHash(entry.getKey());
+                int[] hashWeight = CalculationUtils.hashWeight(hash, entry.getValue());
+                System.out.println(entry.getKey() + " : " + Arrays.toString(hashWeight));
+                Assertions.assertEquals(128, hashWeight.length, "加权后的hash值长度不是128");
+            } catch (HashException e) {
+                Assertions.fail("哈希出错", e);
+            }
+        }
+    }
+
+    /**
+     * The SimHash of the sample sentence and of the original file must be 128 characters long.
+     * Read or analysis failures propagate so they fail the test instead of passing silently.
+     */
+    @Test
+    void testCalculateSimHash() throws FileAnalyseException, NotExistFileException {
+        String hash1 = CalculationUtils.calculateSimHash(CommonUtils.analyseText(originSentence));
+        System.out.println("原句子\"" + originSentence + "\"的simHash值为:" + hash1);
+        Assertions.assertEquals(128, hash1.length(), "hash值长度不是128");
+
+        String hash2 = CalculationUtils.calculateSimHash(
+                CommonUtils.analyseText(CommonUtils.readFileToStr(OrigFilePath)));
+        System.out.println("原文本的simHash值为:" + hash2);
+        Assertions.assertEquals(128, hash2.length(), "hash值长度不是128");
+    }
+
+    /**
+     * The similarity of the two sample sentences must lie in [0, 1].
+     */
+    @Test
+    void testGetSimilarity1() {
+        String hash1 = Assertions.assertDoesNotThrow(
+                () -> CalculationUtils.calculateSimHash(CommonUtils.analyseText(originSentence)),
+                "解析错误");
+        String hash2 = Assertions.assertDoesNotThrow(
+                () -> CalculationUtils.calculateSimHash(CommonUtils.analyseText(compareSentence)),
+                "解析错误");
+        double similarity = CalculationUtils.getSimilarity(hash1, hash2);
+        System.out.println(String.format("两个句子的相似度为:%.2f", similarity));
+        Assertions.assertTrue(0 <= similarity && similarity <= 1, "相似度不在0-1之间");
+    }
+
+    /**
+     * The similarity of the original file and the first comparison file must lie in [0, 1].
+     * Read or analysis failures propagate so they fail the test instead of passing silently.
+     */
+    @Test
+    void testGetSimilarity2() throws FileAnalyseException, NotExistFileException {
+        String hash1 = CalculationUtils.calculateSimHash(
+                CommonUtils.analyseText(CommonUtils.readFileToStr(OrigFilePath)));
+        String hash2 = CalculationUtils.calculateSimHash(
+                CommonUtils.analyseText(CommonUtils.readFileToStr(CopyFilePath1)));
+        double similarity = CalculationUtils.getSimilarity(hash1, hash2);
+        System.out.println(String.format("两个文本的相似度为:%.2f", similarity));
+        Assertions.assertTrue(0 <= similarity && similarity <= 1, "相似度不在0-1之间");
+    }
+
+    /**
+     * Runs the entry point against each of the five comparison files; none may throw.
+     */
+    @Test
+    void testMain() {
+        String[] comparePaths = {
+            CopyFilePath1, CopyFilePath2, CopyFilePath3, CopyFilePath4, CopyFilePath5
+        };
+        for (String comparePath : comparePaths) {
+            Assertions.assertDoesNotThrow(
+                    () -> Main.main(new String[] {OrigFilePath, comparePath, writeFilePath}));
+        }
+    }
+}
\ No newline at end of file
diff --git a/simhash/target/classes/Main.class b/simhash/target/classes/Main.class
new file mode 100644
index 0000000000000000000000000000000000000000..0f561640eb051ae4284750fe8ad3a87322ec6501
Binary files /dev/null and b/simhash/target/classes/Main.class differ
diff --git a/simhash/target/classes/classpath.index b/simhash/target/classes/classpath.index
new file mode 100644
index 0000000000000000000000000000000000000000..710196b307608a6994e17db7259659a6ae4d8f59
Binary files /dev/null and b/simhash/target/classes/classpath.index differ
diff --git a/simhash/target/classes/exceptions/FileAnalyseException.class b/simhash/target/classes/exceptions/FileAnalyseException.class
new file mode 100644
index 0000000000000000000000000000000000000000..bd5ef4da37b065e7b1ab4dcc42274e28beda9337
Binary files /dev/null and b/simhash/target/classes/exceptions/FileAnalyseException.class differ
diff --git a/simhash/target/classes/exceptions/HashException.class b/simhash/target/classes/exceptions/HashException.class
new file mode 100644
index 0000000000000000000000000000000000000000..2fbf9f364c478780d4d255df5ac107eac899f56b
Binary files /dev/null and b/simhash/target/classes/exceptions/HashException.class differ
diff --git a/simhash/target/classes/exceptions/NotExistFileException.class b/simhash/target/classes/exceptions/NotExistFileException.class
new file mode 100644
index 0000000000000000000000000000000000000000..73b9c64ae1e951e99e1d6bcbfd73c090eeefbace
Binary files /dev/null and b/simhash/target/classes/exceptions/NotExistFileException.class differ
diff --git a/simhash/target/classes/utils/CalculationUtils.class b/simhash/target/classes/utils/CalculationUtils.class
new file mode 100644
index 0000000000000000000000000000000000000000..fab98327300ab60c841461105c940433d2e89267
Binary files /dev/null and b/simhash/target/classes/utils/CalculationUtils.class differ
diff --git a/simhash/target/classes/utils/CommonUtils.class b/simhash/target/classes/utils/CommonUtils.class
new file mode 100644
index 0000000000000000000000000000000000000000..a66b00e332940e3a56bcd0d0ec90a6908d10a234
Binary files /dev/null and b/simhash/target/classes/utils/CommonUtils.class differ
diff --git a/simhash/target/test-classes/MainTest.class b/simhash/target/test-classes/MainTest.class
new file mode 100644
index 0000000000000000000000000000000000000000..940d97752b520cd25ea4271b532957d8c47adc3f
Binary files /dev/null and b/simhash/target/test-classes/MainTest.class differ
diff --git a/simhash/target/test-classes/classpath.index b/simhash/target/test-classes/classpath.index
new file mode 100644
index 0000000000000000000000000000000000000000..c91390c74815757960dbd0886b529702b8074878
Binary files /dev/null and b/simhash/target/test-classes/classpath.index differ