From 72079d3b6980e0fa6dec22b4e06d495346591c03 Mon Sep 17 00:00:00 2001
From: y <123@12.com>
Date: Tue, 7 Mar 2023 16:29:30 +0800
Subject: [PATCH] simhash
---
helloworld.txt | 0
simhash/.idea/.gitignore | 8 +
simhash/.idea/compiler.xml | 13 ++
simhash/.idea/encodings.xml | 7 +
simhash/.idea/jarRepositories.xml | 20 +++
simhash/.idea/misc.xml | 14 ++
simhash/pom.xml | 50 ++++++
simhash/src/main/java/Main.java | 39 +++++
.../java/exceptions/FileAnalyseException.java | 12 ++
.../main/java/exceptions/HashException.java | 14 ++
.../exceptions/NotExistFileException.java | 14 ++
.../src/main/java/utils/CalculationUtils.java | 158 ++++++++++++++++++
simhash/src/main/java/utils/CommonUtils.java | 71 ++++++++
simhash/src/test/java/MainTest.java | 17 ++
simhash/target/classes/Main.class | Bin 0 -> 2480 bytes
simhash/target/classes/classpath.index | Bin 0 -> 136 bytes
.../exceptions/FileAnalyseException.class | Bin 0 -> 374 bytes
.../classes/exceptions/HashException.class | Bin 0 -> 372 bytes
.../exceptions/NotExistFileException.class | Bin 0 -> 387 bytes
.../classes/utils/CalculationUtils.class | Bin 0 -> 5023 bytes
.../target/classes/utils/CommonUtils.class | Bin 0 -> 3869 bytes
simhash/target/test-classes/MainTest.class | Bin 0 -> 842 bytes
simhash/target/test-classes/classpath.index | Bin 0 -> 96 bytes
23 files changed, 437 insertions(+)
delete mode 100644 helloworld.txt
create mode 100644 simhash/.idea/.gitignore
create mode 100644 simhash/.idea/compiler.xml
create mode 100644 simhash/.idea/encodings.xml
create mode 100644 simhash/.idea/jarRepositories.xml
create mode 100644 simhash/.idea/misc.xml
create mode 100644 simhash/pom.xml
create mode 100644 simhash/src/main/java/Main.java
create mode 100644 simhash/src/main/java/exceptions/FileAnalyseException.java
create mode 100644 simhash/src/main/java/exceptions/HashException.java
create mode 100644 simhash/src/main/java/exceptions/NotExistFileException.java
create mode 100644 simhash/src/main/java/utils/CalculationUtils.java
create mode 100644 simhash/src/main/java/utils/CommonUtils.java
create mode 100644 simhash/src/test/java/MainTest.java
create mode 100644 simhash/target/classes/Main.class
create mode 100644 simhash/target/classes/classpath.index
create mode 100644 simhash/target/classes/exceptions/FileAnalyseException.class
create mode 100644 simhash/target/classes/exceptions/HashException.class
create mode 100644 simhash/target/classes/exceptions/NotExistFileException.class
create mode 100644 simhash/target/classes/utils/CalculationUtils.class
create mode 100644 simhash/target/classes/utils/CommonUtils.class
create mode 100644 simhash/target/test-classes/MainTest.class
create mode 100644 simhash/target/test-classes/classpath.index
diff --git a/helloworld.txt b/helloworld.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/simhash/.idea/.gitignore b/simhash/.idea/.gitignore
new file mode 100644
index 0000000..35410ca
--- /dev/null
+++ b/simhash/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/simhash/.idea/compiler.xml b/simhash/.idea/compiler.xml
new file mode 100644
index 0000000..e5fa1de
--- /dev/null
+++ b/simhash/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/encodings.xml b/simhash/.idea/encodings.xml
new file mode 100644
index 0000000..aa00ffa
--- /dev/null
+++ b/simhash/.idea/encodings.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/jarRepositories.xml b/simhash/.idea/jarRepositories.xml
new file mode 100644
index 0000000..5a2f139
--- /dev/null
+++ b/simhash/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/.idea/misc.xml b/simhash/.idea/misc.xml
new file mode 100644
index 0000000..82dbec8
--- /dev/null
+++ b/simhash/.idea/misc.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/simhash/pom.xml b/simhash/pom.xml
new file mode 100644
index 0000000..e30812d
--- /dev/null
+++ b/simhash/pom.xml
@@ -0,0 +1,50 @@
+
+
+ 4.0.0
+
+ org.example
+ simhash
+ 1.0-SNAPSHOT
+
+
+ org.apache.commons
+ commons-lang3
+ 3.5
+
+
+ cn.hutool
+ hutool-all
+ 5.7.13
+
+
+ com.hankcs.nlp
+ hanlp-lucene-plugin
+ 1.1.7
+
+
+ junit
+ junit
+ 4.13.2
+
+
+ cn.hutool
+ hutool-http
+ 5.8.14
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ 5.8.2
+ test
+
+
+
+
+ 17
+ 17
+ UTF-8
+
+
+
\ No newline at end of file
diff --git a/simhash/src/main/java/Main.java b/simhash/src/main/java/Main.java
new file mode 100644
index 0000000..40c8b93
--- /dev/null
+++ b/simhash/src/main/java/Main.java
@@ -0,0 +1,39 @@
+import cn.hutool.core.date.DateUtil;
+import exceptions.FileAnalyseException;
+import exceptions.NotExistFileException;
+import utils.CalculationUtils;
+import utils.CommonUtils;
+import java.util.Map;
+
+public class Main {
+    /** Expected number of arguments: original file, file to compare, report file. */
+    static final int ARGS_NUM = 3;
+
+    /**
+     * Compares two text files by simHash and appends the similarity report to a third file.
+     * @param args original file path, compared file path, report file path
+     * @throws IllegalArgumentException if the argument count is not {@link #ARGS_NUM}
+     * @throws IllegalStateException if either input file cannot be read or analysed
+     */
+    public static void main(String[] args) {
+        if (args.length != ARGS_NUM) {
+            throw new IllegalArgumentException("参数个数不正确");
+        }
+        Map<String, Integer> originWordCount;
+        Map<String, Integer> compareWordCount;
+        try {
+            originWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(args[0]));
+            compareWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(args[1]));
+        } catch (FileAnalyseException | NotExistFileException e) {
+            // Fail with the real cause; carrying on with null word maps would only end in an NPE.
+            throw new IllegalStateException(e.getMessage(), e);
+        }
+        String simHash1 = CalculationUtils.calculateSimHash(originWordCount);
+        String simHash2 = CalculationUtils.calculateSimHash(compareWordCount);
+        double result = CalculationUtils.getSimilarity(simHash1, simHash2);
+        String writeFileContent = "---------------------------------------" + "\n"
+                + "原文件:" + args[0] + "\n" + "对比文件:" + args[1] + "\n"
+                + String.format("相似度为:%.2f", result) + "\n" + "比较时间为:" + DateUtil.now() + "\n";
+        CommonUtils.writeFile(args[2], writeFileContent);
+    }
+}
diff --git a/simhash/src/main/java/exceptions/FileAnalyseException.java b/simhash/src/main/java/exceptions/FileAnalyseException.java
new file mode 100644
index 0000000..11aec77
--- /dev/null
+++ b/simhash/src/main/java/exceptions/FileAnalyseException.java
@@ -0,0 +1,12 @@
+package exceptions;
+
+/**
+ * @author HJW
+ * @date 2022-09-21 12:57
+ * File-analysis failure (the text converted to a string is empty, or filtering left no usable words).
+ */
+public class FileAnalyseException extends Exception {
+ public FileAnalyseException(String message) {
+ super(message);
+ }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/exceptions/HashException.java b/simhash/src/main/java/exceptions/HashException.java
new file mode 100644
index 0000000..c9e1902
--- /dev/null
+++ b/simhash/src/main/java/exceptions/HashException.java
@@ -0,0 +1,14 @@
+package exceptions;
+
+import java.security.NoSuchAlgorithmException;
+
+/**
+ * @author HJW
+ * @date 2022-09-21 12:57
+ * Hashing failure (MD5).
+ */
+public class HashException extends NoSuchAlgorithmException {
+ public HashException(String message) {
+ super(message);
+ }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/exceptions/NotExistFileException.java b/simhash/src/main/java/exceptions/NotExistFileException.java
new file mode 100644
index 0000000..9dc0f96
--- /dev/null
+++ b/simhash/src/main/java/exceptions/NotExistFileException.java
@@ -0,0 +1,14 @@
+package exceptions;
+
+import java.io.FileNotFoundException;
+
+
+/**
+ * @author HJW
+ * Custom exception for a file that cannot be found.
+ */
+public class NotExistFileException extends FileNotFoundException {
+ public NotExistFileException(String message) {
+ super(message);
+ }
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/utils/CalculationUtils.java b/simhash/src/main/java/utils/CalculationUtils.java
new file mode 100644
index 0000000..664a0a9
--- /dev/null
+++ b/simhash/src/main/java/utils/CalculationUtils.java
@@ -0,0 +1,158 @@
+package utils;
+
+import cn.hutool.core.util.StrUtil;
+import exceptions.HashException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Map;
+
+/**
+ * 与计算有关的工具类
+ */
+public class CalculationUtils {
+ static final int HASH_BIT = 128; // length of a word hash and of the merged weight array
+ static final int DISTANCE_WAY1 = 16; // getSimilarity: Hamming distances up to this are divided by 256
+ static final int DISTANCE_WAY2 = 32; // getSimilarity: distances above WAY1 and up to this are divided by 128
+ static final int DISTANCE_WAY3 = 64; // getSimilarity: distances above WAY2 and up to this are divided by 64
+
+ /**
+  * Hashes a word with MD5 and renders the digest as a string of binary digits.
+  *
+  * <p>Each digest byte becomes eight '0'/'1' characters, most significant bit first, so the
+  * result is {@code HASH_BIT} (128) characters long.
+  *
+  * @param word the word to hash
+  * @return the digest as 128 binary digits
+  * @throws HashException if {@code word} is null or blank, if MD5 is unavailable, or if the
+  *     rendered hash does not have {@code HASH_BIT} digits
+  */
+ public static String wordHash(String word) throws HashException {
+     // StrUtil.isBlank already treats null and "" as blank.
+     if (StrUtil.isBlank(word)) {
+         throw new HashException("词语为空");
+     }
+     final MessageDigest digest;
+     try {
+         digest = MessageDigest.getInstance("MD5");
+     } catch (NoSuchAlgorithmException e) {
+         // HashException only offers a message constructor, so attach the cause explicitly.
+         HashException failure = new HashException("MD5算法异常");
+         failure.initCause(e);
+         throw failure;
+     }
+     byte[] md5 = digest.digest(word.getBytes(StandardCharsets.UTF_8));
+
+     // Emit every digest byte as eight binary digits, most significant bit first.
+     StringBuilder bits = new StringBuilder(md5.length * Byte.SIZE);
+     for (byte b : md5) {
+         for (int shift = Byte.SIZE - 1; shift >= 0; shift--) {
+             bits.append((b >> shift) & 1);
+         }
+     }
+
+     // Checked outside the try: HashException is itself a NoSuchAlgorithmException and would be caught there.
+     if (bits.length() != HASH_BIT) {
+         throw new HashException("hash值长度不为128");
+     }
+     return bits.toString();
+ }
+
+ /**
+  * Turns a binary hash string into a vector of signed weights.
+  *
+  * <p>{@code calculateSimHash} sums these vectors over all words before reducing them to bits.
+  *
+  * @param hash string of binary digits, at most {@code HASH_BIT} characters long
+  * @param weight weight to apply at every position
+  * @return array of {@code HASH_BIT} ints: {@code weight} where the digit is '1', {@code -weight}
+  *     for any other character; positions past the end of {@code hash} stay 0
+  */
+ public static int[] hashWeight(String hash, int weight) {
+     final int negated = -1 * weight;
+     int[] weighted = new int[HASH_BIT];
+     int position = 0;
+     // Walk the digits once; anything that is not '1' counts as a zero bit.
+     for (char digit : hash.toCharArray()) {
+         weighted[position++] = (digit == '1') ? weight : negated;
+     }
+     return weighted;
+ }
+
+ /**
+  * Collapses the merged weight vector into the final simHash string.
+  *
+  * <p>This is the dimension-reduction step: every signed total is reduced to a single bit.
+  *
+  * @param mergeHash per-position sums of the weighted word hashes
+  * @return one character per element: '1' where the sum is positive, '0' otherwise
+  */
+ public static String getSimHash(int[] mergeHash){
+     char[] bits = new char[mergeHash.length];
+     int position = 0;
+     for (int total : mergeHash) {
+         // Only a strictly positive total becomes a set bit; zero falls on the '0' side.
+         bits[position] = total > 0 ? '1' : '0';
+         position++;
+     }
+     return new String(bits);
+ }
+
+
+ /**
+  * Computes the simHash of a document from its word frequencies.
+  *
+  * <p>Each word is hashed, weighted by its count and summed position by position; the totals
+  * are then reduced to bits by {@link #getSimHash(int[])}.
+  *
+  * @param wordCount words mapped to their number of occurrences
+  * @return the simHash as a string of {@code HASH_BIT} binary digits
+  */
+ public static String calculateSimHash(Map<String, Integer> wordCount){
+     // A new int[] is already zero-filled, so no explicit initialisation is needed.
+     int[] mergeHash = new int[HASH_BIT];
+     for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
+         try {
+             int[] tempHash = hashWeight(wordHash(entry.getKey()), entry.getValue());
+             for (int i = 0; i < tempHash.length; i++) {
+                 mergeHash[i] += tempHash[i];
+             }
+         } catch (HashException e) {
+             // Best effort: a word that cannot be hashed is reported and left out of the sum.
+             e.printStackTrace();
+         }
+     }
+     // Reduce the signed totals to bits.
+     return getSimHash(mergeHash);
+ }
+
+ /**
+  * Scores the similarity of two simHash strings from their Hamming distance.
+  *
+  * @param simHash1 first simHash; its length decides how many positions are compared
+  * @param simHash2 second simHash; must be at least as long as {@code simHash1}
+  * @return {@code 1 - distance / d} with d = 256, 128 or 64 for distances up to
+  *     {@code DISTANCE_WAY1}, {@code DISTANCE_WAY2} or {@code DISTANCE_WAY3}; 0 beyond that
+  */
+ public static double getSimilarity(String simHash1, String simHash2) {
+     // Hamming distance: number of positions whose characters differ.
+     int distance = 0;
+     final int length = simHash1.length();
+     for (int pos = 0; pos < length; pos++) {
+         distance += (simHash1.charAt(pos) == simHash2.charAt(pos)) ? 0 : 1;
+     }
+     // The distance is never negative, so only the upper bound of each tier needs checking.
+     final double divisor;
+     if (distance <= DISTANCE_WAY1) {
+         divisor = 256;
+     } else if (distance <= DISTANCE_WAY2) {
+         divisor = 128;
+     } else if (distance <= DISTANCE_WAY3) {
+         divisor = 64;
+     } else {
+         return 0;
+     }
+     return 1 - distance / divisor;
+ }
+
+}
\ No newline at end of file
diff --git a/simhash/src/main/java/utils/CommonUtils.java b/simhash/src/main/java/utils/CommonUtils.java
new file mode 100644
index 0000000..e78d1ae
--- /dev/null
+++ b/simhash/src/main/java/utils/CommonUtils.java
@@ -0,0 +1,71 @@
+package utils;
+
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.util.StrUtil;
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.seg.common.Term;
+import exceptions.FileAnalyseException;
+import exceptions.NotExistFileException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * 非计算的工具类
+ */
+public class CommonUtils {
+ // Keyword-count threshold: analyseText rejects a text that yields this many keywords or fewer.
+ public static final int SHORT_WORD_LENGTH = 3;
+ /** Reads a whole file as UTF-8 text.
+  * @param filePath path of the file to read
+  * @return the text that was read
+  * @throws NotExistFileException on any read failure; the underlying exception is kept as its cause
+  */
+ public static String readFileToStr(String filePath) throws NotExistFileException {
+     try {
+         return FileUtil.readUtf8String(filePath);
+     } catch (Exception e) {
+         throw (NotExistFileException) new NotExistFileException("该绝对路径的文件不存在").initCause(e);
+     }
+ }
+
+ /**
+  * Extracts the keywords of a text and counts how often each one occurs in it.
+  * @param text the text that was read
+  * @return keywords mapped to their number of occurrences among the segmented words
+  * @throws FileAnalyseException if the text is null or blank, or yields too few keywords
+  */
+ public static Map<String, Integer> analyseText(String text) throws FileAnalyseException {
+     // StrUtil.isBlank already covers null and the empty string.
+     if (StrUtil.isBlank(text)) {
+         throw new FileAnalyseException("文件解析异常,解析内容为空");
+     }
+     List<String> keyList = HanLP.extractKeyword(text, text.length());
+     // SHORT_WORD_LENGTH keywords or fewer is treated as too few.
+     if (keyList.size() <= SHORT_WORD_LENGTH) {
+         throw new FileAnalyseException("文件解析异常,关键词太少");
+     }
+     // Tally every segmented word once, rather than rescanning the word list per keyword.
+     Map<String, Integer> occurrences = new HashMap<>();
+     for (Term term : HanLP.segment(text)) {
+         occurrences.merge(term.word, 1, Integer::sum);
+     }
+     Map<String, Integer> wordCount = new HashMap<>(keyList.size());
+     for (String keyword : keyList) {
+         wordCount.put(keyword, occurrences.getOrDefault(keyword, 0));
+     }
+     return wordCount;
+ }
+
+ /**
+ * Appends the comparison result to the given file, passing "utf-8" as the charset.
+ * @param filePath path of the file to append to
+ * @param content the comparison result text
+ */
+ public static void writeFile(String filePath, String content) {
+ FileUtil.appendString(content, filePath, "utf-8");
+ }
+}
\ No newline at end of file
diff --git a/simhash/src/test/java/MainTest.java b/simhash/src/test/java/MainTest.java
new file mode 100644
index 0000000..c9879a9
--- /dev/null
+++ b/simhash/src/test/java/MainTest.java
@@ -0,0 +1,17 @@
+import org.junit.jupiter.api.Test;
+public class MainTest {
+    static String writeFilePath = "E:\\测试文本\\write.txt";
+    static String OrigFilePath = "E:\\测试文本\\orig.txt";
+    static String CopyFilePath1 = "E:\\测试文本\\orig_0.8_add.txt";
+
+    /**
+     * Runs {@code Main.main} end to end on the hard-coded files above; nothing is asserted.
+     * The paths are absolute Windows paths, so the test depends on those files being present.
+     */
+    @Test
+    void testMain(){
+        // Argument order expected by Main: original file, compared file, report file.
+        String[] args = {OrigFilePath, CopyFilePath1, writeFilePath};
+        Main.main(args);
+    }
+}
\ No newline at end of file
diff --git a/simhash/target/classes/Main.class b/simhash/target/classes/Main.class
new file mode 100644
index 0000000000000000000000000000000000000000..52bc6bc9e84e51a69a679bb356dc83289f94bd74
GIT binary patch
literal 2480
zcma)8-%}e^6#gy*Hra+kQz)fB=r3r20k(cD9hyO!v&~jFxwG%M2qb(6aK$`>UV*v|RkYG=8-D!#B^q{!v8)TfL1N
zsnL~M$4a}lZe|nR(<-(hreJ#%&1ezWR&aI0i6^aG&N450Yk>w^lXGLbp-ovd6F6KA
zXfw8l+#1DBv=xLkt;OiXJqJe0D3!<+>=oD_
zP@1$zi|)9daz#tUKI~U;Ac})t0=BQSP*D+R%##o|?aIu;lr1xwiVk!tI2^?hbP4S6
zd67n@U`UU3pPZ(QFVfO_?woYy*TbvC(p`<54)jEE6usm(tGP7R4QcD{9f2cN=-G_W
zJsL(oqj~mKarw!Um8EZ&i%Wk!`LeTr;JS(za7@8K6ffeX4HY+P6=n@hKv|SVK|6ol
ztBTQ}du4@$iW7KQ!7EV=VrTTkv
ziq{18WX$+{!L=+Sp0R8#J||r*KFYyknrFkbZV5DZRRN0epq-8)iBW;poLtb7mYI?6
zjPA~J<+##xS;kkYI2!QNcj|d0Rb23jeJqNzI42Of#XM?W2|jl&rv<_z7tW@yOG8wvrU!!1ErIwer^BqN!l@q`c
z%2>I)w6%az*eRcwV*;_jw!TMX#D71)7;Mcc7z7qfaZ`m~a^$iTGNz63-^s6WaAKn)
z!<+XBNXChl)$GJ62Ha3;1?)s6G8N1y;Av?n^^Uc$1k|)uurq$HBOcHFo(6q9ZCS44
z+A=?(x%1YX(^!uM6$Ww&Os`ZefsXY?q??P@f)+1_UasCTnQ<+f?cj@G-TI1U&~wt9
zGc>0oWmyY_d?FBu&tX8a!ElNMRZ$Nnx5}~gA(GNC(k$>q7&is3_C4R9y7KY8)z3d%
ze*BP4N~kOMe_6f%X+R)UKL7FbgVnna|GfKixt9svf6;N%)a;}o9Y=E%FxQ=%XsgDC
zf<=M$|5x?SMu|EcfWliE5WKF&@jB->_*8=wzrwq>IZjX#=PWqa^*n-jP*R!X=)DHQ
zIL|R!R=}Hhi%$_;K$;R73zvEk%5L^Nt-ptc$B0~tZFz)--$Y7G^wbU%(Ntt5_k{e_
z#CHf!_B}-V(IO5__Hq~s^|ghH=pL+#^@V;#&86C%bw$L}MI0Yeh(OaerHI55Mu4Gk
zPu)OUxQH_k7)3L>c-!pq=RF9)b9{_#3uvHYThYXIjGtzH+R;M&owV42UbON)*@ffS
zjRf|xcH1${EA?F*!aO>W$Kg`oR~TF`ai?$*Ucb6Ytr^ZST*KSAL=u{D74L9WL#$z3
z#ue%%c+*_vDn#6VVqPQL)7)v0GKKgNyvJ1-l1MvfSw@?!pv_X2;k#DUHvK~{`Acg~
jfyRFb1^YtWBFon`gwi>OX?TO*JZwIxTsbJ<7TW#=lrpJr
literal 0
HcmV?d00001
diff --git a/simhash/target/classes/classpath.index b/simhash/target/classes/classpath.index
new file mode 100644
index 0000000000000000000000000000000000000000..710196b307608a6994e17db7259659a6ae4d8f59
GIT binary patch
literal 136
zcmb1OU}j)oU<6_q*lo|{_!0U;yF8~ilx)9c!XqJE~=b;D8l|<*-rcwdWfYA|42saki8*jD6HqB
lqvL1Lll%fCvVC=aH?S!K?0u@x#Fik@V;ildS6fhkoeyA*Ss?%b
literal 0
HcmV?d00001
diff --git a/simhash/target/classes/exceptions/HashException.class b/simhash/target/classes/exceptions/HashException.class
new file mode 100644
index 0000000000000000000000000000000000000000..b13d9027bd8738487895a841c52869310a3f7ffa
GIT binary patch
literal 372
zcmZ`#O-sZu6r9xBYIR**P{f-T@dNA+SX~fNJe0ju_MVNQjkJj*sp!x0BzW)#_@l&^
zs-TDoJl+iR<~`=^{pA(FDMlG0ETo86v516W@0Q>5+)2?{-cj0z)oGvihT{6)sTR*qq(_|I_6khan}UiB7#4r0hk(^(-eRlnrdHOE1YU8EF6
yc#J-UL3NqXgxJ6*heyv~Pt--osD|Kx5ZeGN)NOsLh#3$fJ=QSnz19gu*!TdDVOW&_
literal 0
HcmV?d00001
diff --git a/simhash/target/classes/exceptions/NotExistFileException.class b/simhash/target/classes/exceptions/NotExistFileException.class
new file mode 100644
index 0000000000000000000000000000000000000000..0318cb4fd72d8fbf922646f96ae0fa7864cfe6be
GIT binary patch
literal 387
zcmah_O-sW-6r2}hqS0C{=s^(FlOJGyK(!ZXPobAe@7r{VPtsjUHqyV8^U3k11<_*KVy}!HyxIr&LfVCLmHr5dljvmyLO4+6(F7?>Dk!{R;xXksVV`~V}
zH5+yV!v0y-N=s#mbmA&DMgM#nV}o$4zx8!07Q-di&fER432*IO6S^52J#H2=UEQf!
zDNHZ3xhkisVvoPb(A{%QIL-bu?i1pLu4`4aR=)xgl8J4qT>ChL!#~qq`7@j#mS+4T
w`O=NdO+f=;Js(|MK7&@W1xRH29s%EC2V1foe5!=$2ogPZ&~3eT1qImq0Hi)%XaE2J
literal 0
HcmV?d00001
diff --git a/simhash/target/classes/utils/CalculationUtils.class b/simhash/target/classes/utils/CalculationUtils.class
new file mode 100644
index 0000000000000000000000000000000000000000..17eec45b74425e88d2783a66ca3419e2603382ef
GIT binary patch
literal 5023
zcmcIod3+S*8Ga@^yR%s)WLXH1kRT#pvPsw^6htGGBwUL*tO=nQElhTY&A?_S?#?37
zR&7gvwpDtd)~i*D2WnM3YFHy;Pp#Ho_SV*3;!*$WU+S+)pKoS1Y?9UXpN1bZ^Ih-v
zywCf*?|ic_d@z0jz#6wk!h!?J)BqDdD?Ktke2S%
zFU#twnBH1}tWgECB+S?M#q<%&FjJYxW-T+cA>Z_4wgmOfqX*tR
z`p|2+@i*=t494l_1wG@kRrh>&P&cYH2XWE;k3^NiV
z2N~TWS6V8rrQ?DVwlfmETl+TdSS_K-FOH$8%&u*(E0Bp8
zwq5=&87b#v{rx@W!NO-_$lG|HU&EgRTP0tq(e)_vPn&(uc!tY
zKL$BA#5_KE&GENxdgkQ#y|3jCFnya_RtxIlDs)freeayNV_4}%MR=5)*09bR3fu?2`PUmPEbXfvZ$pjcX(XnAE+-uqcH>5EDS`YVB&mw{C>-
z<)3fePW#S>3!MjH>WeDABm(ch+)T%XAt9jaR9r9A>^D+Yzeq+g^D8R8im!1~5wUH$
zkr<-)^NOexoAq~3emZa;p;Ht+^M|NNa@|#;Xys!rwu0A
z6Y4Z$TC!hD8=~!Y%GQuUjnD5C#7|dhD{)e*QafXnvC@6|FryZlo`0NTNT_iJCbT%|
z4MJ10AW%Z(wDtLd!fLULd7E_NDiO5H@`T9`lee%g0!dPAz9}!;Lj{jYC>bFBz(gw0
zU*F|!7)u4Dwscw>71N>$o*)J)H=#QiluTY_L6T=@u|6TZvkFDa9gCG&niIG>%&e$g
z<&Nw53?S;clcO>^l8?-)$y3jtMp|n@k6R~O*V1BXuaQV;RyIxgwoF0zzgj6$Y{;yW
zbViaAvdGujMN*&ulbeQh>nVGe#pctGEqsg{qQ;86s7z|ZgK=%iRF0Q4NeDB&r<*P4
z$va=yPn#Ac>V&SU^TQ5JB*`0d0y4XZ9crc5%%)@dMnmLH#nke>TvRchHQG(nVp3=$
zUAi@7#xvzU{4#)F;UxvX7H^f`NLX_ERc562ntODfa>OajdC_dtVwRaEleZKy=}vTv
zZswUNsb`jSn&zJDNMXobdJFJRTG&S=6`B5D&IYatdYtSvc;@iqcjT&xe-gd>LjZro
z%L-mm@hAM5MSGfeopu-rUE7uSZN_k6>&5@Sryohfq(O(=LY5{C+Y}aN!c=yG_{)?m
zaF1IBLCGJ$`pMtl#M|WWDxSr2LA=J|R!GM5coynfjiyq1x+AG&GI~bA8xodIBfpqp
z6uiZA??+E%IdtTOGl{Q+uMrvZaU*Vmgm17_f}7dCrJ#K)+qV_8Z)f|P1?_J^;aYx0
z@p=a>M0l5Y_lA!_dek;~C$C}{D8nIMRc8Xejl*n}igzWaK;#xj-mc3M7AG8b65QsTDlugtdc#eycOOG
z`7o9?R(Mye@->83$xowXyQjjJ!|L7~){dcl+kUTf4c3J8o6v1uDR1isR@+qi#^h5qc%jFK35Ah
zAOQ~qBU>QOq*c5w;K&(R%#WrDFX5-Sf-##K}5kL62tipYCk}LZAkB8_3TsJ
zOyV4IN#HRqWt&H&QgNi>lTgQ9?2~zqkgnr_F3;MUFzGyuMd6w;>LZG2$LC+DC
zHq?|X80y*YVT@jFc!WzF))w0O*gy|A(e!5W7p0Uhz}aj!@>F+-GsE1%kEOQP;-nOz
zufiMg^tdfuA3q_#izn#6@Ng#N_u*GCCe8Q0N1jT>!P5nNjB|XPv&Pj)XSl23#9GfP
zxk9d?z8WgztQ{itXya5LN5<ardwTYl5aqwZ_2b9@PvY_t)aURSfmpU>T4lRG%xj82R+yO0
zdnIMFl%hPuSpuyevuv_OJqhC}9OvCf#R%LIo@UEQc`+t_fD!mrl(?k7M>n2fPYJ$9
z>c3B6J6d^*HiT9*&zcFHRN6YJ7h<^+K4P~8a*?qLhT=MA&Ov724Gh4IXk%Emuvqpp
z$9CY>3B}0_iO4fSw1ywLiqr5TKJmcGxj*J;UU9znAxqLc$}Kc!PB36s2FLB^7(6d+
zG%*g>vLJmqhXbrm6kOwp!%Krcn!X~T$
literal 0
HcmV?d00001
diff --git a/simhash/target/classes/utils/CommonUtils.class b/simhash/target/classes/utils/CommonUtils.class
new file mode 100644
index 0000000000000000000000000000000000000000..a66b00e332940e3a56bcd0d0ec90a6908d10a234
GIT binary patch
literal 3869
zcmb7HZF>~e8NN@l*-4gxgyqFXkccLj1aZNtKo--&nwaR4SQ3UvRRSBui#z*4=+*E21FXI%YKYsP(^@U5<^QVgMzgB$X-4GhFT4udQU`@5anCor$oPuT*w_}~a
zx=-brW0DbO;U=pjvwH~JH0AupVpCdXNPjA)n+ZMIaZsAPL&XMZj~YFxv^}HXPJtF*
zRx+F2re@4!{Yl*=aF3h9&2wize(R;;?MFAHrtbt6+AT^l4`N36iQG
zvol)4eq5h9YGuX++WhHyr1pEPIuON4P;?o#;eG|5SMh+1W`)D<;=09OC
z!m0swtM~%G$coR}EZvl((l6((O3Jwz4YPmtsEFYafjTXnHfG4;68VXonUG~0>GfWS
zLN6Xw(5Ipwj|tpXMq!C!>kp12|gFA#L6=*8PkPrx$?UzIt^putz
zVEH-)e^6@ZDki|e6fm9T)g{~hctc3yu!3n723Qhn%9cH7$S_fCn8>(o&NA5S$D}M3
z^^H1rxJEnJcNWqLo>Gwk&A7lg{`*Z^pVXPMazrz7`rw32zSLQM2_~Pz5d}w890T1i
zFNa8AnQc19oJ+;zg*8l}q(_h83Q(C${X6)Bl?)zJ+fKsH}P29A`H;v%R-Ullg49
zVtxd?6|X|PgCp92@8Ven-&1iMCphdY#t^%jM~$2fkzqYULIUm>3RgNKnnQhq`-TsV
z4(@y8P`qd0(cwM;(NBb8>#ttel*ojXovU_S&VW%%n@mwE9G)&0W64J<_`w+XEG1x8gb~)gJPxF_0?8
zeeS##augeel9Q%p=Q8Btv0J0}e;7WR;dqj4^MtNq*cVT!<~ad5LD}CD+SP`Z8XMPI
zef+j=WuoozTLR)Aubyp(aM>4k-CA8BG-Ty62`~7jSozGoPtFkD8unP0o#la_-mBYF
z)_Arth>Ib7fZr+jP{l|1JsmmY#u+O~^N2OAN8(OG)X*k)Q?N3eNy}3HufO_X@|0W$
zyk(3VdbTxgS<|_6m3iSqbBW?G3(znAzi+xOKU>!iv}ZZSMj0aU(I(7&>S|jW?!H*P
zCBjz-;X%x+co*lJaGrCt-!%11%+Rt~J*(gfZ&Ve4R>MZYH6GcYEy}~)1ri(Sw1#s|
zpc&8MB%j~sR}Fr^6PME+LH<6^l?b1L&$`Yzhz}f*7x*i;fmL{szp5*MQ}`jj8t@~o
z)HupdqSkTJ)R~8JF}{Gt1L2i(SasFW2++-PN=SPv_yq3Y$_CnQaqO<8ZV(~7gqI!p
zCj1yDDD@Nmr~e*>+OP0Ez}-gr{V86h6$UMib4}sSna)3BC4H|wjSwH}U&rzVv>XU;
zoWtE$J3B9-HIL5t<^tGW!L#1xc4sq>oqO*s;9-Gz>;?+xsX-o(7x1MTT*J131?)fI
zD$ZfFE5O%-0$p{@bp?D~;B~agH{OP?p{*;}9B2-1X%6Nwm4{h?U5n9kpZu*kaM6ig
z=6^pi13D4rg>^k|+-=xO=(k}fugBe_>L9lB+Od;Fb>TGcvu|RTgHbn;I7uFUMr49A
zW4z{I)P`pm#Azba%?r@a`Bp<4y?BFfwY0RI7TzLCzi_+;=ugU@S(N|fqWqaf`Cn09
zV!sJ>;f8-fDM~F
z!%ye&jnQ&($v;7X9^Nf_7YjiamP@~UJI5qNQ%LrVXRchQAK^O^*BYm;+&X=5Nu9oj
j-*CSczjdVE#|1uR4bO7z5=mabWv+y{@&{bSAJP0D(O^pJ
literal 0
HcmV?d00001
diff --git a/simhash/target/test-classes/MainTest.class b/simhash/target/test-classes/MainTest.class
new file mode 100644
index 0000000000000000000000000000000000000000..232debddd15f6020ee04f567fe236e24a519af77
GIT binary patch
literal 842
zcmZuv-EI;=6#fQwVWCUGtjF`a75it(G7-;)6k=>
z+MOfbZmTG2VK-{~40%t4{*z&^>Bk4W8PJR4MJ*mQcr4U?_GrjsF<{7gbG^1nJ4w3D
z=HYRN;H6W>p-G+&Ln(TGLkt8}d=Q3_