From 72079d3b6980e0fa6dec22b4e06d495346591c03 Mon Sep 17 00:00:00 2001 From: y <123@12.com> Date: Tue, 7 Mar 2023 16:29:30 +0800 Subject: [PATCH] simbash --- helloworld.txt | 0 simhash/.idea/.gitignore | 8 + simhash/.idea/compiler.xml | 13 ++ simhash/.idea/encodings.xml | 7 + simhash/.idea/jarRepositories.xml | 20 +++ simhash/.idea/misc.xml | 14 ++ simhash/pom.xml | 50 ++++++ simhash/src/main/java/Main.java | 39 +++++ .../java/exceptions/FileAnalyseException.java | 12 ++ .../main/java/exceptions/HashException.java | 14 ++ .../exceptions/NotExistFileException.java | 14 ++ .../src/main/java/utils/CalculationUtils.java | 158 ++++++++++++++++++ simhash/src/main/java/utils/CommonUtils.java | 71 ++++++++ simhash/src/test/java/MainTest.java | 17 ++ simhash/target/classes/Main.class | Bin 0 -> 2480 bytes simhash/target/classes/classpath.index | Bin 0 -> 136 bytes .../exceptions/FileAnalyseException.class | Bin 0 -> 374 bytes .../classes/exceptions/HashException.class | Bin 0 -> 372 bytes .../exceptions/NotExistFileException.class | Bin 0 -> 387 bytes .../classes/utils/CalculationUtils.class | Bin 0 -> 5023 bytes .../target/classes/utils/CommonUtils.class | Bin 0 -> 3869 bytes simhash/target/test-classes/MainTest.class | Bin 0 -> 842 bytes simhash/target/test-classes/classpath.index | Bin 0 -> 96 bytes 23 files changed, 437 insertions(+) delete mode 100644 helloworld.txt create mode 100644 simhash/.idea/.gitignore create mode 100644 simhash/.idea/compiler.xml create mode 100644 simhash/.idea/encodings.xml create mode 100644 simhash/.idea/jarRepositories.xml create mode 100644 simhash/.idea/misc.xml create mode 100644 simhash/pom.xml create mode 100644 simhash/src/main/java/Main.java create mode 100644 simhash/src/main/java/exceptions/FileAnalyseException.java create mode 100644 simhash/src/main/java/exceptions/HashException.java create mode 100644 simhash/src/main/java/exceptions/NotExistFileException.java create mode 100644 simhash/src/main/java/utils/CalculationUtils.java create mode 100644 simhash/src/main/java/utils/CommonUtils.java create mode 100644 simhash/src/test/java/MainTest.java create mode 100644 simhash/target/classes/Main.class create mode 100644 simhash/target/classes/classpath.index create mode 100644 simhash/target/classes/exceptions/FileAnalyseException.class create mode 100644 simhash/target/classes/exceptions/HashException.class create mode 100644 simhash/target/classes/exceptions/NotExistFileException.class create mode 100644 simhash/target/classes/utils/CalculationUtils.class create mode 100644 simhash/target/classes/utils/CommonUtils.class create mode 100644 simhash/target/test-classes/MainTest.class create mode 100644 simhash/target/test-classes/classpath.index diff --git a/helloworld.txt b/helloworld.txt deleted file mode 100644 index e69de29..0000000 diff --git a/simhash/.idea/.gitignore b/simhash/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/simhash/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/simhash/.idea/compiler.xml b/simhash/.idea/compiler.xml new file mode 100644 index 0000000..e5fa1de --- /dev/null +++ b/simhash/.idea/compiler.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/simhash/.idea/encodings.xml b/simhash/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/simhash/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/simhash/.idea/jarRepositories.xml b/simhash/.idea/jarRepositories.xml new file mode 100644 index 0000000..5a2f139 --- /dev/null +++ b/simhash/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/simhash/.idea/misc.xml b/simhash/.idea/misc.xml new file mode 100644 index 0000000..82dbec8 --- /dev/null +++ b/simhash/.idea/misc.xml @@ -0,0 +1,14 @@ + + + + + + + + + + \ No newline at end of file diff --git a/simhash/pom.xml b/simhash/pom.xml new file mode 100644 index 0000000..e30812d --- /dev/null +++ b/simhash/pom.xml @@ -0,0 +1,50 @@ + + + 4.0.0 + + org.example + simhash + 1.0-SNAPSHOT + + + org.apache.commons + commons-lang3 + 3.5 + + + cn.hutool + hutool-all + 5.7.13 + + + com.hankcs.nlp + hanlp-lucene-plugin + 1.1.7 + + + junit + junit + 4.13.2 + + + cn.hutool + hutool-http + 5.8.14 + + + org.junit.jupiter + junit-jupiter-api + 5.8.2 + test + + + + + 17 + 17 + UTF-8 + + + \ No newline at end of file diff --git a/simhash/src/main/java/Main.java b/simhash/src/main/java/Main.java new file mode 100644 index 0000000..40c8b93 --- /dev/null +++ b/simhash/src/main/java/Main.java @@ -0,0 +1,39 @@ +import cn.hutool.core.date.DateUtil; +import exceptions.FileAnalyseException; +import exceptions.NotExistFileException; +import utils.CalculationUtils; +import utils.CommonUtils; +import java.util.Map; + +public class Main { + //合法参数个数为3 + static final int ARGS_NUM = 3; + public static void main(String[] args){ + // 读取并解析参数 + if (args.length != ARGS_NUM) { + throw new IllegalArgumentException("参数个数不正确"); + } + // 解析文件,处理分词 + Map originWordCount = null; + Map compareWordCount = null; + try { + originWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(args[0])); + compareWordCount = CommonUtils.analyseText(CommonUtils.readFileToStr(args[1])); + } catch (FileAnalyseException | NotExistFileException e) { + e.printStackTrace(); + } + // 获取simHash值 + String simHash1 = CalculationUtils.calculateSimHash(originWordCount); + String simHash2 = CalculationUtils.calculateSimHash(compareWordCount); + //计算相似度,保留两位小数 + double result = CalculationUtils.getSimilarity(simHash1, simHash2); + String format = String.format("相似度为:%.2f", result); + String writeFileContent = "---------------------------------------" + "\n" + + "原文件:" + args[0] + "\n" + + "对比文件:" + args[1] + "\n" + + format + "\n" + + "比较时间为:" + DateUtil.now() + "\n"; + ; + CommonUtils.writeFile(args[2],writeFileContent); + } +} diff --git a/simhash/src/main/java/exceptions/FileAnalyseException.java b/simhash/src/main/java/exceptions/FileAnalyseException.java new file mode 100644 index 0000000..11aec77 --- /dev/null +++ b/simhash/src/main/java/exceptions/FileAnalyseException.java @@ -0,0 +1,12 @@ +package exceptions; + +/** + * @author HJW + * @date 2022-09-21 12:57 + * 文件解析异常(转字符串为空或者过滤时没有可用词) + */ +public class FileAnalyseException extends Exception { + public FileAnalyseException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/simhash/src/main/java/exceptions/HashException.java b/simhash/src/main/java/exceptions/HashException.java new file mode 100644 index 0000000..c9e1902 --- /dev/null +++ b/simhash/src/main/java/exceptions/HashException.java @@ -0,0 +1,14 @@ +package exceptions; + +import java.security.NoSuchAlgorithmException; + +/** + * @author HJW + * @date 2022-09-21 12:57 + * hash异常 md5 + */ +public class HashException extends NoSuchAlgorithmException { + public HashException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/simhash/src/main/java/exceptions/NotExistFileException.java b/simhash/src/main/java/exceptions/NotExistFileException.java new file mode 100644 index 0000000..9dc0f96 --- /dev/null +++ b/simhash/src/main/java/exceptions/NotExistFileException.java @@ -0,0 +1,14 @@ +package exceptions; + +import java.io.FileNotFoundException; + + +/** + * @author HJW + * 找不到文件的自定义异常 + */ +public class NotExistFileException extends FileNotFoundException { + public NotExistFileException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/simhash/src/main/java/utils/CalculationUtils.java b/simhash/src/main/java/utils/CalculationUtils.java new file mode 100644 index 0000000..664a0a9 --- /dev/null +++ b/simhash/src/main/java/utils/CalculationUtils.java @@ -0,0 +1,158 @@ +package utils; + +import cn.hutool.core.util.StrUtil; +import exceptions.HashException; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Map; + +/** + * 与计算有关的工具类 + */ +public class CalculationUtils { + static final int HASH_BIT = 128; + static final int DISTANCE_WAY1 = 16; + static final int DISTANCE_WAY2 = 32; + static final int DISTANCE_WAY3 = 64; + + /** + * 采用MD5进行对词语进行hash,得到的hash值使用16进制解析 再利用算法取128位二进制 + * @param word 词语 + * @return 128位二进制 + */ + public static String wordHash(String word) throws HashException { + //如果传入词语为null或“”或“ ” + if (word == null || StrUtil.isBlank(word) || StrUtil.isEmpty(word)) { + throw new HashException("词语为空"); + } + try { + // 采用MD5算法进行hash + MessageDigest digest = MessageDigest.getInstance("MD5"); + digest.update(word.getBytes(StandardCharsets.UTF_8)); + // hash值转为32位16进制 + StringBuilder hash = new StringBuilder(); + for (byte b : digest.digest()) { + hash.append(String.format("%02x", b)); + } + + // 16进制转为128位2进制码 + StringBuilder finalHash = new StringBuilder(); + String strTemp; + for (int i = 0; i < hash.length(); i ++) { + // 每一位16进制数加上0000 最后截取后面的4位 得到便是这位数的二进制 + strTemp = "0000" + Integer.toBinaryString(Integer.parseInt(hash.substring(i, i + 1), 16)); + finalHash.append(strTemp.substring(strTemp.length() - 4)); + } + + // 不为128直接报错 + if (finalHash.length() != HASH_BIT) { + throw new HashException("hash值长度不为128"); + } + + return finalHash.toString(); + + } catch (NoSuchAlgorithmException e) { + throw new HashException("MD5算法异常"); + } + + } + + /** + * 给二进制哈希值加权 + * @param hash 二进制哈希值 + * @param weight 权重 + * @return 加权后的二进制哈希值 + */ + public static int[] hashWeight(String hash, int weight) { + // 新建一个数组用于存放加权后的二进制哈希值 + int[] hashArray = new int[HASH_BIT]; + // 遍历二进制哈希值,0则是-1,1则是1,将每一位加权后存入数组 + for (int i = 0; i < hash.length(); i++) { + if (hash.charAt(i) == '1') { + hashArray[i] = weight; + } else { + hashArray[i] = -1 * weight; + } + } + + return hashArray; + } + + /** + * 得到的合并后的hash值进行降维,最终得到simHash + * @param mergeHash 合并后的hash值 + * @return sim哈希值 + */ + public static String getSimHash(int[] mergeHash){ + // 使用StringBuilder存储simHash + StringBuilder simHash = new StringBuilder(); + // 遍历合并后的hash值,大于0则是1,小于0则是0 + for (int hash : mergeHash) { + if (hash > 0) { + simHash.append("1"); + } else { + simHash.append("0"); + } + } + return simHash.toString(); + } + + + /** + * 根据词语得到simHash + * @param wordCount 词语及其出现次数 + * @return simHash + */ + public static String calculateSimHash(Map wordCount){ + // 新建一个数组用于存放合并后的hash值,初始值为0 + int[] mergeHash = new int[HASH_BIT]; + for (int i = 0; i < HASH_BIT; i++) { + mergeHash[i] = 0; + } + // 遍历词语及其出现次数,对每一个词语进行hash加权,然后合并 + wordCount.forEach((word,count) -> { + try { + int[] tempHash = hashWeight(wordHash(word),count); + for (int i = 0; i < tempHash.length; i++) { + mergeHash[i] += tempHash[i]; + } + } catch (HashException e) { + e.printStackTrace(); + } + }); + + // 降维得到simHash + return getSimHash(mergeHash); + } + + /** + * 计算两个simHash的相似度 + * @param simHash1 simHash1 + * @param simHash2 simHash2 + * @return 相似度 + */ + public static double getSimilarity(String simHash1, String simHash2) { + // 汉明距离 + int distance = 0; + // 遍历simHash1和simHash2,不相同则汉明距离加1 + for (int i = 0; i < simHash1.length(); i++) { + if (simHash1.charAt(i) != simHash2.charAt(i)) { + distance++; + } + } +// System.out.println("汉明距离为:" + distance); + // 更换计算策略 + if (distance >= 0 && distance <= DISTANCE_WAY1) { + return 1 - (double) distance / 256; + } else if (distance > 16 && distance <= DISTANCE_WAY2) { + return 1 - (double) distance / 128; + }else if (distance > 32 && distance <= DISTANCE_WAY3) { + return 1 - (double) distance / 64; + }else { + return 0; + } + + } + +} \ No newline at end of file diff --git a/simhash/src/main/java/utils/CommonUtils.java b/simhash/src/main/java/utils/CommonUtils.java new file mode 100644 index 0000000..e78d1ae --- /dev/null +++ b/simhash/src/main/java/utils/CommonUtils.java @@ -0,0 +1,71 @@ +package utils; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.StrUtil; +import com.hankcs.hanlp.HanLP; +import com.hankcs.hanlp.seg.common.Term; +import exceptions.FileAnalyseException; +import exceptions.NotExistFileException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + + +/** + * 非计算的工具类 + */ +public class CommonUtils { + //最少关键词数为3 + public static final int SHORT_WORD_LENGTH = 3; + /** + * 从文件中读取文本 + * @param filePath 文件路径 + * @return 读取出的文本 + */ + public static String readFileToStr(String filePath) throws NotExistFileException { + try { + return FileUtil.readUtf8String(filePath);//返回读取的文本 + } catch (Exception e) { + throw new NotExistFileException("该绝对路径的文件不存在"); + } + } + + /** + * 把文本解析并过滤后转为map + * @param text 读取的文本 + * @return 存放词语和词频的map + */ + public static Map analyseText(String text) throws FileAnalyseException { + //文本内容为null或“”或“ ”时,文件解析异常 + if (text == null || StrUtil.isBlank(text) || StrUtil.isEmpty(text)) { + throw new FileAnalyseException("文件解析异常,解析内容为空"); + } + // 提取关键词 + List keyList = HanLP.extractKeyword(text, text.length()); + //提取出的关键词小于3 + if (keyList.size() <= SHORT_WORD_LENGTH) { + throw new FileAnalyseException("文件解析异常,关键词太少"); + } + // 分词,找出所有词语 + List termList = HanLP.segment(text); + List allWords = termList.stream().map(term -> term.word).collect(Collectors.toList()); + // 用于存放关键词和词频的map + Map wordCount = new HashMap<>(keyList.size()); + // 遍历全部词语,获取关键词词频,返回存词语和词频的map + for (String s:keyList) { + wordCount.put(s, Collections.frequency(allWords, s)); + } + return wordCount; + } + + /** + * 将查重结果写入指定文件 + * @param filePath 文件路径 + * @param content 查重结果内容 + */ + public static void writeFile(String filePath, String content) { + FileUtil.appendString(content, filePath, "utf-8"); + } +} \ No newline at end of file diff --git a/simhash/src/test/java/MainTest.java b/simhash/src/test/java/MainTest.java new file mode 100644 index 0000000..c9879a9 --- /dev/null +++ b/simhash/src/test/java/MainTest.java @@ -0,0 +1,17 @@ +import org.junit.jupiter.api.Test; +public class MainTest { + static String writeFilePath = "E:\\测试文本\\write.txt"; + static String OrigFilePath = "E:\\测试文本\\orig.txt"; + static String CopyFilePath1 = "E:\\测试文本\\orig_0.8_add.txt"; + /** + * 测试主函数 + */ + @Test + void testMain(){ + String[] args = new String[3]; + args[0] = OrigFilePath; + args[1] = CopyFilePath1; + args[2] = writeFilePath; + Main.main(args); + } +} \ No newline at end of file diff --git a/simhash/target/classes/Main.class b/simhash/target/classes/Main.class new file mode 100644 index 0000000000000000000000000000000000000000..52bc6bc9e84e51a69a679bb356dc83289f94bd74 GIT binary patch literal 2480 zcma)8-%}e^6#gy*Hra+kQz)fB=r3r20k(cD9hyO!v&~jFxwG%M2qb(6aK$`>UV*v|RkYG=8-D!#B^q{!v8)TfL1N zsnL~M$4a}lZe|nR(<-(hreJ#%&1ezWR&aI0i6^aG&N450Yk>w^lXGLbp-ovd6F6KA zXfw8l+#1DBv=xLkt;OiXJqJe0D3!<+>=oD_ zP@1$zi|)9daz#tUKI~U;Ac})t0=BQSP*D+R%##o|?aIu;lr1xwiVk!tI2^?hbP4S6 zd67n@U`UU3pPZ(QFVfO_?woYy*TbvC(p`<54)jEE6usm(tGP7R4QcD{9f2cN=-G_W zJsL(oqj~mKarw!Um8EZ&i%Wk!`LeTr;JS(za7@8K6ffeX4HY+P6=n@hKv|SVK|6ol ztBTQ}du4@$iW7KQ!7EV=VrTTkv ziq{18WX$+{!L=+Sp0R8#J||r*KFYyknrFkbZV5DZRRN0epq-8)iBW;poLtb7mYI?6 zjPA~J<+##xS;kkYI2!QNcj|d0Rb23jeJqNzI42Of#XM?W2|jl&rv<_z7tW@yOG8wvrU!!1ErIwer^BqN!l@q`c z%2>I)w6%az*eRcwV*;_jw!TMX#D71)7;Mcc7z7qfaZ`m~a^$iTGNz63-^s6WaAKn) z!<+XBNXChl)$GJ62Ha3;1?)s6G8N1y;Av?n^^Uc$1k|)uurq$HBOcHFo(6q9ZCS44 z+A=?(x%1YX(^!uM6$Ww&Os`ZefsXY?q??P@f)+1_UasCTnQ<+f?cj@G-TI1U&~wt9 zGc>0oWmyY_d?FBu&tX8a!ElNMRZ$Nnx5}~gA(GNC(k$>q7&is3_C4R9y7KY8)z3d% ze*BP4N~kOMe_6f%X+R)UKL7FbgVnna|GfKixt9svf6;N%)a;}o9Y=E%FxQ=%XsgDC zf<=M$|5x?SMu|EcfWliE5WKF&@jB->_*8=wzrwq>IZjX#=PWqa^*n-jP*R!X=)DHQ zIL|R!R=}Hhi%$_;K$;R73zvEk%5L^Nt-ptc$B0~tZFz)--$Y7G^wbU%(Ntt5_k{e_ z#CHf!_B}-V(IO5__Hq~s^|ghH=pL+#^@V;#&86C%bw$L}MI0Yeh(OaerHI55Mu4Gk zPu)OUxQH_k7)3L>c-!pq=RF9)b9{_#3uvHYThYXIjGtzH+R;M&owV42UbON)*@ffS zjRf|xcH1${EA?F*!aO>W$Kg`oR~TF`ai?$*Ucb6Ytr^ZST*KSAL=u{D74L9WL#$z3 z#ue%%c+*_vDn#6VVqPQL)7)v0GKKgNyvJ1-l1MvfSw@?!pv_X2;k#DUHvK~{`Acg~ jfyRFb1^YtWBFon`gwi>OX?TO*JZwIxTsbJ<7TW#=lrpJr literal 0 HcmV?d00001 diff --git a/simhash/target/classes/classpath.index b/simhash/target/classes/classpath.index new file mode 100644 index 0000000000000000000000000000000000000000..710196b307608a6994e17db7259659a6ae4d8f59 GIT binary patch literal 136 zcmb1OU}j)oU<6_q*lo|{_!0U;yF8~ilx)9c!XqJE~=b;D8l|<*-rcwdWfYA|42saki8*jD6HqB lqvL1Lll%fCvVC=aH?S!K?0u@x#Fik@V;ildS6fhkoeyA*Ss?%b literal 0 HcmV?d00001 diff --git a/simhash/target/classes/exceptions/HashException.class b/simhash/target/classes/exceptions/HashException.class new file mode 100644 index 0000000000000000000000000000000000000000..b13d9027bd8738487895a841c52869310a3f7ffa GIT binary patch literal 372 zcmZ`#O-sZu6r9xBYIR**P{f-T@dNA+SX~fNJe0ju_MVNQjkJj*sp!x0BzW)#_@l&^ zs-TDoJl+iR<~`=^{pA(FDMlG0ETo86v516W@0Q>5+)2?{-cj0z)oGvihT{6)sTR*qq(_|I_6khan}UiB7#4r0hk(^(-eRlnrdHOE1YU8EF6 yc#J-UL3NqXgxJ6*heyv~Pt--osD|Kx5ZeGN)NOsLh#3$fJ=QSnz19gu*!TdDVOW&_ literal 0 HcmV?d00001 diff --git a/simhash/target/classes/exceptions/NotExistFileException.class b/simhash/target/classes/exceptions/NotExistFileException.class new file mode 100644 index 0000000000000000000000000000000000000000..0318cb4fd72d8fbf922646f96ae0fa7864cfe6be GIT binary patch literal 387 zcmah_O-sW-6r2}hqS0C{=s^(FlOJGyK(!ZXPobAe@7r{VPtsjUHqyV8^U3k11<_*KVy}!HyxIr&LfVCLmHr5dljvmyLO4+6(F7?>Dk!{R;xXksVV`~V} zH5+yV!v0y-N=s#mbmA&DMgM#nV}o$4zx8!07Q-di&fER432*IO6S^52J#H2=UEQf! zDNHZ3xhkisVvoPb(A{%QIL-bu?i1pLu4`4aR=)xgl8J4qT>ChL!#~qq`7@j#mS+4T w`O=NdO+f=;Js(|MK7&@W1xRH29s%EC2V1foe5!=$2ogPZ&~3eT1qImq0Hi)%XaE2J literal 0 HcmV?d00001 diff --git a/simhash/target/classes/utils/CalculationUtils.class b/simhash/target/classes/utils/CalculationUtils.class new file mode 100644 index 0000000000000000000000000000000000000000..17eec45b74425e88d2783a66ca3419e2603382ef GIT binary patch literal 5023 zcmcIod3+S*8Ga@^yR%s)WLXH1kRT#pvPsw^6htGGBwUL*tO=nQElhTY&A?_S?#?37 zR&7gvwpDtd)~i*D2WnM3YFHy;Pp#Ho_SV*3;!*$WU+S+)pKoS1Y?9UXpN1bZ^Ih-v zywCf*?|ic_d@z0jz#6wk!h!?J)BqDdD?Ktke2S% zFU#twnBH1}tWgECB+S?M#q<%&FjJYxW-T+cA>Z_4wgmOfqX*tR z`p|2+@i*=t494l_1wG@kRrh>&P&cYH2XWE;k3^NiV z2N~TWS6V8rrQ?DVwlfmETl+TdSS_K-FOH$8%&u*(E0Bp8 zwq5=&87b#v{rx@W!NO-_$lG|HU&EgRTP0tq(e)_vPn&(uc!tY zKL$BA#5_KE&GENxdgkQ#y|3jCFnya_RtxIlDs)freeayNV_4}%MR=5)*09bR3fu?2`PUmPEbXfvZ$pjcX(XnAE+-uqcH>5EDS`YVB&mw{C>- z<)3fePW#S>3!MjH>WeDABm(ch+)T%XAt9jaR9r9A>^D+Yzeq+g^D8R8im!1~5wUH$ zkr<-)^NOexoAq~3emZa;p;Ht+^M|NNa@|#;Xys!rwu0A z6Y4Z$TC!hD8=~!Y%GQuUjnD5C#7|dhD{)e*QafXnvC@6|FryZlo`0NTNT_iJCbT%| z4MJ10AW%Z(wDtLd!fLULd7E_NDiO5H@`T9`lee%g0!dPAz9}!;Lj{jYC>bFBz(gw0 zU*F|!7)u4Dwscw>71N>$o*)J)H=#QiluTY_L6T=@u|6TZvkFDa9gCG&niIG>%&e$g z<&Nw53?S;clcO>^l8?-)$y3jtMp|n@k6R~O*V1BXuaQV;RyIxgwoF0zzgj6$Y{;yW zbViaAvdGujMN*&ulbeQh>nVGe#pctGEqsg{qQ;86s7z|ZgK=%iRF0Q4NeDB&r<*P4 z$va=yPn#Ac>V&SU^TQ5JB*`0d0y4XZ9crc5%%)@dMnmLH#nke>TvRchHQG(nVp3=$ zUAi@7#xvzU{4#)F;UxvX7H^f`NLX_ERc562ntODfa>OajdC_dtVwRaEleZKy=}vTv zZswUNsb`jSn&zJDNMXobdJFJRTG&S=6`B5D&IYatdYtSvc;@iqcjT&xe-gd>LjZro z%L-mm@hAM5MSGfeopu-rUE7uSZN_k6>&5@Sryohfq(O(=LY5{C+Y}aN!c=yG_{)?m zaF1IBLCGJ$`pMtl#M|WWDxSr2LA=J|R!GM5coynfjiyq1x+AG&GI~bA8xodIBfpqp z6uiZA??+E%IdtTOGl{Q+uMrvZaU*Vmgm17_f}7dCrJ#K)+qV_8Z)f|P1?_J^;aYx0 z@p=a>M0l5Y_lA!_dek;~C$C}{D8nIMRc8Xejl*n}igzWaK;#xj-mc3M7AG8b65QsTDlugtdc#eycOOG z`7o9?R(Mye@->83$xowXyQjjJ!|L7~){dcl+kUTf4c3J8o6v1uDR1isR@+qi#^h5qc%jFK35Ah zAOQ~qBU>QOq*c5w;K&(R%#WrDFX5-Sf-##K}5kL62tipYCk}LZAkB8_3TsJ zOyV4IN#HRqWt&H&QgNi>lTgQ9?2~zqkgnr_F3;MUFzGyuMd6w;>LZG2$LC+DC zHq?|X80y*YVT@jFc!WzF))w0O*gy|A(e!5W7p0Uhz}aj!@>F+-GsE1%kEOQP;-nOz zufiMg^tdfuA3q_#izn#6@Ng#N_u*GCCe8Q0N1jT>!P5nNjB|XPv&Pj)XSl23#9GfP zxk9d?z8WgztQ{itXya5LN5<ardwTYl5aqwZ_2b9@PvY_t)aURSfmpU>T4lRG%xj82R+yO0 zdnIMFl%hPuSpuyevuv_OJqhC}9OvCf#R%LIo@UEQc`+t_fD!mrl(?k7M>n2fPYJ$9 z>c3B6J6d^*HiT9*&zcFHRN6YJ7h<^+K4P~8a*?qLhT=MA&Ov724Gh4IXk%Emuvqpp z$9CY>3B}0_iO4fSw1ywLiqr5TKJmcGxj*J;UU9znAxqLc$}Kc!PB36s2FLB^7(6d+ zG%*g>vLJmqhXbrm6kOwp!%Krcn!X~T$ literal 0 HcmV?d00001 diff --git a/simhash/target/classes/utils/CommonUtils.class b/simhash/target/classes/utils/CommonUtils.class new file mode 100644 index 0000000000000000000000000000000000000000..a66b00e332940e3a56bcd0d0ec90a6908d10a234 GIT binary patch literal 3869 zcmb7HZF>~e8NN@l*-4gxgyqFXkccLj1aZNtKo--&nwaR4SQ3&#UvRRSBui#z*4=+*E21FXI%YKYsP(^@U5<^QVgMzgB$X-4GhFT4udQU`@5anCor$oPuT*w_}~a zx=-brW0DbO;U=pjvwH~JH0AupVpCdXNPjA)n+ZMIaZsAPL&XMZj~YFxv^}HXPJtF* zRx+F2re@4!{Yl*=aF3h9&2wize(R;;?MFAHrtbt6+AT^l4`N36iQG zvol)4eq5h9YGuX++WhHyr1pEPIuON4P;?o#;eG|5SMh+1W`)D<;=09OC z!m0swtM~%G$coR}EZvl((l6((O3Jwz4YPmtsEFYafjTXnHfG4;68VXonUG~0>GfWS zLN6Xw(5Ipwj|tpXMq!C!>kp12|gFA#L6=*8PkPrx$?UzIt^putz zVEH-)e^6@ZDki|e6fm9T)g{~hctc3yu!3n723Qhn%9cH7$S_fCn8>(o&NA5S$D}M3 z^^H1rxJEnJcNWqLo>Gwk&A7lg{`*Z^pVXPMazrz7`rw32zSLQM2_~Pz5d}w890T1i zFNa8AnQc19oJ+;z&#g*8l}q(_h83Q(C${X6)Bl?)zJ+fKsH}P29A`H;v%R-Ullg49 zVtxd?6|X|PgCp92@8Ven-&1iMCphdY#t^%jM~$2fkzqYULIUm>3RgNKnnQhq`-TsV z4(@y8P`qd0(cwM;(NBb8>#ttel*ojXovU_S&VW%%n@mwE9G)&0W64J<_`w+XEG1x8gb~)gJPxF_0?8 zeeS##augeel9Q%p=Q8Btv0J0}e;7WR;dqj4^MtNq*cVT!<~ad5LD}CD+SP`Z8XMPI zef+j=WuoozTLR)Aubyp(aM>4k-CA8BG-Ty62`~7jSozGoPtFkD8unP0o#la_-mBYF z)_Arth>Ib7fZr+jP{l|1JsmmY#u+O~^N2OAN8(OG)X*k)Q?N3eNy}3HufO_X@|0W$ zyk(3VdbTxgS<|_6m3iSqbBW?G3(znAzi+xOKU>!iv}ZZSMj0aU(I(7&>S|jW?!H*P zCBjz-;X%x+co*lJaGrCt-!%11%+Rt~J*(gfZ&Ve4R>MZYH6GcYEy}~)1ri(Sw1#s| zpc&8MB%j~sR}Fr^6PME+LH<6^l?b1L&$`Yzhz}f*7x*i;fmL{szp5*MQ}`jj8t@~o z)HupdqSkTJ)R~8JF}{Gt1L2i(SasFW2++-PN=SPv_yq3Y$_CnQaqO<8ZV(~7gqI!p zCj1yDDD@Nmr~e*>+OP0Ez}-gr{V86h6$UMib4}sSna)3BC4H|wjSwH}U&rzVv>XU; zoWtE$J3B9-HIL5t<^tGW!L#1xc4sq>oqO*s;9-Gz>;?+xsX-o(7x1MTT*J131?)fI zD$ZfFE5O%-0$p{@bp?D~;B~agH{OP?p{*;}9B2-1X%6Nwm4{h?U5n9kpZu*kaM6ig z=6^pi13D4rg>^k|+-=xO=(k}fugBe_>L9lB+Od;Fb>TGcvu|RTgHbn;I7uFUMr49A zW4z{I)P`pm#Azba%?r@a`Bp<4y?BFfwY0RI7TzLCzi_+;=ugU@S(N|fqWqaf`Cn09 zV!sJ>;f8-fDM~F z!%ye&jnQ&($v;7X9^Nf_7YjiamP@~UJI5qNQ%LrVXRchQAK^O^*BYm;+&X=5Nu9oj j-*CSczjdVE#|1uR4bO7z5=mabWv+y{@&{bSAJP0D(O^pJ literal 0 HcmV?d00001 diff --git a/simhash/target/test-classes/MainTest.class b/simhash/target/test-classes/MainTest.class new file mode 100644 index 0000000000000000000000000000000000000000..232debddd15f6020ee04f567fe236e24a519af77 GIT binary patch literal 842 zcmZuv-EI;=6#fQwVWCUGtjF`a75it(G7-;)6k=> z+MOfbZmTG2VK-{~40%t4{*z&^>Bk4W8PJR4MJ*mQcr4U?_GrjsF<{7gbG^1nJ4w3D z=HYRN;H6W>p-G+&Ln(TGLkt8}d=Q3_