From 55e584ee6602c155b3a76843c65be0dadd3d0299 Mon Sep 17 00:00:00 2001
From: "Black Kin@.com"
Date: Tue, 14 Mar 2023 21:49:11 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9F=A5=E9=87=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 "1/\346\272\220.cpp" | 957 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 957 insertions(+)
 create mode 100644 "1/\346\272\220.cpp"

diff --git "a/1/\346\272\220.cpp" "b/1/\346\272\220.cpp"
new file mode 100644
index 0000000..d26952f
--- /dev/null
+++ "b/1/\346\272\220.cpp"
@@ -0,0 +1,957 @@
+#define _CRT_SECURE_NO_WARNINGS
+// NOTE(review): the header names were missing from the #include directives in
+// this patch. The list below is reconstructed from the functions this file
+// calls (min, printf/fopen, system/exit, cin, string, _getch,
+// _findfirst/_access, stat) - confirm against the original source.
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <conio.h>
+#include <io.h>
+#include <sys/stat.h>
+
+using namespace std;
+
+struct filenm;
+struct file1;
+struct sentence;
+struct node;
+struct con;
+
+// One entry of a singly linked list of file paths.
+struct filenm
+{
+    string name;            // path of the file
+    filenm* next = nullptr; // next path in the list
+    node* head = nullptr;   // comparison results attached by one_to_many()
+};
+
+// A loaded text file, split into ';'-terminated sentences by get_file().
+struct file1
+{
+    filenm* path = nullptr;   // single filenm holding the path that was read
+    sentence* head = nullptr; // first sentence of the file
+    int num = 0;              // number of sentences stored by get_file()
+};
+
+// One sentence of a file.
+struct sentence
+{
+    string s1;                // text exactly as read from the file
+    string s2;                // text used for comparison (copied from s1 by inil_sentence)
+    int len1 = 0;             // only ever set to 0 in this file
+    int len2 = 0;             // only ever set to 0 in this file
+    sentence* next = nullptr; // next sentence of the same file
+};
+
+// Result of comparing one file (a) against another file (b).
+struct node
+{
+    int flags = 0;            // only ever set to 0 in this file
+    file1* a_file = nullptr;  // contents loaded from path1
+    file1* b_file = nullptr;  // contents loaded from path2
+    string path1;             // path of the first file
+    string path2;             // path of the second file
+    con* head = nullptr;      // best match for every sentence of a_file
+    int num = 0;              // only ever set to 0 in this file
+    node* next = nullptr;     // next comparison of the same first file
+
+    double rate = 0;          // value computed by repeat_rate()
+
+    int code = 0;             // selection number assigned by print_info(filenm*)
+};
+
+// Pairing of a sentence with its closest sentence in the other file.
+struct con
+{
+    sentence* a_sen = nullptr; // sentence being looked up
+    sentence* b_sen = nullptr; // closest sentence found for a_sen
+    double similar = 0;        // normalised distance from rate_dis(); 0 means identical
+    int score = 0;             // raw distance from calc_dis()
+
+    con* next = nullptr;       // match for the next sentence
+};
+
+// ---- list construction and file loading ----
+filenm* create_filenm();
+filenm* create_filenm(const string s);
+filenm* findlast_filenm(filenm* head);
+void create_filenm(filenm* head, const string s);
+filenm* dir_Allfile(string path);
+filenm* get_files(string path);
+sentence* create_sentence();
+sentence* create_sentence(string s);
+sentence* findlast_sentence(sentence* head);
+void create_sentence(sentence* head, const string s);
+file1* create_file1();
+file1* get_file(string path);
+con* create_con();
+node* create_node();
+
+// ---- path predicates ----
+bool is_exist(const char* s);
+bool is_dir(const char* fileName);
+bool istxt(string s);
+// ---- text comparison ----
+string& replace_all(string& src, const string& old_value, const string& new_value);
+void inil_sentence(sentence* s, int);
+void inil_sentence(sentence* s);
+int ex_dis(string q1, string q2);
+int calc_dis(string s1, string s2);
+double rate_dis(string s1, string s2, int score);
+con* find_best(sentence* one, sentence* many, int);
+con* find_best(sentence* head1, sentence* head2);
+con* find_best(file1* a, file1* b);
+void inil_node(node* n);
+void inil_node(node* n, filenm* f1, filenm* f2);
+void one_to_many(filenm* f1, filenm* head);
+void many_to_many(filenm* head1, filenm* head2);
+int get_length(con* m);
+int get_length(sentence* s);
+int get_length(filenm* f);
+void repeat_rate(node* n);
+
+// ---- console output ----
+void print_percent(double d);
+void print_filenm(filenm* f);
+void print(const string& s);
+void print_sentence1(sentence* f);
+void print_sentence2(sentence* f);
+void print(sentence* s, int flags);
+void print(const char* s, double num);
+void print(con* m, int);
+void print(con* m);
+void print(file1* a);
+void print(file1* a, int);
+void print(node* n);
+void print(filenm* f);
+void print_info(node* n);
+void print_info(filenm* f);
+bool print_info(filenm* f, int code);
+
+// ---- interactive flow ----
+void impo02();
+void impo03();
+
+
+// Allocates an empty filenm. `new` throws on failure instead of returning
+// null, so no null check is needed; the in-class initialisers zero the links.
+filenm* create_filenm()
+{
+    return new filenm();
+}
+
+// Allocates a filenm whose name is `s`.
+filenm* create_filenm(const string s)
+{
+    filenm* f = create_filenm();
+    f->name = s;
+    return f;
+}
+
+// Returns the last element of the list starting at `head`.
+// Prints a diagnostic and returns null when `head` is null.
+filenm* findlast_filenm(filenm* head)
+{
+    if (head == nullptr)
+    {
+        printf("有误\n");
+        return nullptr;
+    }
+    filenm* last = head;
+    while (last->next != nullptr)
+    {
+        last = last->next;
+    }
+    return last;
+}
+
+// Appends a new filenm named `s` to the list starting at `head`.
+// Does nothing (after findlast_filenm's diagnostic) when `head` is null.
+void create_filenm(filenm* head, const string s)
+{
+    filenm* last = findlast_filenm(head);
+    if (last == nullptr)
+    {
+        return;
+    }
+    last->next = create_filenm(s);
+}
+
+// Lists the entries of directory `path` that istxt() accepts.
+// Returns the head of a filenm list, or null when nothing matched or the
+// directory could not be enumerated.
+filenm* dir_Allfile(string path)
+{
+    struct _finddata_t fileinfo;
+    const string pattern = path + "\\*";
+    const intptr_t hFile = _findfirst(pattern.c_str(), &fileinfo);
+    if (hFile == -1)
+    {
+        return nullptr;
+    }
+
+    // Track the tail so each append is O(1) and no dummy head has to be
+    // allocated (the previous dummy head was never freed).
+    filenm* head = nullptr;
+    filenm* tail = nullptr;
+    do
+    {
+        const string full = path + "\\" + fileinfo.name;
+        if (!istxt(full))
+        {
+            continue;
+        }
+        filenm* entry = create_filenm(full);
+        if (tail == nullptr)
+        {
+            head = entry;
+        }
+        else
+        {
+            tail->next = entry;
+        }
+        tail = entry;
+    } while (_findnext(hFile, &fileinfo) == 0);
+    _findclose(hFile);
+    return head;
+}
+
+// Returns a one-element list when `path` is a .txt file, otherwise the .txt
+// files found directly inside the directory `path`.
+filenm* get_files(string path)
+{
+    if (istxt(path))
+        return create_filenm(path);
+    return dir_Allfile(path);
+}
+
+// Allocates an empty sentence.
+sentence* create_sentence()
+{
+    return new sentence();
+}
+
+// Allocates a sentence whose raw text (s1) is `s`.
+sentence* create_sentence(string s)
+{
+    sentence* f = create_sentence();
+    f->s1 = s;
+    return f;
+}
+
+// Returns the last element of the list starting at `head`.
+// Prints a diagnostic and returns null when `head` is null.
+sentence* findlast_sentence(sentence* head)
+{
+    if (head == nullptr)
+    {
+        printf("传入的sentence指针有误\n");
+        return nullptr;
+    }
+    sentence* last = head;
+    while (last->next != nullptr)
+    {
+        last = last->next;
+    }
+    return last;
+}
+
+// Appends a new sentence with text `s` to the list starting at `head`.
+// Does nothing (after findlast_sentence's diagnostic) when `head` is null.
+void create_sentence(sentence* head, const string s)
+{
+    sentence* last = findlast_sentence(head);
+    if (last == nullptr)
+    {
+        return;
+    }
+    last->next = create_sentence(s);
+}
+
+// Allocates an empty file1.
+file1* create_file1()
+{
+    return new file1();
+}
+
+// Appends `text` as a new sentence to the list described by head/tail.
+static void append_sentence(sentence*& head, sentence*& tail, const string& text)
+{
+    sentence* item = create_sentence(text);
+    if (tail == nullptr)
+    {
+        head = item;
+    }
+    else
+    {
+        tail->next = item;
+    }
+    tail = item;
+}
+
+// Reads the file at `path` and splits it into sentences. A sentence ends at
+// (and includes) each ';'. Whatever follows the last ';' - possibly an empty
+// string - is stored as a final sentence, so the result always has at least
+// one sentence.
+// Returns null, after printing a diagnostic, when the file cannot be opened.
+file1* get_file(string path)
+{
+    FILE* fp = fopen(path.c_str(), "r");
+    if (fp == nullptr)
+    {
+        printf("读取文件失败!\n");
+        return nullptr;
+    }
+
+    sentence* head = nullptr;
+    sentence* tail = nullptr;
+    int count = 0;
+    string current;
+
+    // fgetc returns int: storing it in a char makes byte 0xFF look like EOF
+    // (or makes EOF undetectable where char is unsigned).
+    int ch = 0;
+    while ((ch = fgetc(fp)) != EOF)
+    {
+        current.push_back(static_cast<char>(ch));
+        if (ch == ';')
+        {
+            append_sentence(head, tail, current);
+            count++;
+            current.clear();
+        }
+    }
+    fclose(fp);
+
+    append_sentence(head, tail, current);
+    count++;
+
+    file1* a = create_file1();
+    a->head = head;
+    a->num = count;
+    a->path = create_filenm(path);
+    return a;
+}
+
+// Allocates an empty con.
+con* create_con()
+{
+    return new con();
+}
+
+// Allocates an empty node.
+node* create_node()
+{
+    return new node();
+}
+
+// Returns true when path `s` exists; otherwise prints a message and returns false.
+bool is_exist(const char* s)
+{
+    if (_access(s, 0) == 0)
+    {
+        return true;
+    }
+    printf("%s不存在\n", s);
+    return false;
+}
+
+// Returns true when `fileName` names a directory. A failed stat() yields
+// false instead of inspecting an uninitialised buffer.
+bool is_dir(const char* fileName)
+{
+    struct stat buf;
+    if (stat(fileName, &buf) != 0)
+    {
+        return false;
+    }
+    return (buf.st_mode & S_IFDIR) != 0;
+}
+
+// Returns true when `s` exists, ends in ".txt" and is not a directory.
+// is_exist() prints a message when the path is missing.
+bool istxt(string s)
+{
+    if (!is_exist(s.c_str()))
+        return false;
+
+    // Require ".txt" as the suffix; a substring test also accepted names
+    // such as "notes.txt.bak" or a directory called "my.txt_files".
+    const string ext = ".txt";
+    if (s.length() < ext.length())
+        return false;
+    if (s.compare(s.length() - ext.length(), ext.length(), ext) != 0)
+        return false;
+
+    return !is_dir(s.c_str());
+}
+
+// Replaces every occurrence of `old_value` in `src` with `new_value`, in
+// place, and returns `src`. Text inserted by a replacement is not rescanned.
+string& replace_all(string& src, const string& old_value, const string& new_value)
+{
+    // An empty pattern matches at every position and would never terminate.
+    if (old_value.empty())
+    {
+        return src;
+    }
+    string::size_type pos = 0;
+    while ((pos = src.find(old_value, pos)) != string::npos)
+    {
+        src.replace(pos, old_value.length(), new_value);
+        pos += new_value.length();
+    }
+    return src;
+}
+
+// Prepares one sentence for comparison: the comparison text s2 is a copy of s1.
+void inil_sentence(sentence* s, int)
+{
+    s->s2 = s->s1;
+}
+
+// Prepares every sentence of the list starting at `s`.
+void inil_sentence(sentence* s)
+{
+    for (; s != nullptr; s = s->next)
+    {
+        inil_sentence(s, 1);
+    }
+}
+
+// Edit distance between q1 and q2 with unit cost for insertion, deletion and
+// substitution. Uses a single rolling row, so inputs of any length are safe
+// (the previous fixed 60x60 table overflowed beyond 59 characters).
+int ex_dis(string q1, string q2)
+{
+    const int len1 = static_cast<int>(q1.length());
+    const int len2 = static_cast<int>(q2.length());
+
+    int* row = new int[len2 + 1];
+    for (int j = 0; j <= len2; j++)
+    {
+        row[j] = j;
+    }
+
+    for (int i = 1; i <= len1; i++)
+    {
+        int diag = row[0]; // dp[i-1][j-1]
+        row[0] = i;
+        for (int j = 1; j <= len2; j++)
+        {
+            const int up = row[j]; // dp[i-1][j]
+            row[j] = q1[i - 1] == q2[j - 1]
+                ? diag
+                : min(diag, min(up, row[j - 1])) + 1;
+            diag = up;
+        }
+    }
+
+    const int result = row[len2];
+    delete[] row;
+    return result;
+}
+
+// Approximate distance between s1 and s2: the strings are cut into aligned
+// 58-character chunks, the chunk distances are summed, and whatever remains
+// of the longer string after the shorter one ends is counted in full.
+int calc_dis(string s1, string s2)
+{
+    const int len1 = static_cast<int>(s1.length());
+    const int len2 = static_cast<int>(s2.length());
+    const int chunk = 58;
+    int score = 0;
+
+    for (int x = 0; x < len1 || x < len2; x += chunk)
+    {
+        // Only the part not yet compared is added. Adding the full length
+        // difference counted characters twice and let the score exceed the
+        // longer string's length.
+        if (x >= len1)
+        {
+            score += len2 - x;
+            break;
+        }
+        if (x >= len2)
+        {
+            score += len1 - x;
+            break;
+        }
+        score += ex_dis(s1.substr(x, chunk), s2.substr(x, chunk));
+    }
+    return score;
+}
+
+// Normalises `score` by the longer of the two strings (equivalent to taking
+// the smaller of score/len1 and score/len2). Two empty strings give 0.
+double rate_dis(string s1, string s2, int score)
+{
+    const int len1 = static_cast<int>(s1.length());
+    const int len2 = static_cast<int>(s2.length());
+    const int longest = len1 > len2 ? len1 : len2;
+    if (longest == 0)
+    {
+        return 0.0;
+    }
+    return score * 1.0 / longest;
+}
+
+// Finds the sentence in list `many` closest to `one` (smallest rate_dis; the
+// earliest wins ties). Returns a new con, or null when either argument is null.
+con* find_best(sentence* one, sentence* many, int)
+{
+    if (one == nullptr || many == nullptr)
+    {
+        return nullptr;
+    }
+    const string& text = one->s2;
+
+    sentence* best_sen = many;
+    int best_score = calc_dis(text, many->s2);
+    double best_rate = rate_dis(text, many->s2, best_score);
+
+    for (sentence* cur = many->next; cur != nullptr; cur = cur->next)
+    {
+        const int cur_score = calc_dis(text, cur->s2);
+        const double cur_rate = rate_dis(text, cur->s2, cur_score);
+        if (cur_rate < best_rate)
+        {
+            best_score = cur_score;
+            best_rate = cur_rate;
+            best_sen = cur;
+        }
+    }
+
+    con* m = create_con();
+    m->a_sen = one;
+    m->b_sen = best_sen;
+    m->similar = best_rate;
+    m->score = best_score;
+    return m;
+}
+
+// Builds, for every sentence of list `head1`, its best match in list `head2`.
+// Returns the head of the con list (null when either list is empty).
+con* find_best(sentence* head1, sentence* head2)
+{
+    if (head2 == nullptr)
+    {
+        return nullptr;
+    }
+    con* first = nullptr;
+    con* last = nullptr;
+    for (sentence* cur = head1; cur != nullptr; cur = cur->next)
+    {
+        con* match = find_best(cur, head2, 1);
+        if (last == nullptr)
+        {
+            first = match;
+        }
+        else
+        {
+            last->next = match;
+        }
+        last = match;
+    }
+    return first;
+}
+
+// Best matches for every sentence of file `a` within file `b`.
+con* find_best(file1* a, file1* b)
+{
+    if (a == nullptr || b == nullptr)
+    {
+        return nullptr;
+    }
+    return find_best(a->head, b->head);
+}
+
+// Loads the two files named by n->path1 / n->path2, matches their sentences
+// and computes the rate. When either file cannot be read the node is left
+// without matches and with rate 0.
+void inil_node(node* n)
+{
+    n->a_file = get_file(n->path1);
+    n->b_file = get_file(n->path2);
+
+    if (n->a_file == nullptr || n->b_file == nullptr)
+    {
+        n->head = nullptr;
+        n->rate = 0;
+        return;
+    }
+
+    inil_sentence(n->a_file->head);
+    inil_sentence(n->b_file->head);
+
+    n->head = find_best(n->a_file, n->b_file);
+
+    repeat_rate(n);
+}
+
+// Initialises `n` as the comparison of file f1 against file f2.
+void inil_node(node* n, filenm* f1, filenm* f2)
+{
+    n->path1 = f1->name;
+    n->path2 = f2->name;
+    inil_node(n);
+}
+
+// Compares f1 against every file of the list `head` and stores the resulting
+// node list in f1->head. Does nothing when either argument is null.
+void one_to_many(filenm* f1, filenm* head)
+{
+    if (f1 == nullptr || head == nullptr)
+    {
+        return;
+    }
+
+    node* last = nullptr;
+    for (filenm* other = head; other != nullptr; other = other->next)
+    {
+        node* result = create_node();
+        inil_node(result, f1, other);
+        if (last == nullptr)
+        {
+            f1->head = result;
+        }
+        else
+        {
+            last->next = result;
+        }
+        last = result;
+    }
+}
+
+// Compares every file of list head1 against every file of list head2.
+void many_to_many(filenm* head1, filenm* head2)
+{
+    for (; head1 != nullptr; head1 = head1->next)
+    {
+        one_to_many(head1, head2);
+    }
+}
+
+// Number of elements in a con list.
+int get_length(con* m)
+{
+    int count = 0;
+    for (; m != nullptr; m = m->next)
+    {
+        count++;
+    }
+    return count;
+}
+
+// Number of elements in a sentence list.
+int get_length(sentence* s)
+{
+    int count = 0;
+    for (; s != nullptr; s = s->next)
+    {
+        count++;
+    }
+    return count;
+}
+
+// Number of elements in a filenm list.
+int get_length(filenm* f)
+{
+    int count = 0;
+    for (; f != nullptr; f = f->next)
+    {
+        count++;
+    }
+    return count;
+}
+
+// Stores in n->rate the mean similarity (1 - con::similar, floored at 0) of
+// the best matches in n->head; 0 when there are no matches.
+// NOTE(review): the previous formula divided the number of matches by the
+// number of sentences of the first file. find_best() creates exactly one
+// match per sentence, so that value was always 1. Averaging the per-sentence
+// similarity is assumed to be the intended measure - confirm.
+void repeat_rate(node* n)
+{
+    if (n == nullptr)
+    {
+        return;
+    }
+    int count = 0;
+    double total = 0;
+    for (con* m = n->head; m != nullptr; m = m->next)
+    {
+        const double similarity = 1.0 - m->similar;
+        total += similarity > 0 ? similarity : 0;
+        count++;
+    }
+    n->rate = count > 0 ? total / count : 0.0;
+}
+
+
+// Prints the fraction `d` as a percentage with two decimals, without a newline.
+void print_percent(double d)
+{
+    printf("%.2f%%", d * 100);
+}
+
+// Prints every path of the list, one per line, followed by a blank line.
+void print_filenm(filenm* f)
+{
+    for (; f != nullptr; f = f->next)
+    {
+        printf("%s\n", f->name.c_str());
+    }
+    printf("\n");
+}
+
+// Prints `s` followed by a newline.
+void print(const string& s)
+{
+    printf("%s\n", s.c_str());
+}
+
+// Prints the raw text (s1) of every sentence, then a blank line.
+void print_sentence1(sentence* f)
+{
+    for (; f != nullptr; f = f->next)
+    {
+        printf("%s\n", f->s1.c_str());
+    }
+    printf("\n");
+}
+
+// Prints the comparison text (s2) of every sentence, then a blank line.
+void print_sentence2(sentence* f)
+{
+    for (; f != nullptr; f = f->next)
+    {
+        printf("%s\n", f->s2.c_str());
+    }
+    printf("\n");
+}
+
+// Prints s1 when flags is 1, s2 when flags is 2, nothing otherwise.
+void print(sentence* s, int flags)
+{
+    if (flags == 1)
+    {
+        print(s->s1);
+    }
+    if (flags == 2)
+    {
+        print(s->s2);
+    }
+}
+
+// Prints the label `s` immediately followed by `num` and a newline.
+void print(const char* s, double num)
+{
+    printf("%s%lf\n", s, num);
+}
+
+// Prints one match: both raw sentences, then their similarity as a percentage.
+void print(con* m, int)
+{
+    print(m->a_sen, 1);
+    print(m->b_sen, 1);
+    print_percent(1 - m->similar);
+    printf("\n\n");
+}
+
+// Prints every match of the list starting at `m`.
+void print(con* m)
+{
+    printf("\n\n");
+    for (; m != nullptr; m = m->next)
+    {
+        print(m, 1);
+    }
+}
+
+// Prints the raw sentences of file `a`.
+void print(file1* a)
+{
+    print_sentence1(a->head);
+}
+
+// Prints the comparison text of the sentences of file `a`.
+void print(file1* a, int)
+{
+    print_sentence2(a->head);
+}
+
+// Prints the detailed report of the single comparison `n` (not of the nodes
+// linked after it). The paths come from the node itself - they are the same
+// strings get_file() stored - so the report is safe when a file failed to load.
+void print(node* n)
+{
+    if (n == nullptr)
+    {
+        printf("这个文件是空的!\n");
+        return;
+    }
+
+    printf("\n\n\n");
+    print(n->path1);
+    printf("和");
+    print(n->path2);
+    printf("\n");
+    print(n->head);
+
+    print(n->path1);
+    print(n->path2);
+    printf("\n\n\n");
+}
+
+// Prints the report of the first comparison attached to `f`.
+void print(filenm* f)
+{
+    print(f->head);
+}
+
+// Prints the two paths of comparison `n` and its rate.
+void print_info(node* n)
+{
+    printf("%s\n%s\n", n->path1.c_str(), n->path2.c_str());
+    // "%2f" only set a minimum width; two decimals is assumed to be the intent.
+    printf("查重率为%.2f\n", n->rate);
+}
+
+// Lists every comparison reachable from `f`, numbering them from 1 and
+// storing the number in node::code. 0 is left free because start() uses it
+// as the exit command.
+void print_info(filenm* f)
+{
+    int code = 1;
+    for (; f != nullptr; f = f->next)
+    {
+        for (node* n = f->head; n != nullptr; n = n->next)
+        {
+            n->code = code;
+            printf("[%d] ", code);
+            print_info(n);
+            code++;
+        }
+    }
+}
+
+// Prints the detailed report of the comparison numbered `code`.
+// Returns false when no comparison carries that number.
+bool print_info(filenm* f, int code)
+{
+    for (; f != nullptr; f = f->next)
+    {
+        for (node* n = f->head; n != nullptr; n = n->next)
+        {
+            if (n->code == code)
+            {
+                print(n);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+// Runs all comparisons between the two file groups, then repeatedly shows
+// the summary list and the report the user selects. Entering 0 (or closing
+// the input stream) terminates the program; this function never returns.
+void start(filenm* f1, filenm* f2)
+{
+    system("cls");
+    many_to_many(f1, f2);
+
+    while (true)
+    {
+        system("cls");
+        print_info(f1);
+        printf("按 0 退出\n");
+
+        bool shown = false;
+        while (!shown)
+        {
+            int code = 0;
+            if (!(cin >> code))
+            {
+                if (cin.eof())
+                {
+                    exit(0);
+                }
+                // Drop the unparsable input; otherwise the stream stays in
+                // the failed state and this loop spins forever.
+                cin.clear();
+                cin.ignore(1024, '\n');
+                printf("您的输入不合法\n");
+                continue;
+            }
+            if (code == 0)
+            {
+                exit(0);
+            }
+            if (print_info(f1, code))
+            {
+                system("pause");
+                shown = true;
+            }
+            else
+            {
+                printf("您的输入不合法\n");
+            }
+        }
+    }
+}
+
+// Shows the introductory hints, waits for a key press, then runs impo02().
+void impo03()
+{
+    printf("\n\n");
+    printf("\t请输入第一组文件路径\n");
+    printf("\t请输入第二组文件路径\n");
+    printf("\t按任意键继续");
+    _getch();
+    impo02();
+}
+
+// Asks (printing `prompt`) until the user names an existing .txt file or a
+// directory containing at least one .txt file, and returns the resulting
+// file list. `single_label` is printed in front of the path when a single
+// file was chosen. Entering "0" or closing the input terminates the program.
+static filenm* prompt_file_group(const char* prompt, const char* single_label)
+{
+    while (true)
+    {
+        printf("%s", prompt);
+        string path;
+        if (!(cin >> path) || path == "0")
+        {
+            exit(0);
+        }
+
+        if (istxt(path))
+        {
+            filenm* files = get_files(path);
+            printf("%s%s\n", single_label, path.c_str());
+            return files;
+        }
+        if (!is_dir(path.c_str()))
+        {
+            continue;
+        }
+
+        filenm* files = get_files(path);
+        const int count = get_length(files);
+        if (count == 0)
+        {
+            printf("此文件中.txt文件份数为0 请重新输入\n");
+            continue;
+        }
+        printf("此文件中有 %d 份.txt文件\n", count);
+        print_filenm(files);
+        return files;
+    }
+}
+
+// Asks for the two file groups and starts the comparison.
+void impo02()
+{
+    printf("\n");
+    printf("\t请输入第一组文件路径\n");
+    printf("\t请输入第二组文件路径\n");
+    printf("\t 按 0 退出\n");
+
+    filenm* f1 = prompt_file_group("\n请输入第一组文件路径及名称:", "第一组文件为");
+    filenm* f2 = prompt_file_group("\n请输入第二组文件路径及名称:", "第2组文件为");
+
+    system("pause");
+    start(f1, f2);
+}
+
+
+// Shows the title screen and waits for the '1' key before starting.
+int main()
+{
+    printf("\n\n\n\n\n");
+    printf("\t\t\t\t\t\t查重系统\n\n\n");
+    printf("\t\t\t\t\t\t 输入 1 启动程序\n");
+
+    while (_getch() != '1')
+    {
+    }
+    impo02();
+    return 0;
+}
-- 
GitLab