提交 55e584ee 编写于 作者: Black Kin@.com's avatar Black Kin@.com

查重

上级 94a84a4b
#define _CRT_SECURE_NO_WARNINGS
#include <conio.h>
#include <io.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Forward declarations: the record types below hold pointers to one another,
// so every name must be visible before the first definition.
struct filenm;
struct file1;
struct sentence;
struct node;
struct con;
// One entry in a singly linked list of file paths.
struct filenm
{
    string name;             // path of the file this entry refers to
    filenm* next = nullptr;  // following entry, or null at the end of the list
    node* head = nullptr;    // first comparison result for this file (set by one_to_many)
};
// A file loaded into memory as a linked list of sentences.
struct file1
{
    filenm* path = nullptr;    // record carrying the path the file was read from
    sentence* head = nullptr;  // first sentence of the file
    int num = 0;               // sentence count recorded by get_file
};
// One sentence of a loaded file, stored as a singly linked list element.
struct sentence
{
    string s1;                 // text exactly as read from the file
    string s2;                 // working copy of s1 (filled by inil_sentence) used when comparing
    int len1 = 0;              // presumably the length of s1; only ever zeroed in this file
    int len2 = 0;              // presumably the length of s2; only ever zeroed in this file
    sentence* next = nullptr;  // following sentence, or null at the end of the list
};
// Result of comparing one file (A) against another file (B).
struct node
{
    int flags = 0;            // only ever zeroed in this file
    file1* a_file = nullptr;  // contents of file A, loaded by inil_node
    file1* b_file = nullptr;  // contents of file B, loaded by inil_node
    string path1;             // path of file A
    string path2;             // path of file B
    con* head = nullptr;      // list of sentence matches produced by find_best
    int num = 0;              // only ever zeroed in this file
    node* next = nullptr;     // next comparison result for the same file A
    double rate = 0;          // value computed by repeat_rate
    int code = 0;             // menu number assigned by print_info(filenm*)
};
// A pairing of one sentence from file A with its closest sentence in file B.
struct con
{
    sentence* a_sen = nullptr;  // sentence taken from file A
    sentence* b_sen = nullptr;  // best-matching sentence found in file B
    double similar = 0;         // normalised distance from rate_dis (printed as 1 - similar)
    int score = 0;              // raw distance from calc_dis for this pair
    con* next = nullptr;        // next pairing, or null at the end of the list
};
// ---- File-path list construction -------------------------------------------
filenm* create_filenm();
filenm* create_filenm(const string s);
filenm* findlast_filenm(filenm* head);
void create_filenm(filenm* head, const string s);
filenm* dir_Allfile(string path);
filenm* get_files(string path);
// ---- Sentence list construction --------------------------------------------
sentence* create_sentence();
sentence* create_sentence(string s);
sentence* findlast_sentence(sentence* head);
void create_sentence(sentence* head, const string s);
// ---- Loaded files, match records and comparison nodes ----------------------
file1* create_file1();
file1* get_file(string path);
con* create_con();
node* create_node();
// ---- Path checks and string helpers ----------------------------------------
bool is_exist(const char* s);
bool is_dir(const char* fileName);
bool istxt(string s);
string& replace_all(string& src, const string& old_value, const string& new_value);
// The unnamed int parameter only selects the single-sentence overload.
void inil_sentence(sentence* s, int);
void inil_sentence(sentence* s);
// ---- Distance computation and best-match search ----------------------------
int ex_dis(string q1, string q2);
int calc_dis(string s1, string s2);
double rate_dis(string s1, string s2, int score);
// The unnamed int parameter only selects the one-against-many overload.
con* find_best(sentence* one, sentence* many, int);
con* find_best(sentence* head1, sentence* head2);
con* find_best(file1* a, file1* b);
// ---- Building comparison results -------------------------------------------
void inil_node(node* n);
void inil_node(node* n, filenm* f1, filenm* f2);
void one_to_many(filenm* f1, filenm* head);
void many_to_many(filenm* head1, filenm* head2);
// ---- List lengths and rate -------------------------------------------------
int get_length(con* m);
int get_length(sentence* s);
int get_length(filenm* f);
void repeat_rate(node* n);
// ---- Console output --------------------------------------------------------
void print_percent(double d);
void print_filenm(filenm* f);
void print(const string& s);
void print_sentence1(sentence* f);
void print_sentence2(sentence* f);
void print(sentence* s, int flags);
void print(const char* s, double num);
// The unnamed int parameter only selects the single-record overload.
void print(con* m, int);
void print(con* m);
void print(file1* a);
// The unnamed int parameter only selects the overload that prints s2.
void print(file1* a, int);
void print(node* n);
void print(filenm* f);
void print_info(node* n);
void print_info(filenm* f);
bool print_info(filenm* f, int code);
// ---- Interactive entry points ----------------------------------------------
void impo02();
void impo03();
// Allocates an empty filenm record on the heap.
//
// Returns a record with an empty name and null links; the caller owns it.
// A plain new-expression reports failure by throwing std::bad_alloc, it never
// yields a null pointer, so there is no failure branch to handle here.
filenm* create_filenm()
{
    // The in-class member initializers of filenm already give "", NULL, NULL.
    return new filenm();
}
// Allocates a filenm record whose name is a copy of `s`.
// The links of the returned record are null; the caller owns it.
filenm* create_filenm(const string s)
{
    filenm* record = create_filenm();
    record->name.assign(s);
    return record;
}
// Returns the last record of the list starting at `head`.
// For a null `head` it prints a short error message and returns NULL.
filenm* findlast_filenm(filenm* head)
{
    if (!head)
    {
        printf("有误\n");
        return NULL;
    }

    filenm* tail = head;
    for (; tail->next; tail = tail->next)
    {
        // walk forward until the record without a successor
    }
    return tail;
}
// Appends a new record named `s` to the end of the list starting at `head`.
//
// A null `head` is rejected: findlast_filenm has already printed its error
// message in that case, and there is no list to append to.
void create_filenm(filenm* head, const string s)
{
    filenm* last = findlast_filenm(head);
    if (last == NULL)
    {
        return;  // previously dereferenced the null pointer here
    }
    last->next = create_filenm(s);
}
// Lists the entries directly inside directory `path` that istxt() accepts.
//
// Returns the head of a newly allocated filenm list holding "<path>\<entry>"
// for each accepted entry, in enumeration order, or NULL when the directory
// cannot be enumerated or nothing is accepted. The caller owns the list.
filenm* dir_Allfile(string path)
{
    filenm* first = NULL;
    filenm** link = &first;  // slot that receives the next record; avoids a dummy head

    struct _finddata_t fileinfo;
    const string pattern = path + "\\*";
    const intptr_t hFile = _findfirst(pattern.c_str(), &fileinfo);
    if (hFile == -1)
    {
        return NULL;
    }

    do
    {
        const string candidate = path + "\\" + fileinfo.name;
        if (istxt(candidate))
        {
            *link = create_filenm(candidate);
            link = &(*link)->next;
        }
    } while (_findnext(hFile, &fileinfo) == 0);
    _findclose(hFile);

    return first;
}
// Builds the file list for `path`: a one-element list when istxt() accepts
// the path itself, otherwise the result of enumerating it with dir_Allfile().
filenm* get_files(string path)
{
    return istxt(path) ? create_filenm(path) : dir_Allfile(path);
}
// Allocates an empty sentence record on the heap.
//
// Returns a record with empty strings, zero lengths and a null `next`; the
// caller owns it. A plain new-expression throws std::bad_alloc on failure and
// never yields null, so no failure branch is needed.
sentence* create_sentence()
{
    // The in-class member initializers of sentence already provide the defaults.
    return new sentence();
}
// Allocates a sentence record whose s1 is a copy of `s`.
// All other fields keep their defaults; the caller owns the record.
sentence* create_sentence(string s)
{
    sentence* fresh = create_sentence();
    fresh->s1.assign(s);
    return fresh;
}
// Returns the last record of the sentence list starting at `head`.
// For a null `head` it prints an error message and returns NULL.
sentence* findlast_sentence(sentence* head)
{
    if (!head)
    {
        printf("传入的sentence指针有误\n");
        return NULL;
    }

    sentence* tail = head;
    for (; tail->next; tail = tail->next)
    {
        // walk forward until the record without a successor
    }
    return tail;
}
// Appends a new sentence holding `s` to the end of the list starting at `head`.
//
// A null `head` is rejected: findlast_sentence has already printed its error
// message in that case, and there is no list to append to.
void create_sentence(sentence* head, const string s)
{
    sentence* last = findlast_sentence(head);
    if (last == NULL)
    {
        return;  // previously dereferenced the null pointer here
    }
    last->next = create_sentence(s);
}
// Allocates an empty file1 record on the heap.
//
// Returns a record with null links and a zero sentence count; the caller owns
// it. A plain new-expression throws std::bad_alloc on failure and never yields
// null, so no failure branch is needed.
file1* create_file1()
{
    // The in-class member initializers of file1 already provide the defaults.
    return new file1();
}
// Reads the file at `path` and splits it into sentences.
//
// A sentence ends at each ';' (the ';' is kept in the sentence text). Whatever
// follows the last ';' - possibly nothing - is stored as one final sentence,
// so the returned list is never empty.
//
// Returns a newly allocated file1 (owned by the caller) whose `num` is the
// number of sentences and whose `path` records `path`, or NULL after printing
// an error message when the file cannot be opened.
file1* get_file(string path)
{
    FILE* fp = fopen(path.c_str(), "r");
    if (fp == NULL)
    {
        printf("读取文件失败!\n");
        return NULL;
    }

    file1* a = create_file1();
    sentence* first = NULL;
    sentence** link = &first;  // slot for the next sentence; avoids a leaked dummy head
    int num = 0;
    string str;

    // fgetc returns an int so that EOF stays distinguishable from every byte
    // value; storing it in a char first can end the loop early on a 0xFF byte
    // or never end it where char is unsigned.
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        str.push_back(static_cast<char>(ch));
        if (ch == ';')
        {
            *link = create_sentence(str);
            link = &(*link)->next;
            num++;
            str.clear();
        }
    }
    fclose(fp);

    // Trailing text after the last ';' becomes the final sentence.
    *link = create_sentence(str);
    num++;

    a->head = first;
    a->num = num;
    a->path = create_filenm();
    a->path->name = path;
    return a;
}
// Allocates an empty con (sentence match) record on the heap.
//
// Returns a record with null links and zero similarity/score; the caller owns
// it. A plain new-expression throws std::bad_alloc on failure and never yields
// null, so no failure branch is needed.
con* create_con()
{
    // The in-class member initializers of con already provide the defaults.
    return new con();
}
// Allocates an empty node (file comparison result) on the heap.
//
// Returns a record with null links, empty paths and zeroed counters; the
// caller owns it. A plain new-expression throws std::bad_alloc on failure and
// never yields null, so no failure branch is needed.
node* create_node()
{
    // The in-class member initializers of node already provide the defaults.
    return new node();
}
// Reports whether `s` can be accessed according to _access(s, 0).
// Prints "<s>不存在" when it cannot.
bool is_exist(const char* s)
{
    const bool found = (_access(s, 0) == 0);
    if (!found)
    {
        printf("%s不存在\n", s);
    }
    return found;
}
// Reports whether `fileName` names an existing directory.
//
// Returns false when stat() fails (for example because the path does not
// exist); in that case the stat buffer is not examined.
bool is_dir(const char* fileName)
{
    struct stat info;
    if (stat(fileName, &info) != 0)
    {
        return false;
    }
    // Compare the file-type bits as a whole instead of testing a single bit.
    return (info.st_mode & S_IFMT) == S_IFDIR;
}
// Reports whether `s` names an existing path that ends in ".txt".
//
// The extension must be the suffix of the path; a ".txt" that merely appears
// somewhere inside it (e.g. "notes.txt.bak") is not accepted. The comparison
// is case-sensitive. is_exist() prints a message when the path is missing.
bool istxt(string s)
{
    if (!is_exist(s.c_str()))
    {
        return false;
    }
    static const string extension = ".txt";
    if (s.length() < extension.length())
    {
        return false;
    }
    return s.compare(s.length() - extension.length(), extension.length(), extension) == 0;
}
// Replaces every occurrence of `old_value` in `src` with `new_value`, in place.
//
// Scanning resumes after each inserted replacement, so text introduced by
// `new_value` is never matched again. An empty `old_value` matches nothing and
// leaves `src` unchanged (it previously caused an endless loop).
//
// Returns a reference to `src`.
std::string& replace_all(std::string& src, const std::string& old_value, const std::string& new_value)
{
    if (old_value.empty())
    {
        return src;
    }

    std::string::size_type pos = src.find(old_value);
    while (pos != std::string::npos)
    {
        src.replace(pos, old_value.length(), new_value);
        pos = src.find(old_value, pos + new_value.length());
    }
    return src;
}
// Initialises the working copy of a single sentence: s2 becomes a copy of s1.
// The unnamed int parameter only distinguishes this overload.
void inil_sentence(sentence* s, int)
{
    s->s2.assign(s->s1);
}
// Initialises the working copy of every sentence in the list starting at `s`.
void inil_sentence(sentence* s)
{
    for (sentence* cur = s; cur != NULL; cur = cur->next)
    {
        inil_sentence(cur, 1);
    }
}
// Computes the edit distance between `q1` and `q2`: the minimum number of
// single-character insertions, deletions and substitutions turning one into
// the other.
//
// Works for strings of any length. The table is kept as two rolling rows sized
// from the input instead of a fixed 60x60 array, which overflowed as soon as
// either string reached 60 characters.
int ex_dis(std::string q1, std::string q2)
{
    const std::size_t rows = q1.length();
    const std::size_t cols = q2.length();

    // prev[j] = distance between the first i-1 chars of q1 and first j of q2.
    std::vector<int> prev(cols + 1);
    std::vector<int> cur(cols + 1);
    for (std::size_t j = 0; j <= cols; ++j)
    {
        prev[j] = static_cast<int>(j);
    }

    for (std::size_t i = 1; i <= rows; ++i)
    {
        cur[0] = static_cast<int>(i);
        for (std::size_t j = 1; j <= cols; ++j)
        {
            if (q1[i - 1] == q2[j - 1])
            {
                cur[j] = prev[j - 1];  // characters agree: no extra cost
            }
            else
            {
                cur[j] = std::min(prev[j - 1], std::min(prev[j], cur[j - 1])) + 1;
            }
        }
        prev.swap(cur);
    }
    return prev[cols];
}
// Approximates the distance between `s1` and `s2` by cutting both strings into
// aligned windows of 58 characters and summing ex_dis() over the windows.
//
// When one string ends on a window boundary before the other, every remaining
// character of the longer string counts as one edit. That remainder is
// measured from the current offset; it was previously taken as the full
// length difference, which counted the overhang inside the last shared window
// a second time and could push the score above the longer string's length.
int calc_dis(string s1, string s2)
{
    const int len1 = static_cast<int>(s1.length());
    const int len2 = static_cast<int>(s2.length());
    const int window = 58;  // kept at 58 so per-window scores stay unchanged
    int score = 0;

    for (int pos = 0; pos < len1 || pos < len2; pos += window)
    {
        if (pos >= len1)
        {
            score += len2 - pos;  // only the part of s2 not yet compared
            break;
        }
        if (pos >= len2)
        {
            score += len1 - pos;  // only the part of s1 not yet compared
            break;
        }
        score += ex_dis(s1.substr(pos, window), s2.substr(pos, window));
    }
    return score;
}
// Normalises a distance `score` by string length.
//
// Returns the smaller of score/len(s1) and score/len(s2). `score` is expected
// to be non-negative. No division by zero is performed: when one string is
// empty the score is divided by the other string's length, and two empty
// strings yield 0.0 (previously NaN).
double rate_dis(std::string s1, std::string s2, int score)
{
    const int len1 = static_cast<int>(s1.length());
    const int len2 = static_cast<int>(s2.length());

    if (len1 == 0 || len2 == 0)
    {
        const int longer = len1 > len2 ? len1 : len2;
        return longer == 0 ? 0.0 : static_cast<double>(score) / longer;
    }

    const double by_first = static_cast<double>(score) / len1;
    const double by_second = static_cast<double>(score) / len2;
    return by_first < by_second ? by_first : by_second;
}
// Finds the sentence in the list `many` whose s2 is closest to one->s2.
//
// Closeness is the rate_dis() value of the calc_dis() score; the first
// sentence wins ties because only a strictly smaller rate replaces it.
// `many` must not be null. The unnamed int parameter only distinguishes this
// overload. Returns a newly allocated con owned by the caller.
con* find_best(sentence* one, sentence* many, int)
{
    string query = one->s2;

    sentence* winner = many;
    int winner_score = calc_dis(query, winner->s2);
    double winner_rate = rate_dis(query, winner->s2, winner_score);

    for (sentence* cand = many->next; cand != NULL; cand = cand->next)
    {
        const int cand_score = calc_dis(query, cand->s2);
        const double cand_rate = rate_dis(query, cand->s2, cand_score);
        if (cand_rate < winner_rate)
        {
            winner = cand;
            winner_score = cand_score;
            winner_rate = cand_rate;
        }
    }

    con* match = create_con();
    match->a_sen = one;
    match->b_sen = winner;
    match->similar = winner_rate;
    match->score = winner_score;
    return match;
}
// Pairs every sentence of the list `head1` with its best match in `head2`.
//
// Returns the head of a newly allocated con list with one entry per sentence
// of `head1`, in the same order, or NULL when `head1` is empty. The list is
// built through a link pointer, so no dummy head record is allocated (the
// previous dummy head was never freed).
con* find_best(sentence* head1, sentence* head2)
{
    con* first = NULL;
    con** link = &first;
    for (sentence* cur = head1; cur != NULL; cur = cur->next)
    {
        *link = find_best(cur, head2, 1);
        link = &(*link)->next;
    }
    return first;
}
// Pairs every sentence of file `a` with its best match in file `b`.
con* find_best(file1* a, file1* b)
{
    sentence* a_sentences = a->head;
    sentence* b_sentences = b->head;
    return find_best(a_sentences, b_sentences);
}
// Fills in a comparison node from its two paths: loads both files, prepares
// their sentences, stores the best-match list and computes the rate.
// Both paths must be readable; the results of get_file are used unchecked.
void inil_node(node* n)
{
    file1* left = get_file(n->path1);
    n->a_file = left;
    file1* right = get_file(n->path2);
    n->b_file = right;

    inil_sentence(left->head);
    inil_sentence(right->head);

    n->head = find_best(left, right);
    repeat_rate(n);
}
// Sets the node's two paths from `f1` and `f2`, then runs the comparison.
void inil_node(node* n, filenm* f1, filenm* f2)
{
    n->path1.assign(f1->name);
    n->path2.assign(f2->name);
    inil_node(n);
}
// Compares file `f1` against every file in the list `head` and stores the
// resulting chain of nodes in f1->head, in list order.
// `head` must contain at least one file.
void one_to_many(filenm* f1, filenm* head)
{
    node* tail = create_node();
    inil_node(tail, f1, head);
    f1->head = tail;

    for (filenm* other = head->next; other != NULL; other = other->next)
    {
        node* fresh = create_node();
        inil_node(fresh, f1, other);
        tail->next = fresh;
        tail = fresh;
    }
}
// Compares every file in the list `head1` against every file in `head2`.
void many_to_many(filenm* head1, filenm* head2)
{
    for (filenm* current = head1; current != NULL; current = current->next)
    {
        one_to_many(current, head2);
    }
}
// Counts the records in the con list starting at `m` (0 for a null list).
int get_length(con* m)
{
    int count = 0;
    for (con* cur = m; cur != NULL; cur = cur->next)
    {
        ++count;
    }
    return count;
}
// Counts the records in the sentence list starting at `s` (0 for a null list).
int get_length(sentence* s)
{
    int count = 0;
    for (sentence* cur = s; cur != NULL; cur = cur->next)
    {
        ++count;
    }
    return count;
}
// Counts the records in the filenm list starting at `f` (0 for a null list).
int get_length(filenm* f)
{
    int count = 0;
    for (filenm* cur = f; cur != NULL; cur = cur->next)
    {
        ++count;
    }
    return count;
}
// Stores in n->rate the number of sentence matches divided by the number of
// sentences in file A; the rate is 0 when file A has no sentences.
//
// NOTE(review): find_best(sentence*, sentence*) in this file creates exactly
// one match per sentence of file A, so this ratio is 1.0 whenever file A is
// non-empty. Presumably only matches above some similarity threshold were
// meant to be counted - confirm the intended formula.
void repeat_rate(node* n)
{
    const int sentence_count = get_length(n->a_file->head);
    const int match_count = get_length(n->head);
    n->rate = sentence_count > 0 ? 1.0 * match_count / sentence_count : 0.0;
}
// Prints the fraction `d` as a percentage with two decimals, e.g. 0.5 -> "50.00%".
// No newline is written.
void print_percent(double d)
{
    const double percent = d * 100;
    printf("%.2f%%", percent);
}
// Prints the name of every record in the list `f`, one per line, followed by
// an empty line.
void print_filenm(filenm* f)
{
    for (filenm* cur = f; cur != NULL; cur = cur->next)
    {
        printf("%s\n", cur->name.c_str());
    }
    printf("\n");
}
// Prints `s` followed by a newline on standard output.
void print(const string& s)
{
    const char* text = s.c_str();
    fprintf(stdout, "%s\n", text);
}
// Prints the original text (s1) of every sentence in the list `f`, one per
// line, followed by an empty line.
void print_sentence1(sentence* f)
{
    for (sentence* cur = f; cur != NULL; cur = cur->next)
    {
        printf("%s\n", cur->s1.c_str());
    }
    printf("\n");
}
// Prints the working copy (s2) of every sentence in the list `f`, one per
// line, followed by an empty line.
void print_sentence2(sentence* f)
{
    for (sentence* cur = f; cur != NULL; cur = cur->next)
    {
        printf("%s\n", cur->s2.c_str());
    }
    printf("\n");
}
// Prints one text of sentence `s`: s1 when `flags` is 1, s2 when `flags` is 2.
// Any other value prints nothing.
void print(sentence* s, int flags)
{
    switch (flags)
    {
    case 1:
        print(s->s1);
        break;
    case 2:
        print(s->s2);
        break;
    default:
        break;
    }
}
// Prints the label `s` immediately followed by `num` and a newline.
void print(const char* s, double num)
{
    fprintf(stdout, "%s%lf\n", s, num);
}
// Prints one sentence match: the original text of both sentences, then
// (1 - similar) as a percentage, then two newlines.
// The unnamed int parameter only distinguishes this overload.
// Two unused length locals from the previous version were dropped.
void print(con* m, int)
{
    print(m->a_sen, 1);
    print(m->b_sen, 1);
    print_percent(1 - m->similar);
    printf("\n\n");
}
// Prints two newlines and then every match in the list `m`, in order.
// The previous version also maintained a counter that was never read.
void print(con* m)
{
    printf("\n\n");
    for (con* cur = m; cur != NULL; cur = cur->next)
    {
        print(cur, 1);
    }
}
// Prints the original text (s1) of every sentence of file `a`.
// The previous version also computed the list length and discarded it.
void print(file1* a)
{
    print_sentence1(a->head);
}
// Prints the working copy (s2) of every sentence of file `a`.
// The unnamed int parameter only distinguishes this overload.
// The previous version also computed the list length and discarded it.
void print(file1* a, int)
{
    print_sentence2(a->head);
}
// Prints the detailed report for the single comparison `n`: both file paths,
// every sentence match, and both paths again. Only `n` itself is printed,
// never the nodes linked after it. A null `n` prints a notice instead.
//
// The previous version wrapped this in a loop that always left after its
// first pass, leaving the advance to the next node unreachable; the output
// is unchanged.
void print(node* n)
{
    if (n == NULL)
    {
        printf("这个文件是空的!\n");
        return;
    }

    const string& first_path = n->a_file->path->name;
    const string& second_path = n->b_file->path->name;

    printf("\n\n\n");
    print(first_path);
    printf("和");
    print(second_path);
    printf("\n");
    print(n->head);
    print(first_path);
    print(second_path);
    printf("\n\n\n");
}
// Prints the detailed report of the first comparison attached to file `f`.
void print(filenm* f)
{
    node* first_result = f->head;
    print(first_result);
}
// Prints the summary of one comparison: both file paths on separate lines,
// then the rate stored in n->rate with two decimals.
//
// The rate format was "%2f" (minimum width 2, six decimals), presumably a
// typo for "%.2f", which matches the two decimals used by print_percent.
void print_info(node* n)
{
    printf("%s\n%s\n", n->a_file->path->name.c_str(), n->b_file->path->name.c_str());
    printf("查重率为%.2f\n", n->rate);
}
// Prints the summary of every comparison attached to the files in list `f`
// and numbers them so that one can be selected with print_info(f, code).
//
// Numbering starts at 1: the menu in start() uses 0 as the exit choice, and a
// comparison numbered 0 would shadow it. Each number is printed in front of
// its summary so the user can see what to type.
void print_info(filenm* f)
{
    int code = 1;
    for (; f != NULL; f = f->next)
    {
        for (node* n = f->head; n != NULL; n = n->next)
        {
            n->code = code;
            printf("%d: ", code);
            print_info(n);
            ++code;
        }
    }
}
// Prints the detailed report of the comparison numbered `code`.
// Returns true when such a comparison exists among the files in list `f`,
// false (printing nothing) otherwise.
bool print_info(filenm* f, int code)
{
    for (filenm* file = f; file != NULL; file = file->next)
    {
        for (node* result = file->head; result != NULL; result = result->next)
        {
            if (result->code != code)
            {
                continue;
            }
            print(result);
            return true;
        }
    }
    return false;
}
// Runs all comparisons between the two file groups, then loops over the
// result menu: the summary list is shown, the user types a number, and the
// matching detailed report is printed. Never returns; the program exits when
// 0 is entered without matching a comparison, or when input ends.
//
// Input that is not a number is discarded up to the end of the line and
// reported as invalid; it previously left std::cin in a failed state, so
// every later read failed as well.
void start(filenm* f1, filenm* f2)
{
    system("cls");
    many_to_many(f1, f2);

    while (true)
    {
        system("cls");
        print_info(f1);
        printf("按 0 退出\n");

        bool report_shown = false;
        while (!report_shown)
        {
            int code = 0;
            if (!(cin >> code))
            {
                if (cin.eof())
                {
                    exit(0);  // no more input can arrive
                }
                cin.clear();
                // Drop the rest of the offending line before asking again.
                int skipped;
                while ((skipped = cin.get()) != '\n' && skipped != EOF)
                {
                }
                printf("您的输入不合法\n");
                continue;
            }

            if (print_info(f1, code))
            {
                system("pause");
                report_shown = true;  // redraw the summary list
            }
            else if (code == 0)
            {
                exit(0);
            }
            else
            {
                printf("您的输入不合法\n");
            }
        }
    }
}
// Shows a short intro screen, waits for any key press, then hands over to
// impo02(). The key that was pressed is not used.
void impo03()
{
    printf("\n\n");
    printf("\t请输入第一组文件路径\n");
    printf("\t请输入第二组文件路径\n");
    printf("\t按任意键继续");
    (void)_getch();
    impo02();
}
void impo02()
{
printf("\n");
printf("\t请输入第一组文件路径\n");
printf("\t请输入第二组文件路径\n");
printf("\t 按 0 退出\n");
string s1;
string s2;
filenm* f1;
filenm* f2;
int num1 = 0;
int num2 = 0;
while (true)
{
printf("\n请输入第一组文件路径及名称:");
cin >> s1;
if (s1 == "0")
{
exit(0);
}
if (istxt(s1))
{
f1 = get_files(s1);
printf("第一组文件为%s\n", s1.c_str());
break;
}
else if (is_dir(s1.c_str()))
{
f1 = get_files(s1);
num1 = get_length(f1);
if (num1 == 0)
{
printf("此文件中.txt文件份数为0 请重新输入\n");
}
else
{
printf("此文件中有 %d 份.txt文件\n", num1);
print_filenm(f1);
break;
}
}
}
while (true)
{
printf("\n请输入第二组文件路径及名称:");
cin >> s2;
if (s2 == "0")
{
exit(0);
}
if (istxt(s2))
{
f2 = get_files(s2);
printf("第2组文件为%s\n", s2.c_str());
break;
}
else if (is_dir(s2.c_str()))
{
f2 = get_files(s2);
num2 = get_length(f2);
if (num2 == 0)
{
printf("此文件中.txt文件份数为0 请重新输入\n");
}
else
{
printf("此文件中有 %d 份.txt文件\n", num2);
print_filenm(f2);
break;
}
}
}
system("pause");
start(f1, f2);
}
// Program entry: shows the title screen, waits until the '1' key is pressed
// and then starts the interactive file selection.
int main()
{
    printf("\n\n\n\n\n");
    printf("\t\t\t\t\t\t查重系统\n\n\n");
    printf("\t\t\t\t\t\t 输入 1 启动程序\n");

    // Every key other than '1' is ignored.
    while (_getch() != '1')
    {
    }
    impo02();
    return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册