提交 d8910876 编写于 作者: B barrierye

Update CheckFile function in data_feed to ignore whitespace at the end of each...

Update the CheckFile function in data_feed to ignore whitespace at the end of each line of data (for example, a '\t' character may be appended to the end of the reduce task output when the data is processed by Hadoop, which does not affect the correctness of the data). test=develop
上级 bc34d325
...@@ -200,22 +200,22 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { ...@@ -200,22 +200,22 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) {
for (size_t i = 0; i < all_slots_.size(); ++i) { for (size_t i = 0; i < all_slots_.size(); ++i) {
int num = strtol(endptr, &endptr, 10); int num = strtol(endptr, &endptr, 10);
if (num < 0) { if (num < 0) {
VLOG(1) << "error: the number of ids is a negative number: " << num; VLOG(0) << "error: the number of ids is a negative number: " << num;
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} else if (num == 0) { } else if (num == 0) {
VLOG(1) VLOG(0)
<< "error: the number of ids can not be zero, you need " << "error: the number of ids can not be zero, you need "
"padding it in data generator; or if there is something wrong" "padding it in data generator; or if there is something wrong"
" with the data, please check if the data contains unresolvable " " with the data, please check if the data contains unresolvable "
"characters."; "characters.";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} else if (errno == ERANGE || num > INT_MAX) { } else if (errno == ERANGE || num > INT_MAX) {
VLOG(1) << "error: the number of ids greater than INT_MAX"; VLOG(0) << "error: the number of ids greater than INT_MAX";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
...@@ -223,15 +223,15 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { ...@@ -223,15 +223,15 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) {
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
strtof(endptr, &endptr); strtof(endptr, &endptr);
if (errno == ERANGE) { if (errno == ERANGE) {
VLOG(1) << "error: the value is out of the range of " VLOG(0) << "error: the value is out of the range of "
"representable values for float"; "representable values for float";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
if (i + 1 != num && endptr - str == len) { if (i + 1 != num && endptr - str == len) {
VLOG(1) << "error: there is a wrong with the number of ids."; VLOG(0) << "error: there is a wrong with the number of ids.";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
...@@ -240,32 +240,35 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { ...@@ -240,32 +240,35 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) {
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
strtoull(endptr, &endptr, 10); strtoull(endptr, &endptr, 10);
if (errno == ERANGE) { if (errno == ERANGE) {
VLOG(1) << "error: the value is out of the range of " VLOG(0) << "error: the value is out of the range of "
"representable values for uint64_t"; "representable values for uint64_t";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
if (i + 1 != num && endptr - str == len) { if (i + 1 != num && endptr - str == len) {
VLOG(1) << "error: there is a wrong with the number of ids."; VLOG(0) << "error: there is a wrong with the number of ids.";
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
} }
} else { } else {
VLOG(1) << "error: this type<" << all_slots_type_[i] VLOG(0) << "error: this type<" << all_slots_type_[i]
<< "> is not supported"; << "> is not supported";
return false; return false;
} }
} }
if (endptr - str != len) { while (endptr - str != len) {
VLOG(1) << "error: there is some data at the end of the line."; if (!isspace(*(endptr++))) {
VLOG(1) << "please check line<" << instance_cout << "> in file<" VLOG(0)
<< "error: there is some extra characters at the end of the line.";
VLOG(0) << "please check line<" << instance_cout << "> in file<"
<< filename << ">"; << filename << ">";
return false; return false;
} }
} }
}
VLOG(3) << "instances cout: " << instance_cout; VLOG(3) << "instances cout: " << instance_cout;
VLOG(3) << "The file format is correct"; VLOG(3) << "The file format is correct";
return true; return true;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册