From d89108766c315c2387e60db208ad842c76fa3313 Mon Sep 17 00:00:00 2001 From: barrierye Date: Sun, 2 Dec 2018 14:22:59 +0800 Subject: [PATCH] Update the CheckFile function in data_feed to ignore whitespace at the end of each line of data (for example, a '\t' character may be appended to the end of the reduce task output when the data is processed by Hadoop, which does not affect the correctness of the data). test=develop --- paddle/fluid/framework/data_feed.cc | 43 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 851c7eda8..5fb141f3c 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -200,22 +200,22 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { for (size_t i = 0; i < all_slots_.size(); ++i) { int num = strtol(endptr, &endptr, 10); if (num < 0) { - VLOG(1) << "error: the number of ids is a negative number: " << num; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "error: the number of ids is a negative number: " << num; + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } else if (num == 0) { - VLOG(1) + VLOG(0) << "error: the number of ids can not be zero, you need " "padding it in data generator; or if there is something wrong" " with the data, please check if the data contains unresolvable " "characters."; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } else if (errno == ERANGE || num > INT_MAX) { - VLOG(1) << "error: the number of ids greater than INT_MAX"; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "error: the number of ids greater than INT_MAX"; + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } @@ -223,15 
+223,15 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { for (int i = 0; i < num; ++i) { strtof(endptr, &endptr); if (errno == ERANGE) { - VLOG(1) << "error: the value is out of the range of " + VLOG(0) << "error: the value is out of the range of " "representable values for float"; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } if (i + 1 != num && endptr - str == len) { - VLOG(1) << "error: there is a wrong with the number of ids."; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "error: there is a wrong with the number of ids."; + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } @@ -240,30 +240,33 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { for (int i = 0; i < num; ++i) { strtoull(endptr, &endptr, 10); if (errno == ERANGE) { - VLOG(1) << "error: the value is out of the range of " + VLOG(0) << "error: the value is out of the range of " "representable values for uint64_t"; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } if (i + 1 != num && endptr - str == len) { - VLOG(1) << "error: there is a wrong with the number of ids."; - VLOG(1) << "please check line<" << instance_cout << "> in file<" + VLOG(0) << "error: there is a wrong with the number of ids."; + VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } } } else { - VLOG(1) << "error: this type<" << all_slots_type_[i] + VLOG(0) << "error: this type<" << all_slots_type_[i] << "> is not supported"; return false; } } - if (endptr - str != len) { - VLOG(1) << "error: there is some data at the end of the line."; - VLOG(1) << "please check line<" << instance_cout << "> in file<" - << filename << ">"; - return false; + 
while (endptr - str != len) { + if (!isspace(*(endptr++))) { + VLOG(0) + << "error: there is some extra characters at the end of the line."; + VLOG(0) << "please check line<" << instance_cout << "> in file<" + << filename << ">"; + return false; + } } } VLOG(3) << "instances cout: " << instance_cout; -- GitLab