From d62a3dd72d847d7d37031ae6560caca6a020c15c Mon Sep 17 00:00:00 2001 From: barrierye Date: Sun, 2 Dec 2018 21:28:33 +0800 Subject: [PATCH] add the comment for CheckFile function. test=develop --- paddle/fluid/framework/data_feed.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 5fb141f3c..ae52b5dfc 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -259,6 +259,13 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { return false; } } + // When data is processed by Hadoop, a '\t' character may be appended + // to the end of each line of the reduce task's output (by default, + // Hadoop adds a trailing '\t' when the reduce output has only one + // field). This does not affect the correctness of the data. + // Therefore, a line is judged to be abnormal only when its + // remaining trailing characters include one that is not + // whitespace. while (endptr - str != len) { if (!isspace(*(endptr++))) { VLOG(0) -- GitLab