diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index ae52b5dfcae8bba9b4d9d5adeea9aa2a5273ff84..291d8ffc3c3334c2836e1651a8997984bba084e1 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -262,10 +262,11 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { // It may be added '\t' character to the end of the output of reduce // task when processes data by Hadoop(when the output of the reduce // task of Hadoop has only one field, it will add a '\t' at the end - // of the line by default), which does not affect the correctness of - // the data. Therefore, it should be judged that the data is not - // normal when the end of each line of data contains characters - // which are not spaces. + // of the line by default, and you can use this option to avoid it: + // `-D mapred.textoutputformat.ignoreseparator=true`), which does + // not affect the correctness of the data. Therefore, it should be + // judged that the data is not normal when the end of each line of + // data contains characters which are not spaces. while (endptr - str != len) { if (!isspace(*(endptr++))) { VLOG(0)