diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 5fb141f3c1e4e58dbae19a846fbbb8a0be958b4d..291d8ffc3c3334c2836e1651a8997984bba084e1 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -259,6 +259,14 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { return false; } } + // When data is processed by Hadoop, a '\t' character may be appended + // to the end of each line of the reduce task's output (when that + // output has only one field, Hadoop adds a trailing '\t' by default; + // this can be avoided with the option + // `-D mapred.textoutputformat.ignoreseparator=true`). This trailing + // whitespace does not affect the correctness of the data. Therefore, + // a line is judged abnormal only when the remainder at its end + // contains a character that is not whitespace. while (endptr - str != len) { if (!isspace(*(endptr++))) { VLOG(0)