Unverified commit e0babe7c, authored by chengduo, committed by GitHub

Merge pull request #9780 from chengduoZH/feature/fix_batch_size_is_littler_than_gpu_count

Abort training if the number of samples in the current batch is less than the number of devices.
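The C++ hunk below adds a PADDLE_ENFORCE_EQ guard in ParallelExecutor::SplitTensorToPlaces: SplitLoDTensor can only produce as many pieces as there are samples, so a batch smaller than the device count yields fewer LoDTensors than places, and the new check fails fast instead of leaving some devices with no data. A minimal Python sketch of that idea (illustrative only; split_batch and check_split are hypothetical helpers, not PaddlePaddle APIs, and the splitting rule is only an approximation of SplitLoDTensor):

def split_batch(samples, places):
    # Roughly mirrors SplitLoDTensor: split the batch into contiguous pieces,
    # one per device, dropping devices for which no samples remain.
    n = len(places)
    base, extra = divmod(len(samples), n)
    pieces, start = [], 0
    for i in range(n):
        size = base + (1 if i < extra else 0)
        if size == 0:
            continue  # this device would receive nothing
        pieces.append(samples[start:start + size])
        start += size
    return pieces

def check_split(samples, places):
    pieces = split_batch(samples, places)
    # Same intent as the new PADDLE_ENFORCE_EQ: the number of pieces
    # must match the number of devices.
    if len(pieces) != len(places):
        raise ValueError(
            "The number of samples of current batch is less than the count "
            "of devices, currently, it is not allowed. (%d vs %d)"
            % (len(places), len(pieces)))
    return pieces

check_split(list(range(8)), ["gpu:0", "gpu:1"])  # OK: 2 pieces for 2 devices
check_split(list(range(1)), ["gpu:0", "gpu:1"])  # raises: 1 piece for 2 devices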
@@ -174,6 +174,11 @@ void ParallelExecutor::SplitTensorToPlaces(
     const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
   for (auto it : feed_tensors) {
     auto lod_tensors = it.second.SplitLoDTensor(member_->places_);
+    PADDLE_ENFORCE_EQ(
+        member_->places_.size(), lod_tensors.size(),
+        "The number of samples of current batch is less than the count of "
+        "devices, currently, it is not allowed. (%d vs %d)",
+        member_->places_.size(), lod_tensors.size());
     for (size_t j = 0; j < member_->places_.size(); ++j) {
       // TODO(panxy0718): Do I need to delete this var?
       member_->local_scopes_[j]
......
@@ -87,7 +87,8 @@ class ParallelExecutor(object):
             # performance. Worth tunning for other models in the future.
             num_threads = len(self._places)
         else:
-            min(len(self._places) * 2, multiprocessing.cpu_count())
+            num_threads = min(
+                len(self._places) * 2, multiprocessing.cpu_count())
         main = main_program
         main = main if main else framework.default_main_program()
......
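The Python hunk is a separate small bug fix: in the CPU branch, the result of min(...) was computed but never assigned, so num_threads stayed unbound when use_cuda was False. A standalone sketch of that pattern (places and use_cuda are stand-ins for ParallelExecutor's internal state, not PaddlePaddle APIs):

import multiprocessing

def pick_num_threads_buggy(places, use_cuda):
    if use_cuda:
        num_threads = len(places)
    else:
        # The min(...) result is discarded, so num_threads is never bound here.
        min(len(places) * 2, multiprocessing.cpu_count())
    return num_threads  # UnboundLocalError when use_cuda is False

def pick_num_threads_fixed(places, use_cuda):
    if use_cuda:
        num_threads = len(places)
    else:
        num_threads = min(
            len(places) * 2, multiprocessing.cpu_count())
    return num_threads

print(pick_num_threads_fixed(["cpu:0", "cpu:1"], use_cuda=False))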