diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 74945fb4f2f745b6ca9c48adb0c8b9e6ae1e94a4..99b3065d8df80bd8f482546ef06f6305f5875b9e 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -174,6 +174,11 @@ void ParallelExecutor::SplitTensorToPlaces( const std::unordered_map &feed_tensors) { for (auto it : feed_tensors) { auto lod_tensors = it.second.SplitLoDTensor(member_->places_); + PADDLE_ENFORCE_EQ( + member_->places_.size(), lod_tensors.size(), + "The number of samples of current batch is less than the count of " + "devices, currently, it is not allowed. (%d vs %d)", + member_->places_.size(), lod_tensors.size()); for (size_t j = 0; j < member_->places_.size(); ++j) { // TODO(panxy0718): Do I need to delete this var? member_->local_scopes_[j] diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index b93f2f974ca28cfd8d03c0dbbf1d401620a15e53..24dfa6144ae9584f1678e662716da123352430dd 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -87,7 +87,8 @@ class ParallelExecutor(object): # performance. Worth tunning for other models in the future. num_threads = len(self._places) else: - min(len(self._places) * 2, multiprocessing.cpu_count()) + num_threads = min( + len(self._places) * 2, multiprocessing.cpu_count()) main = main_program main = main if main else framework.default_main_program()