Make the unit test work

3c5bbf42 · chengduoZH · e39adc86 · 3c5bbf42 · 3c5bbf42 · 3c5bbf42
5 changed files
--- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
@@ -14,7 +14,7 @@
 #include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
 #include <algorithm>
-#include "paddle/fluid/framework/details/reduce_util.h"
+#include "paddle/fluid/framework/details/reduce_and_gather.h"
 namespace paddle {
 namespace framework {

--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@@ -121,7 +121,7 @@ void ReduceOpHandle::RunImpl() {
        auto &p = in_places[i];
        auto &lod_tensor = lod_tensors[i];
        int dev_id = boost::get<platform::CUDAPlace>(p).device;
-        auto &nccl_ctx = nccl_ctxs_.at(dev_id);
+        auto &nccl_ctx = nccl_ctxs_->at(dev_id);
        auto stream = nccl_ctx.stream();
        auto comm = nccl_ctx.comm_;

--- a/paddle/fluid/framework/details/reduce_op_handle.h
+++ b/paddle/fluid/framework/details/reduce_op_handle.h
@@ -34,13 +34,15 @@ struct ReduceOpHandle : public OpHandleBase {
  const std::vector<platform::Place> &places_;
 #ifdef PADDLE_WITH_CUDA
-  const platform::NCCLContextMap &nccl_ctxs_;
+  const platform::NCCLContextMap *nccl_ctxs_;
  ReduceOpHandle(const std::vector<Scope *> &local_scopes,
                 const std::vector<platform::Place> &places,
-                 const platform::NCCLContextMap &nccl_ctxs)
+                 const platform::NCCLContextMap *nccl_ctxs)
      : local_scopes_(local_scopes), places_(places), nccl_ctxs_(nccl_ctxs) {
-    for (auto &p_ctx : nccl_ctxs_.contexts_) {
+    if (nccl_ctxs_) {
-      dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get();
+      for (auto &p_ctx : nccl_ctxs_->contexts_) {
+        dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get();
+      }
    }
  }
 #else

--- a/paddle/fluid/framework/details/reduce_op_handle_test.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc
@@ -44,7 +44,9 @@ struct TestReduceOpHandle {
      ctxs_[j]->Wait();
    }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_->WaitAll();
+    if (nccl_ctxs_) {
+      nccl_ctxs_->WaitAll();
+    }
 #endif
  }
@@ -64,6 +66,7 @@ struct TestReduceOpHandle {
        gpu_list_.push_back(p);
        ctxs_.emplace_back(new p::CUDADeviceContext(p));
      }
+      nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
 #else
      PADDLE_THROW("CUDA is not support.");
 #endif
@@ -74,10 +77,10 @@ struct TestReduceOpHandle {
        gpu_list_.push_back(p);
        ctxs_.emplace_back(new p::CPUDeviceContext(p));
      }
-    }
 #ifdef PADDLE_WITH_CUDA
-    nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
+      nccl_ctxs_.reset(nullptr);
 #endif
+    }
  }
  void InitReduceOp(size_t input_scope_idx) {
@@ -87,15 +90,27 @@ struct TestReduceOpHandle {
    }
    local_scopes_[input_scope_idx]->Var("input");
+    if (use_gpu_) {
+#ifdef PADDLE_WITH_CUDA
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
+#else
+      PADDLE_THROW("CUDA is not support.");
+#endif
+    } else {
 #ifdef PADDLE_WITH_CUDA
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_, *nccl_ctxs_));
+      op_handle_.reset(
+          new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
 #else
-    op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
+      op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
 #endif
+    }
    // add input
    for (size_t j = 0; j < gpu_list_.size(); ++j) {
-      op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      if (!use_gpu_) {
+        op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
+      }
      vars_.emplace_back(new VarHandle());
      VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
      in_var_handle->place_ = gpu_list_[j];
@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
  test_op.InitReduceOp(input_scope_idx);
  test_op.TestReduceSelectedRows(input_scope_idx);
 }
+TEST(ReduceTester, TestCPUReduceTestLodTensor) {
+  TestReduceOpHandle test_op;
+  size_t input_scope_idx = 0;
+  test_op.InitCtxOnGpu(false);
+  test_op.InitReduceOp(input_scope_idx);
+  test_op.TestReduceLodTensors(input_scope_idx);
+}
+#ifdef PADDLE_WITH_CUDA
-// #ifdef PADDLE_WITH_CUDA
+TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
-//
+  TestReduceOpHandle test_op;
-// TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
+  size_t input_scope_idx = 0;
-//  TestReduceOpHandle test_op;
+  test_op.InitCtxOnGpu(true);
-//  size_t input_scope_idx = 0;
+  test_op.InitReduceOp(input_scope_idx);
-//  test_op.InitCtxOnGpu(true);
+  test_op.TestReduceSelectedRows(input_scope_idx);
-//  test_op.InitReduceOp(input_scope_idx);
+}
-//  test_op.TestReduceSelectedRows(input_scope_idx);
-// }
+TEST(ReduceTester, TestGPUReduceTestLodTensor) {
-//
+  TestReduceOpHandle test_op;
-// TEST(ReduceTester, TestCPUReduceTestLodTensor) {
+  size_t input_scope_idx = 0;
-//  TestReduceOpHandle test_op;
+  test_op.InitCtxOnGpu(true);
-//  size_t input_scope_idx = 0;
+  test_op.InitReduceOp(input_scope_idx);
-//  test_op.InitCtxOnGpu(true);
+  test_op.TestReduceLodTensors(input_scope_idx);
-//  test_op.InitReduceOp(input_scope_idx);
+}
-//  test_op.TestReduceLodTensors(input_scope_idx);
+#endif
-// }
-// #endif
 }  // namespace details
 }  // namespace framework

--- a/paddle/fluid/framework/details/reduce_util.h
+++ b/paddle/fluid/framework/details/reduce_util.h
-//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include <algorithm>
-#include <vector>
-#include "paddle/fluid/framework/details/reduce_util.h"
-namespace paddle {
-namespace framework {
-namespace details {
-struct ReduceLoDTensor {
-  const std::vector<LoDTensor> &src_tensors_;
-  LoDTensor &dst_tensor_;
-  ReduceLoDTensor(const std::vector<LoDTensor> &src, LoDTensor *dst)
-      : src_tensors_(src), dst_tensor_(*dst) {}
-  template <typename T>
-  void operator()() const {
-    PADDLE_ENFORCE(!src_tensors_.empty());
-    auto &t0 = src_tensors_[0];
-    PADDLE_ENFORCE_NE(t0.numel(), 0);
-    dst_tensor_.Resize(t0.dims());
-    T *dst = dst_tensor_.mutable_data<T>(platform::CPUPlace());
-    std::copy(t0.data<T>(), t0.data<T>() + t0.numel(), dst);
-    for (size_t i = 1; i < src_tensors_.size(); ++i) {
-      auto &t = src_tensors_[i];
-      PADDLE_ENFORCE_EQ(t.dims(), t0.dims());
-      PADDLE_ENFORCE_EQ(t.type(), t0.type());
-      std::transform(t.data<T>(), t.data<T>() + t.numel(), dst, dst,
-                     [](T a, T b) -> T { return a + b; });
-    }
-  }
-};
-}  // namespace details
-}  // namespace framework
-}  // namespace paddle