提交 3c5bbf42 编写于 作者: C chengduoZH

make unit test to work

上级 e39adc86
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
#include <algorithm> #include <algorithm>
#include "paddle/fluid/framework/details/reduce_util.h" #include "paddle/fluid/framework/details/reduce_and_gather.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -121,7 +121,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -121,7 +121,7 @@ void ReduceOpHandle::RunImpl() {
auto &p = in_places[i]; auto &p = in_places[i];
auto &lod_tensor = lod_tensors[i]; auto &lod_tensor = lod_tensors[i];
int dev_id = boost::get<platform::CUDAPlace>(p).device; int dev_id = boost::get<platform::CUDAPlace>(p).device;
auto &nccl_ctx = nccl_ctxs_.at(dev_id); auto &nccl_ctx = nccl_ctxs_->at(dev_id);
auto stream = nccl_ctx.stream(); auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_; auto comm = nccl_ctx.comm_;
......
...@@ -34,13 +34,15 @@ struct ReduceOpHandle : public OpHandleBase { ...@@ -34,13 +34,15 @@ struct ReduceOpHandle : public OpHandleBase {
const std::vector<platform::Place> &places_; const std::vector<platform::Place> &places_;
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
const platform::NCCLContextMap &nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
ReduceOpHandle(const std::vector<Scope *> &local_scopes, ReduceOpHandle(const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
const platform::NCCLContextMap &nccl_ctxs) const platform::NCCLContextMap *nccl_ctxs)
: local_scopes_(local_scopes), places_(places), nccl_ctxs_(nccl_ctxs) { : local_scopes_(local_scopes), places_(places), nccl_ctxs_(nccl_ctxs) {
for (auto &p_ctx : nccl_ctxs_.contexts_) { if (nccl_ctxs_) {
dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get(); for (auto &p_ctx : nccl_ctxs_->contexts_) {
dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get();
}
} }
} }
#else #else
......
...@@ -44,7 +44,9 @@ struct TestReduceOpHandle { ...@@ -44,7 +44,9 @@ struct TestReduceOpHandle {
ctxs_[j]->Wait(); ctxs_[j]->Wait();
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
nccl_ctxs_->WaitAll(); if (nccl_ctxs_) {
nccl_ctxs_->WaitAll();
}
#endif #endif
} }
...@@ -64,6 +66,7 @@ struct TestReduceOpHandle { ...@@ -64,6 +66,7 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p); gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CUDADeviceContext(p)); ctxs_.emplace_back(new p::CUDADeviceContext(p));
} }
nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
#else #else
PADDLE_THROW("CUDA is not support."); PADDLE_THROW("CUDA is not support.");
#endif #endif
...@@ -74,10 +77,10 @@ struct TestReduceOpHandle { ...@@ -74,10 +77,10 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p); gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p)); ctxs_.emplace_back(new p::CPUDeviceContext(p));
} }
}
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_)); nccl_ctxs_.reset(nullptr);
#endif #endif
}
} }
void InitReduceOp(size_t input_scope_idx) { void InitReduceOp(size_t input_scope_idx) {
...@@ -87,15 +90,27 @@ struct TestReduceOpHandle { ...@@ -87,15 +90,27 @@ struct TestReduceOpHandle {
} }
local_scopes_[input_scope_idx]->Var("input"); local_scopes_[input_scope_idx]->Var("input");
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
op_handle_.reset(
new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
#else
PADDLE_THROW("CUDA is not support.");
#endif
} else {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_, *nccl_ctxs_)); op_handle_.reset(
new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
#else #else
op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_)); op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
#endif #endif
}
// add input // add input
for (size_t j = 0; j < gpu_list_.size(); ++j) { for (size_t j = 0; j < gpu_list_.size(); ++j) {
op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get(); if (!use_gpu_) {
op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
}
vars_.emplace_back(new VarHandle()); vars_.emplace_back(new VarHandle());
VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get()); VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
in_var_handle->place_ = gpu_list_[j]; in_var_handle->place_ = gpu_list_[j];
...@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) { ...@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
test_op.InitReduceOp(input_scope_idx); test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceSelectedRows(input_scope_idx); test_op.TestReduceSelectedRows(input_scope_idx);
} }
TEST(ReduceTester, TestCPUReduceTestLodTensor) {
TestReduceOpHandle test_op;
size_t input_scope_idx = 0;
test_op.InitCtxOnGpu(false);
test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceLodTensors(input_scope_idx);
}
#ifdef PADDLE_WITH_CUDA
// #ifdef PADDLE_WITH_CUDA TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
// TestReduceOpHandle test_op;
// TEST(ReduceTester, TestGPUReduceTestSelectedRows) { size_t input_scope_idx = 0;
// TestReduceOpHandle test_op; test_op.InitCtxOnGpu(true);
// size_t input_scope_idx = 0; test_op.InitReduceOp(input_scope_idx);
// test_op.InitCtxOnGpu(true); test_op.TestReduceSelectedRows(input_scope_idx);
// test_op.InitReduceOp(input_scope_idx); }
// test_op.TestReduceSelectedRows(input_scope_idx);
// } TEST(ReduceTester, TestGPUReduceTestLodTensor) {
// TestReduceOpHandle test_op;
// TEST(ReduceTester, TestCPUReduceTestLodTensor) { size_t input_scope_idx = 0;
// TestReduceOpHandle test_op; test_op.InitCtxOnGpu(true);
// size_t input_scope_idx = 0; test_op.InitReduceOp(input_scope_idx);
// test_op.InitCtxOnGpu(true); test_op.TestReduceLodTensors(input_scope_idx);
// test_op.InitReduceOp(input_scope_idx); }
// test_op.TestReduceLodTensors(input_scope_idx); #endif
// }
// #endif
} // namespace details } // namespace details
} // namespace framework } // namespace framework
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/details/reduce_util.h"
namespace paddle {
namespace framework {
namespace details {
// Functor that sums a list of same-shaped CPU LoDTensors element-wise into a
// single destination tensor. Intended for use with a type-dispatch visitor:
// operator()<T> is instantiated for the tensors' element type T.
struct ReduceLoDTensor {
  const std::vector<LoDTensor> &src_tensors_;  // tensors to be summed (non-owning)
  LoDTensor &dst_tensor_;                      // receives the element-wise sum

  // `src` must stay alive while this functor is used; `dst` is written in-place.
  ReduceLoDTensor(const std::vector<LoDTensor> &src, LoDTensor *dst)
      : src_tensors_(src), dst_tensor_(*dst) {}

  template <typename T>
  void operator()() const {
    PADDLE_ENFORCE(!src_tensors_.empty());
    auto &first = src_tensors_[0];
    PADDLE_ENFORCE_NE(first.numel(), 0);

    // Shape the destination after the first source and seed the accumulator
    // with its contents; remaining tensors are added on top.
    dst_tensor_.Resize(first.dims());
    T *accum = dst_tensor_.mutable_data<T>(platform::CPUPlace());
    std::copy(first.data<T>(), first.data<T>() + first.numel(), accum);

    for (size_t idx = 1; idx < src_tensors_.size(); ++idx) {
      auto &cur = src_tensors_[idx];
      // All inputs must agree on shape and element type with the first tensor.
      PADDLE_ENFORCE_EQ(cur.dims(), first.dims());
      PADDLE_ENFORCE_EQ(cur.type(), first.type());

      const T *src_begin = cur.data<T>();
      // In-place accumulation: reading and writing `accum` in a binary
      // std::transform over the same range is well-defined.
      std::transform(src_begin, src_begin + cur.numel(), accum, accum,
                     [](T lhs, T rhs) -> T { return lhs + rhs; });
    }
  }
};
} // namespace details
} // namespace framework
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册