提交 3c5bbf42 编写于 作者: C chengduoZH

Make the unit test work

上级 e39adc86
......@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/reduce_util.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
namespace paddle {
namespace framework {
......
......@@ -121,7 +121,7 @@ void ReduceOpHandle::RunImpl() {
auto &p = in_places[i];
auto &lod_tensor = lod_tensors[i];
int dev_id = boost::get<platform::CUDAPlace>(p).device;
auto &nccl_ctx = nccl_ctxs_.at(dev_id);
auto &nccl_ctx = nccl_ctxs_->at(dev_id);
auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_;
......
......@@ -34,15 +34,17 @@ struct ReduceOpHandle : public OpHandleBase {
const std::vector<platform::Place> &places_;
#ifdef PADDLE_WITH_CUDA
const platform::NCCLContextMap &nccl_ctxs_;
const platform::NCCLContextMap *nccl_ctxs_;
ReduceOpHandle(const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap &nccl_ctxs)
const platform::NCCLContextMap *nccl_ctxs)
: local_scopes_(local_scopes), places_(places), nccl_ctxs_(nccl_ctxs) {
for (auto &p_ctx : nccl_ctxs_.contexts_) {
if (nccl_ctxs_) {
for (auto &p_ctx : nccl_ctxs_->contexts_) {
dev_ctxes_[platform::CUDAPlace(p_ctx.first)] = p_ctx.second.ctx_.get();
}
}
}
#else
ReduceOpHandle(const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places)
......
......@@ -44,7 +44,9 @@ struct TestReduceOpHandle {
ctxs_[j]->Wait();
}
#ifdef PADDLE_WITH_CUDA
if (nccl_ctxs_) {
nccl_ctxs_->WaitAll();
}
#endif
}
......@@ -64,6 +66,7 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CUDADeviceContext(p));
}
nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
#else
PADDLE_THROW("CUDA is not support.");
#endif
......@@ -74,11 +77,11 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
}
}
#ifdef PADDLE_WITH_CUDA
nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
nccl_ctxs_.reset(nullptr);
#endif
}
}
void InitReduceOp(size_t input_scope_idx) {
for (size_t j = 0; j < gpu_list_.size(); ++j) {
......@@ -87,15 +90,27 @@ struct TestReduceOpHandle {
}
local_scopes_[input_scope_idx]->Var("input");
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
op_handle_.reset(
new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
#else
PADDLE_THROW("CUDA is not support.");
#endif
} else {
#ifdef PADDLE_WITH_CUDA
op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_, *nccl_ctxs_));
op_handle_.reset(
new ReduceOpHandle(local_scopes_, gpu_list_, nccl_ctxs_.get()));
#else
op_handle_.reset(new ReduceOpHandle(local_scopes_, gpu_list_));
#endif
}
// add input
for (size_t j = 0; j < gpu_list_.size(); ++j) {
if (!use_gpu_) {
op_handle_->dev_ctxes_[gpu_list_[j]] = ctxs_[j].get();
}
vars_.emplace_back(new VarHandle());
VarHandle *in_var_handle = static_cast<VarHandle *>(vars_.back().get());
in_var_handle->place_ = gpu_list_[j];
......@@ -236,25 +251,31 @@ TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceSelectedRows(input_scope_idx);
}
// Runs the LoDTensor reduce check with the GPU flag turned off
// (InitCtxOnGpu(false)), using scope 0 as the input scope.
TEST(ReduceTester, TestCPUReduceTestLodTensor) {
TestReduceOpHandle test_op;
// Index of the local scope passed to both InitReduceOp and the check below.
size_t input_scope_idx = 0;
// `false` requests the non-GPU setup path of the fixture.
test_op.InitCtxOnGpu(false);
// Build the reduce op handle before exercising it.
test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceLodTensors(input_scope_idx);
}
#ifdef PADDLE_WITH_CUDA
// #ifdef PADDLE_WITH_CUDA
//
// TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
// TestReduceOpHandle test_op;
// size_t input_scope_idx = 0;
// test_op.InitCtxOnGpu(true);
// test_op.InitReduceOp(input_scope_idx);
// test_op.TestReduceSelectedRows(input_scope_idx);
// }
//
// TEST(ReduceTester, TestCPUReduceTestLodTensor) {
// TestReduceOpHandle test_op;
// size_t input_scope_idx = 0;
// test_op.InitCtxOnGpu(true);
// test_op.InitReduceOp(input_scope_idx);
// test_op.TestReduceLodTensors(input_scope_idx);
// }
// #endif
// Runs the SelectedRows reduce check with the GPU flag turned on
// (InitCtxOnGpu(true)), using scope 0 as the input scope.
TEST(ReduceTester, TestGPUReduceTestSelectedRows) {
TestReduceOpHandle test_op;
// Index of the local scope passed to both InitReduceOp and the check below.
size_t input_scope_idx = 0;
// `true` requests the GPU setup path of the fixture.
test_op.InitCtxOnGpu(true);
// Build the reduce op handle before exercising it.
test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceSelectedRows(input_scope_idx);
}
// Runs the LoDTensor reduce check with the GPU flag turned on
// (InitCtxOnGpu(true)), using scope 0 as the input scope.
TEST(ReduceTester, TestGPUReduceTestLodTensor) {
TestReduceOpHandle test_op;
// Index of the local scope passed to both InitReduceOp and the check below.
size_t input_scope_idx = 0;
// `true` requests the GPU setup path of the fixture.
test_op.InitCtxOnGpu(true);
// Build the reduce op handle before exercising it.
test_op.InitReduceOp(input_scope_idx);
test_op.TestReduceLodTensors(input_scope_idx);
}
#endif
} // namespace details
} // namespace framework
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/details/reduce_util.h"
namespace paddle {
namespace framework {
namespace details {
struct ReduceLoDTensor {
const std::vector<LoDTensor> &src_tensors_;
LoDTensor &dst_tensor_;
ReduceLoDTensor(const std::vector<LoDTensor> &src, LoDTensor *dst)
: src_tensors_(src), dst_tensor_(*dst) {}
template <typename T>
void operator()() const {
PADDLE_ENFORCE(!src_tensors_.empty());
auto &t0 = src_tensors_[0];
PADDLE_ENFORCE_NE(t0.numel(), 0);
dst_tensor_.Resize(t0.dims());
T *dst = dst_tensor_.mutable_data<T>(platform::CPUPlace());
std::copy(t0.data<T>(), t0.data<T>() + t0.numel(), dst);
for (size_t i = 1; i < src_tensors_.size(); ++i) {
auto &t = src_tensors_[i];
PADDLE_ENFORCE_EQ(t.dims(), t0.dims());
PADDLE_ENFORCE_EQ(t.type(), t0.type());
std::transform(t.data<T>(), t.data<T>() + t.numel(), dst, dst,
[](T a, T b) -> T { return a + b; });
}
}
};
} // namespace details
} // namespace framework
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册