Unverified commit 44b45b9f authored by Yiqun Liu, committed by GitHub

Correct the use of DeviceContext in unittest sequence_pooling_test and sequence_padding_test (#22456)

* Add a log in memory::Copy for debugging purposes.

* Use the context from DeviceContextPool directly in sequence_pooling_test, instead of creating a new one.

* Use the context from DeviceContextPool directly in sequence_padding_test, instead of creating a new one.
test=develop

* Change the type of second_dim from size_t to int64_t.
test=develop
Parent b80eef79
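For orientation, both tests move from constructing a throwaway DeviceContext to fetching the per-place singleton that DeviceContextPool manages; on CUDA the pooled context owns the stream the rest of the framework issues work on, so copies and kernels stay ordered. Below is a minimal sketch of the pattern, with names taken from the diffs that follow (Paddle 1.x platform API assumed):

```cpp
// Minimal sketch of the DeviceContextPool pattern this commit adopts
// (names taken from the diffs below; error handling omitted).
#include "paddle/fluid/platform/device_context.h"

void Example() {
  // Ask the pool for the per-place singleton instead of `new`-ing a context.
  auto cpu_place = paddle::platform::CPUPlace();
  auto *cpu_ctx = static_cast<paddle::platform::CPUDeviceContext *>(
      paddle::platform::DeviceContextPool::Instance().Get(cpu_place));
  (void)cpu_ctx;

#ifdef PADDLE_WITH_CUDA
  // The pooled CUDADeviceContext owns the framework's stream for this device,
  // so work issued through it is ordered with the framework's own copies.
  auto gpu_place = paddle::platform::CUDAPlace(0);
  auto *gpu_ctx = static_cast<paddle::platform::CUDADeviceContext *>(
      paddle::platform::DeviceContextPool::Instance().Get(gpu_place));
  (void)gpu_ctx;
#endif
}
```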
paddle/fluid/memory/memcpy.cc
@@ -43,8 +43,10 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
     platform::CPUPlace dst_place, void* dst, platform::CUDAPlace src_place,
     const void* src, size_t num, cudaStream_t stream) {
   if (UNLIKELY(num == 0)) return;
   platform::SetDeviceId(src_place.device);
+  VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
+          << dst_place << " by stream(" << stream << ")";
   if (stream) {
     platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CPU");
     platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
@@ -65,6 +67,8 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
   if (UNLIKELY(num == 0)) return;
   platform::SetDeviceId(dst_place.device);
+  VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
+          << dst_place << " by stream(" << stream << ")";
   if (stream) {
     platform::RecordEvent record_event("GpuMemcpyAsync:CPU->GPU");
     platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
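A side note on the logging added above: Paddle logs through glog, so these VLOG(4) lines only appear when verbose logging is enabled at level 4 or higher, typically via the GLOG_v environment variable or the --v flag. A standalone glog sketch (illustrative only, not Paddle code):

```cpp
// Standalone glog sketch; run with GLOG_v=4 to see the verbose line.
#include <glog/logging.h>

int main(int argc, char *argv[]) {
  google::InitGoogleLogging(argv[0]);
  // VLOG(n) prints only when the verbosity level is >= n, so this mirrors
  // how the memory::Copy trace above stays silent in normal runs.
  VLOG(4) << "verbose trace, visible only at v >= 4";
  return 0;
}
```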
paddle/fluid/operators/math/sequence_padding_test.cc
@@ -16,8 +16,9 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <vector>
 
-template <typename DeviceContext, typename Place, typename T>
-void TestSequencePadding(const paddle::framework::LoD& lod,
+template <typename DeviceContext, typename T>
+void TestSequencePadding(const DeviceContext &context,
+                         const paddle::framework::LoD &lod,
                          const size_t sequence_width) {
   paddle::framework::LoDTensor cpu_seq;
   paddle::framework::LoDTensor cpu_seq_back;
@@ -38,12 +39,11 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
     cpu_seq.data<T>()[i] = static_cast<T>(i);
   }
 
-  auto* place = new Place();
-  DeviceContext* context = new DeviceContext(*place);
-  if (paddle::platform::is_cpu_place(*place)) {
+  auto place = context.GetPlace();
+  if (paddle::platform::is_cpu_place(place)) {
     seq = cpu_seq;
   } else {
-    TensorCopySync(cpu_seq, *place, &seq);
+    TensorCopySync(cpu_seq, place, &seq);
     seq.set_lod(lod);
   }
@@ -55,28 +55,28 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
       static_cast<int64_t>(num_sequences),
       static_cast<int64_t>(sequence_width)});
-  padding.mutable_data<T>(padding_dims, *place);
+  padding.mutable_data<T>(padding_dims, place);
 
-  T* pad_value_data =
+  T *pad_value_data =
       cpu_pad_value.mutable_data<T>({1}, paddle::platform::CPUPlace());
   *pad_value_data = static_cast<T>(0);
-  if (paddle::platform::is_cpu_place(*place)) {
+  if (paddle::platform::is_cpu_place(place)) {
     pad_value = cpu_pad_value;
   } else {
-    TensorCopySync(cpu_pad_value, *place, &pad_value);
+    TensorCopySync(cpu_pad_value, place, &pad_value);
   }
 
   paddle::operators::math::PaddingLoDTensorFunctor<DeviceContext, T>()(
-      *context, seq, &padding, pad_value, -1, 0, false,
+      context, seq, &padding, pad_value, -1, 0, false,
       paddle::operators::math::kLengthBatchWidth);
 
   seq_back.set_lod(lod);
-  seq_back.mutable_data<T>(seq_dims, *place);
+  seq_back.mutable_data<T>(seq_dims, place);
   paddle::operators::math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
-      *context, padding, &seq_back, -1, 0, false,
+      context, padding, &seq_back, -1, 0, false,
       paddle::operators::math::kLengthBatchWidth);
 
-  if (paddle::platform::is_cpu_place(*place)) {
+  if (paddle::platform::is_cpu_place(place)) {
     cpu_seq_back = seq_back;
   } else {
     TensorCopySync(seq_back, paddle::platform::CPUPlace(), &cpu_seq_back);
@@ -88,33 +88,38 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
   for (int64_t i = 0; i < cpu_seq.numel(); ++i) {
     EXPECT_EQ(cpu_seq.data<T>()[i], cpu_seq_back.data<T>()[i]);
   }
-
-  delete place;
-  delete context;
 }
 
 TEST(Seq2BatchPadding, CPU) {
+  auto place = paddle::platform::CPUPlace();
+  auto *context = static_cast<paddle::platform::CPUDeviceContext *>(
+      paddle::platform::DeviceContextPool::Instance().Get(place));
+
   paddle::framework::LoD lod1;
   lod1.push_back(std::vector<size_t>{0, 10});
-  TestSequencePadding<paddle::platform::CPUDeviceContext,
-                      paddle::platform::CPUPlace, float>(lod1, 16);
+  TestSequencePadding<paddle::platform::CPUDeviceContext, float>(*context, lod1,
+                                                                 16);
 
   paddle::framework::LoD lod2;
   lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
-  TestSequencePadding<paddle::platform::CPUDeviceContext,
-                      paddle::platform::CPUPlace, float>(lod2, 128);
+  TestSequencePadding<paddle::platform::CPUDeviceContext, float>(*context, lod2,
+                                                                 128);
 }
 
 #ifdef PADDLE_WITH_CUDA
 TEST(SequencePadding, CUDA) {
+  auto place = paddle::platform::CUDAPlace(0);
+  auto *context = static_cast<paddle::platform::CUDADeviceContext *>(
+      paddle::platform::DeviceContextPool::Instance().Get(place));
+
   paddle::framework::LoD lod1;
   lod1.push_back(std::vector<size_t>{0, 10});
-  TestSequencePadding<paddle::platform::CUDADeviceContext,
-                      paddle::platform::CUDAPlace, float>(lod1, 16);
+  TestSequencePadding<paddle::platform::CUDADeviceContext, float>(*context,
+                                                                  lod1, 16);
 
   paddle::framework::LoD lod2;
   lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
-  TestSequencePadding<paddle::platform::CUDADeviceContext,
-                      paddle::platform::CUDAPlace, float>(lod2, 128);
+  TestSequencePadding<paddle::platform::CUDADeviceContext, float>(*context,
                                                                   lod2, 128);
 }
 #endif
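For readers unfamiliar with LoD (level-of-detail) tensors: the {0, 2, 7, 10} vectors passed to these tests are cumulative sequence boundaries, not lengths. They encode three sequences of lengths 2, 5, and 3 packed into ten time steps. A small standalone sketch of that decoding (illustrative only, not Paddle code):

```cpp
// Decode LoD offsets into per-sequence ranges (illustrative only).
#include <cstdio>
#include <vector>

int main() {
  // {0, 2, 7, 10} = 3 sequences over 10 time steps; lengths are the
  // differences between consecutive offsets.
  std::vector<size_t> lod = {0, 2, 7, 10};
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    std::printf("sequence %zu: steps [%zu, %zu), length %zu\n", i, lod[i],
                lod[i + 1], lod[i + 1] - lod[i]);
  }
  return 0;
}
```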
paddle/fluid/operators/math/sequence_pooling.cu
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <algorithm>
 #include <string>
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sequence_pooling.h"
@@ -166,7 +167,7 @@ class SequencePoolFunctor<platform::CUDADeviceContext, T> {
     auto& lod = input.lod()[lod_level - 1];
     const size_t item_dim = output->numel() / output->dims()[0];
     dim3 threads(1024, 1);
-    dim3 grid(lod.size(), 1);
+    dim3 grid(std::max(lod.size() - 1, 1UL), 1);
     if (pooltype == "MAX") {
       sequence_pool_kernel<
           T, MaxPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
@@ -330,7 +331,7 @@ class SequencePoolGradFunctor<platform::CUDADeviceContext, T> {
     auto& lod = in_grad->lod()[lod_level - 1];
     const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
     dim3 threads(1024, 1);
-    dim3 grid(lod.size(), 1);
+    dim3 grid(std::max(lod.size() - 1, 1UL), 1);
     if (pooltype == "MAX") {
       sequence_pool_grad_kernel<
           T, MaxPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
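The grid-size change above follows from that LoD layout: an offsets vector of size N encodes N - 1 sequences, so launching lod.size() blocks started one block too many, and std::max(lod.size() - 1, 1UL) launches one block per sequence while guarding against a zero-sized grid. A quick standalone check of the arithmetic (not Paddle code):

```cpp
// Quick check of the grid-size arithmetic (illustrative only).
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<size_t> lod = {0, 2, 7, 10};  // 4 offsets encode 3 sequences
  size_t old_grid_x = lod.size();                     // 4: one block too many
  size_t new_grid_x = std::max(lod.size() - 1, 1UL);  // 3: one per sequence
  std::printf("old grid.x = %zu, new grid.x = %zu\n", old_grid_x, new_grid_x);
  return 0;
}
```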
paddle/fluid/operators/math/sequence_pooling_test.cc
@@ -16,18 +16,19 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <vector>
 
-template <typename DeviceContext, typename Place, typename T>
-void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
+template <typename DeviceContext, typename T>
+void TestSequencePoolingSum(const DeviceContext &context,
+                            const paddle::framework::LoD &lod,
+                            const int64_t second_dim) {
   paddle::framework::LoDTensor cpu_out_grad;
   paddle::framework::LoDTensor cpu_in_grad;
   paddle::framework::LoDTensor out_grad;
   paddle::framework::LoDTensor in_grad;
-  const size_t second_dim = 128u;
 
   // construct out_grad's tensor in cpu
   const size_t out_first_dim = lod[0].size() - 1;
   auto out_dims = paddle::framework::make_ddim(
-      {static_cast<int64_t>(out_first_dim), static_cast<int64_t>(second_dim)});
+      {static_cast<int64_t>(out_first_dim), second_dim});
 
   cpu_out_grad.mutable_data<T>(out_dims, paddle::platform::CPUPlace());
   for (int64_t i = 0; i < cpu_out_grad.numel(); ++i) {
@@ -35,19 +36,18 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
   }
 
   // copy to dst out_grad
-  auto* place = new Place();
-  DeviceContext* context = new DeviceContext(*place);
-  if (paddle::platform::is_cpu_place(*place)) {
+  auto place = context.GetPlace();
+  if (paddle::platform::is_cpu_place(place)) {
     out_grad = cpu_out_grad;
   } else {
-    TensorCopySync(cpu_out_grad, *place, &out_grad);
+    TensorCopySync(cpu_out_grad, place, &out_grad);
   }
 
   // construct in_grad
   in_grad.set_lod(lod);
   auto in_dims = paddle::framework::make_ddim(
-      {static_cast<int64_t>(lod[0].back()), static_cast<int64_t>(second_dim)});
-  in_grad.mutable_data<T>(in_dims, context->GetPlace());
+      {static_cast<int64_t>(lod[0].back()), second_dim});
+  in_grad.mutable_data<T>(in_dims, place);
 
   // check tensor construction result
   PADDLE_ENFORCE_EQ(in_grad.dims().size(), out_grad.dims().size());
@@ -57,9 +57,9 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
   // call functor
   paddle::operators::math::SequencePoolGradFunctor<DeviceContext, T>()(
-      *context, "SUM", out_grad, &in_grad);
+      context, "SUM", out_grad, &in_grad);
 
-  if (paddle::platform::is_cpu_place(*place)) {
+  if (paddle::platform::is_cpu_place(place)) {
     cpu_in_grad = in_grad;
   } else {
     TensorCopySync(in_grad, paddle::platform::CPUPlace(), &cpu_in_grad);
@@ -69,12 +69,12 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
   EXPECT_EQ(in_grad.numel(), static_cast<int64_t>(lod[0].back() * second_dim));
   EXPECT_EQ(in_grad.lod(), lod);
 
-  if (paddle::platform::is_cpu_place(*place)) {
+  if (paddle::platform::is_cpu_place(place)) {
     for (size_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) {
       int64_t begin = in_grad.lod()[0][i];
       int64_t end = in_grad.lod()[0][i + 1];
       paddle::framework::Tensor tmp = in_grad.Slice(begin, end);
-      for (size_t j = 0; j != tmp.numel() / second_dim; ++j) {
+      for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) {
         for (int64_t m = 0; m != second_dim; ++m) {
           EXPECT_EQ(tmp.data<T>()[m + j * second_dim],
                     out_grad.data<T>()[m + i * second_dim]);
@@ -86,7 +86,7 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
       int64_t begin = cpu_in_grad.lod()[0][i];
      int64_t end = cpu_in_grad.lod()[0][i + 1];
       paddle::framework::Tensor tmp = cpu_in_grad.Slice(begin, end);
-      for (size_t j = 0; j != tmp.numel() / second_dim; ++j) {
+      for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) {
         for (int64_t m = 0; m != second_dim; ++m) {
           EXPECT_EQ(tmp.data<T>()[m + j * second_dim],
                     cpu_out_grad.data<T>()[m + i * second_dim]);
@@ -94,33 +94,38 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
       }
     }
   }
-
-  delete place;
-  delete context;
 }
 
 TEST(SequencePoolingGrad, CPU_SUM) {
+  auto place = paddle::platform::CPUPlace();
+  auto *context = static_cast<paddle::platform::CPUDeviceContext *>(
+      paddle::platform::DeviceContextPool::Instance().Get(place));
+
   paddle::framework::LoD lod1;
   lod1.push_back(std::vector<size_t>{0, 10});
-  TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
-                         paddle::platform::CPUPlace, float>(lod1);
+  TestSequencePoolingSum<paddle::platform::CPUDeviceContext, float>(*context,
                                                                     lod1, 128);
 
   paddle::framework::LoD lod2;
   lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
-  TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
-                         paddle::platform::CPUPlace, float>(lod2);
+  TestSequencePoolingSum<paddle::platform::CPUDeviceContext, float>(*context,
                                                                     lod2, 128);
 }
 
 #ifdef PADDLE_WITH_CUDA
 TEST(SequencePoolingGrad, CUDA_SUM) {
+  auto place = paddle::platform::CUDAPlace(0);
+  auto *context = static_cast<paddle::platform::CUDADeviceContext *>(
+      paddle::platform::DeviceContextPool::Instance().Get(place));
+
   paddle::framework::LoD lod1;
   lod1.push_back(std::vector<size_t>{0, 10});
-  TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
-                         paddle::platform::CUDAPlace, float>(lod1);
+  TestSequencePoolingSum<paddle::platform::CUDADeviceContext, float>(*context,
                                                                      lod1, 128);
 
   paddle::framework::LoD lod2;
   lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
-  TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
-                         paddle::platform::CUDAPlace, float>(lod2);
+  TestSequencePoolingSum<paddle::platform::CUDADeviceContext, float>(*context,
                                                                      lod2, 128);
 }
 #endif
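What the pooling test asserts is the SUM-pooling backward rule: with y_i equal to the sum of the rows x_j in sequence i, the gradient dy_i/dx_j is 1, so every time step in a sequence simply receives its sequence's row of out_grad. A minimal sketch of the expected in_grad (standalone, not Paddle code):

```cpp
// Expected SUM-pooling backward result that the test checks (illustrative).
#include <cstdio>
#include <vector>

int main() {
  std::vector<size_t> lod = {0, 2, 7, 10};  // 3 sequences over 10 steps
  const size_t second_dim = 4;              // feature width (example value)
  // out_grad has one row per sequence; in_grad has one row per time step.
  std::vector<float> out_grad((lod.size() - 1) * second_dim, 1.0f);
  std::vector<float> in_grad(lod.back() * second_dim, 0.0f);
  // Each step j inside sequence i copies sequence i's gradient row.
  for (size_t i = 0; i + 1 < lod.size(); ++i)
    for (size_t j = lod[i]; j < lod[i + 1]; ++j)
      for (size_t m = 0; m < second_dim; ++m)
        in_grad[j * second_dim + m] = out_grad[i * second_dim + m];
  std::printf("in_grad rows = %zu\n", in_grad.size() / second_dim);
  return 0;
}
```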