From ea42e431f8a2cf2845043dd9b78461c607f40792 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Mon, 22 Apr 2019 13:32:24 +0800 Subject: [PATCH] Speed unit testing. (#16978) * Speed affine_channel_op unit testing * Add check in tensor_py * Fix ONLY_CPU Compiling --- paddle/fluid/operators/affine_channel_op.cu | 5 +++- paddle/fluid/pybind/tensor_py.h | 30 ++++++++++++------- .../tests/unittests/test_affine_channel_op.py | 18 +++++++++-- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu index e1435c29d..6bc0a2635 100644 --- a/paddle/fluid/operators/affine_channel_op.cu +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -65,6 +65,9 @@ class AffineChannelCUDAKernel : public framework::OpKernel { int block = 1024; int grid = (num + block - 1) / block; + + int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + grid = std::min(std::max(max_threads / block, 1), grid); if (layout == framework::DataLayout::kNCHW) { KeAffineChannelCUDA<<>>( @@ -162,7 +165,7 @@ class AffineChannelGradCUDAKernel : public framework::OpKernel { } } else { if (dx) { - KeAffineChannelCUDA<<>>( dy_d, s_d, nullptr, C, HxW, num, dx_d); } diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 4a780f1cb..a30c7a723 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -138,25 +138,33 @@ inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) { template T TensorGetElement(const framework::Tensor &self, size_t offset) { + PADDLE_ENFORCE_LT(offset, self.numel()); + T b = static_cast(0); if (platform::is_cpu_place(self.place())) { - return self.data()[offset]; + b = self.data()[offset]; +#ifdef PADDLE_WITH_CUDA } else { - std::shared_ptr dst(new framework::Tensor); - framework::TensorCopySync(self, platform::CPUPlace(), dst.get()); - return dst->data()[offset]; + const T *a = self.data(); + auto p = boost::get(self.place()); + paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T), + nullptr); +#endif } + return b; } -// TODO(dzhwinter) : fix the redundant Tensor allocate and free template void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { - if (platform::is_gpu_place(self->place())) { - framework::Tensor dst; - framework::TensorCopySync(*self, platform::CPUPlace(), &dst); - dst.mutable_data(platform::CPUPlace())[offset] = elem; - framework::TensorCopySync(dst, self->place(), self); - } else if (platform::is_cpu_place(self->place())) { + PADDLE_ENFORCE_LT(offset, self->numel()); + if (platform::is_cpu_place(self->place())) { self->mutable_data(self->place())[offset] = elem; +#ifdef PADDLE_WITH_CUDA + } else { + auto p = boost::get(self->place()); + T *a = self->mutable_data(p); + paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T), + nullptr); +#endif } } diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py index 2c9a063e6..429d8ae94 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -70,6 +70,12 @@ class TestAffineChannelNHWC(TestAffineChannelOp): self.C = 32 self.layout = 'NHWC' + def test_check_grad_stopgrad_dx(self): + return + + def test_check_grad_stopgrad_dscale_dbias(self): + return + class TestAffineChannel2D(TestAffineChannelOp): def init_test_case(self): @@ -77,10 +83,16 @@ class TestAffineChannel2D(TestAffineChannelOp): self.C = 64 self.layout = 'NCHW' + def test_check_grad_stopgrad_dx(self): + return + + def test_check_grad_stopgrad_dscale_dbias(self): + return + class TestAffineChannelNCHWLargeShape(TestAffineChannelOp): def init_test_case(self): - self.shape = [64, 128, 112, 112] + self.shape = [4, 128, 112, 112] self.C = 128 self.layout = 'NCHW' @@ -95,9 +107,9 @@ class TestAffineChannelNCHWLargeShape(TestAffineChannelOp): pass -class TestAffineChannelNCHWLargeShape(TestAffineChannelNCHWLargeShape): +class TestAffineChannelNHWCLargeShape(TestAffineChannelNCHWLargeShape): def init_test_case(self): - self.shape = [64, 112, 112, 512] + self.shape = [64, 32, 32, 512] self.C = 512 self.layout = 'NHWC' -- GitLab