diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 13f8e15b92a0ef4643d2e72e4c14fb7dadc527b9..101141284ba4c8ac34c868aae18dc04c924b2aed 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -250,6 +250,8 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_inplace_pass";
     graph = inplace_pass->Apply(graph);
     VLOG(10) << "buffer_shared_inplace_pass Applied";
+    LOG(INFO) << "Inplace strategy is enabled, when "
+                 "build_strategy.enable_inplace = True";
   }
 
   /**
@@ -278,6 +280,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_cross_op_memory_reuse_pass";
     graph = cross_op_memory_reuse_pass->Apply(graph);
     VLOG(10) << "buffer_shared_cross_op_memory_reuse_pass Applied";
+    LOG(INFO) << "Cross op memory reuse strategy is enabled, when "
+                 "build_strategy.memory_optimize = True or garbage collection "
+                 "strategy is disabled, which is not recommended";
   }
 
   if (!is_gc_enabled) {
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 8b45a0b0311531f24cef731acd94f7bae59b4836..d2d0f6248ea0ca5edff6a3237c2e9a1352e80e65 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -116,6 +116,11 @@ cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
 cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
 cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op)
 nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor)
+if (WITH_GPU)
+  nv_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc test_leaky_relu_grad_grad_functor.cu DEPS tensor device_context eigen3)
+else()
+  cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
+endif()
 
 if (WITH_PYTHON)
     cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 531e89a5efd9619a502a394b1b5d8f7c995d49d1..b3bc5c1c90d2bccc81012826b0cb13ab8460a649 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -716,8 +716,8 @@ class LeakyReluDoubleGradMaker
   std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
     auto* op = new ::paddle::framework::OpDesc();
     op->SetType("leaky_relu_grad_grad");
-    // input1: X
-    op->SetInput("X", Input("X"));
+    // input1: Out
+    op->SetInput("Out", Input("Out"));
     // X@GRAD@GRAD: ddx
     op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
     op->SetAttrMap(Attrs());
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 7afa7be25320d7ce5dae501df84ceeca9703c447..7e00eab466d5a7f568b21e7e7761cbd91ab93848 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -463,8 +463,8 @@ struct HardShrinkFunctor : public BaseActivationFunctor<T> {
   }
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
-    auto temp1 = (x < static_cast<T>(threshold * -1)).template cast<T>().eval();
-    auto temp2 = (x > static_cast<T>(threshold)).template cast<T>().eval();
+    auto temp1 = (x < static_cast<T>(threshold * -1)).template cast<T>();
+    auto temp2 = (x > static_cast<T>(threshold)).template cast<T>();
     out.device(d) = x * (temp1 + temp2);
   }
 };
@@ -480,8 +480,8 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    auto temp1 = (x < static_cast<T>(threshold * -1)).template cast<T>().eval();
-    auto temp2 = (x > static_cast<T>(threshold)).template cast<T>().eval();
+    auto temp1 = (x < static_cast<T>(threshold * -1)).template cast<T>();
+    auto temp2 = (x > static_cast<T>(threshold)).template cast<T>();
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
@@ -500,8 +500,8 @@ struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
     auto lambdaT = static_cast<T>(lambda);
-    auto temp1 = (x > lambdaT).template cast<T>().eval();
-    auto temp2 = (x < -lambdaT).template cast<T>().eval();
+    auto temp1 = (x > lambdaT).template cast<T>();
+    auto temp2 = (x < -lambdaT).template cast<T>();
     out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT);
   }
 };
@@ -516,8 +516,8 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
     auto lambdaT = static_cast<T>(lambda);
-    auto temp1 = (x > lambdaT).template cast<T>().eval();
-    auto temp2 = (x < -lambdaT).template cast<T>().eval();
+    auto temp1 = (x > lambdaT).template cast<T>();
+    auto temp2 = (x < -lambdaT).template cast<T>();
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
@@ -1043,7 +1043,7 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
     auto tmp = static_cast<T>(threshold);
-    auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
+    auto temp = ((out > -tmp) * (out < tmp)).template cast<T>();
     dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
   }
 
@@ -1072,13 +1072,13 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    auto temp1 = static_cast<T>(alpha) *
-                 (x < static_cast<T>(0)).template cast<T>().eval();
-    auto temp2 = (x >= static_cast<T>(0)).template cast<T>().eval();
+    auto temp1 =
+        static_cast<T>(alpha) * (out < static_cast<T>(0)).template cast<T>();
+    auto temp2 = (out >= static_cast<T>(0)).template cast<T>();
     dx.device(d) = dout * (temp1 + temp2).template cast<T>();
   }
 
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };
 
 template <typename T>
@@ -1413,19 +1413,19 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
                   const framework::Tensor* Out, const framework::Tensor* ddX,
                   framework::Tensor* ddOut, framework::Tensor* dOut,
                   framework::Tensor* dX) const {
-    auto* d = dev.eigen_device();
-    auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
-    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
     if (ddOut) {
+      auto* d = dev.eigen_device();
+      auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
+      auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
       auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
-      ddout.device(*d) = ddx *
-                         ((x >= static_cast<T>(0)).template cast<T>().eval() +
-                          static_cast<T>(alpha) *
-                              (x < static_cast<T>(0)).template cast<T>().eval())
-                             .template cast<T>();
+      ddout.device(*d) =
+          ddx *
+          ((out >= static_cast<T>(0)).template cast<T>() +
+           static_cast<T>(alpha) * (out < static_cast<T>(0)).template cast<T>())
+              .template cast<T>();
     }
   }
-  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };
 
 template <typename T>
diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cc b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..77e74e3f81ce7205164681b6459e7a327519eecc
--- /dev/null
+++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h"
+
+namespace paddle {
+namespace operators {
+
+TEST(leaky_relu_grad_grad, test_cpu) {
+  ASSERT_TRUE(
+      TestLeakyReluGradGradMain<float>({32, 64}, platform::CPUPlace(), 0.02));
+}
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cu b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bb1afaea621ea35c3a239d35c0150f2e33684542
--- /dev/null
+++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.cu
@@ -0,0 +1,26 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h"
+
+namespace paddle {
+namespace operators {
+
+TEST(leaky_relu_grad_grad, test_gpu) {
+  ASSERT_TRUE(
+      TestLeakyReluGradGradMain<float>({32, 64}, platform::CUDAPlace(0), 0.15));
+}
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe9bf969b1de3893712c3f90715aac303a1d6ef5
--- /dev/null
+++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h
@@ -0,0 +1,124 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <random>
+#include "gtest/gtest.h"
+#include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/platform/for_range.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+static void InitRandom(framework::Tensor *tensor,
+                       const platform::Place &place) {
+  framework::Tensor cpu_tensor;
+  auto *cpu_ptr =
+      cpu_tensor.mutable_data<T>(tensor->dims(), platform::CPUPlace());
+  int64_t numel = cpu_tensor.numel();
+  std::mt19937 engine;
+  std::uniform_real_distribution<T> dist(static_cast<T>(-2.0),
+                                         static_cast<T>(2.0));
+  for (int64_t i = 0; i < numel; ++i) {
+    cpu_ptr[i] = dist(engine);
+  }
+  framework::TensorCopySync(cpu_tensor, place, tensor);
+}
+
+template <typename T>
+struct LeakyReluGradGradEachElementFunctor {
+  LeakyReluGradGradEachElementFunctor(const T *ddx, const T *out, T alpha,
+                                      T *ddout)
+      : ddx_(ddx), out_(out), alpha_(alpha), ddout_(ddout) {}
+
+  HOSTDEVICE void operator()(int idx) {
+    if (out_[idx] >= 0) {
+      ddout_[idx] = ddx_[idx];
+    } else {
+      ddout_[idx] = ddx_[idx] * alpha_;
+    }
+  }
+
+  const T *ddx_;
+  const T *out_;
+  T alpha_;
+  T *ddout_;
+};
+
+template <typename T>
+static bool TestLeakyReluGradGradMain(const framework::DDim &dim,
+                                      const platform::Place &place,
+                                      float alpha) {
+  LeakyReluGradGradFunctor<T> functor;
+  functor.alpha = alpha;
+  auto &dev_ctx = *platform::DeviceContextPool::Instance().Get(place);
+  framework::Tensor *x = nullptr;
+  framework::Tensor *dout = nullptr;
+  framework::Tensor *dx = nullptr;
+
+  framework::Tensor out;
+  out.Resize(dim);
+  InitRandom<T>(&out, place);
+
+  framework::Tensor ddx;
+  ddx.Resize(dim);
+  InitRandom<T>(&ddx, place);
+
+  framework::Tensor ddout;
+  ddout.Resize(dim);
+  InitRandom<T>(&ddout, place);
+
+  framework::Tensor ddout_actual;
+  ddout_actual.mutable_data<T>(dim, place);
+  LeakyReluGradGradEachElementFunctor<T> actual_functor(
+      ddx.data<T>(), out.data<T>(), static_cast<T>(alpha),
+      ddout_actual.data<T>());
+
+  int64_t limit = out.numel();
+
+#ifdef __NVCC__
+  if (platform::is_gpu_place(place)) {
+    auto &cuda_dev_ctx = dynamic_cast<platform::CUDADeviceContext &>(dev_ctx);
+    functor(cuda_dev_ctx, x, &out, &ddx, &ddout, dout, dx);
+    platform::ForRange<platform::CUDADeviceContext> for_range(cuda_dev_ctx,
+                                                              limit);
+    for_range(actual_functor);
+  } else {
+#endif
+    auto &cpu_dev_ctx = dynamic_cast<platform::CPUDeviceContext &>(dev_ctx);
+    functor(cpu_dev_ctx, x, &out, &ddx, &ddout, dout, dx);
+    platform::ForRange<platform::CPUDeviceContext> for_range(cpu_dev_ctx,
+                                                             limit);
+    for_range(actual_functor);
+#ifdef __NVCC__
+  }
+#endif
+
+  dev_ctx.Wait();
+
+  framework::Tensor ddout_cpu, ddout_actual_cpu;
+  framework::TensorCopySync(ddout, platform::CPUPlace(), &ddout_cpu);
+  framework::TensorCopySync(ddout_actual, platform::CPUPlace(),
+                            &ddout_actual_cpu);
+
+  bool is_equal = std::equal(ddout_cpu.data<T>(), ddout_cpu.data<T>() + limit,
+                             ddout_actual_cpu.data<T>());
+  return is_equal;
+}
+
+}  // namespace operators
+}  // namespace paddle