diff --git a/paddle/fluid/operators/elementwise/CMakeLists.txt b/paddle/fluid/operators/elementwise/CMakeLists.txt
index 5d468316e8eacb73c4a4ce81c784880bb5e46c2d..37be1116ab30e492eb125acdd91897bb96ae9958 100644
--- a/paddle/fluid/operators/elementwise/CMakeLists.txt
+++ b/paddle/fluid/operators/elementwise/CMakeLists.txt
@@ -1,2 +1,4 @@
 include(operators)
 register_operators()
+
+cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor)
diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b8163169734bd2c64412bab7286aca9cc5e1b830
--- /dev/null
+++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstring>
+#include <random>
+#include <vector>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/place.h"
+
+USE_OP(elementwise_add);
+
+namespace paddle {
+namespace operators {
+
+static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) {
+  if (copy_to_gpu) {
+#ifdef PADDLE_WITH_CUDA
+    PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
+#else
+    PADDLE_THROW("Not compiled with cuda");
+#endif
+  } else {
+    std::memcpy(dst, src, n);
+  }
+}
+
+template <typename T>
+bool TestMain(const platform::Place &place, const framework::DDim &dims,
+              bool inplace) {
+  framework::Scope scope;
+  auto *x = scope.Var("x")->GetMutable<framework::LoDTensor>();
+  auto *y = scope.Var("y")->GetMutable<framework::LoDTensor>();
+  auto *z = scope.Var("z")->GetMutable<framework::LoDTensor>();
+
+  x->Resize(dims);
+  y->Resize(dims);
+  z->Resize(dims);
+
+  size_t numel = static_cast<size_t>(framework::product(dims));
+
+  auto x_ptr = x->mutable_data<T>(place);
+  auto y_ptr = y->mutable_data<T>(place);
+  auto z_ptr = z->mutable_data<T>(place);
+
+  std::uniform_real_distribution<T> dist(static_cast<T>(10.0),
+                                         static_cast<T>(20.0));
+  std::mt19937 engine;
+  std::vector<T> x_data(numel), y_data(numel), z_data(numel);
+  std::vector<T> sum_result(numel);
+
+  for (size_t i = 0; i < numel; ++i) {
+    x_data[i] = dist(engine);
+    y_data[i] = dist(engine);
+    sum_result[i] = x_data[i] + y_data[i];
+    z_data[i] = -1.0;  // fill with sentinel data that the op must overwrite
+  }
+
+  auto bytes = sizeof(T) * numel;
+  bool is_gpu_place = platform::is_gpu_place(place);
+  Memcpy(x_ptr, x_data.data(), bytes, is_gpu_place);
+  Memcpy(y_ptr, y_data.data(), bytes, is_gpu_place);
+  Memcpy(z_ptr, z_data.data(), bytes, is_gpu_place);
+
+  const char *out_name = inplace ? "x" : "z";
"x" : "z"; + auto op = framework::OpRegistry::CreateOp("elementwise_add", + {{"X", {"x"}}, {"Y", {"y"}}}, + {{"Out", {out_name}}}, {}); + op->Run(scope, place); + platform::DeviceContextPool::Instance().Get(place)->Wait(); + + framework::LoDTensor cpu_out; + auto &out_tensor = scope.FindVar(out_name)->Get(); + PADDLE_ENFORCE(scope.kids().empty()); + if (inplace) { + PADDLE_ENFORCE_EQ(&out_tensor, x); + } else { + PADDLE_ENFORCE_EQ(&out_tensor, z); + } + + if (is_gpu_place) { + framework::TensorCopySync(out_tensor, platform::CPUPlace(), &cpu_out); + } else { + cpu_out = out_tensor; + } + + auto *out_ptr = cpu_out.data(); + bool is_equal = std::equal(out_ptr, out_ptr + numel, sum_result.data()); + return is_equal; +} + +TEST(test_elementwise_add_inplace, cpu_place) { + framework::DDim dims({32, 64}); + platform::CPUPlace p; + ASSERT_TRUE(TestMain(p, dims, true)); +} + +TEST(test_elementwise_add_not_inplace, cpu_place) { + framework::DDim dims({32, 64}); + platform::CPUPlace p; + ASSERT_TRUE(TestMain(p, dims, false)); +} + +#ifdef PADDLE_WITH_CUDA +TEST(test_elementwise_add_inplace, gpu_place) { + framework::DDim dims({32, 64}); + platform::CUDAPlace p(0); + ASSERT_TRUE(TestMain(p, dims, true)); +} + +TEST(test_elementwise_add_not_inplace, gpu_place) { + framework::DDim dims({32, 64}); + platform::CUDAPlace p(0); + ASSERT_TRUE(TestMain(p, dims, false)); +} +#endif + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index ba995dabecbfab8c4952bb7efeaa381f8078821a..f067e2834a6728d55817a5440fba70f5de18e0d8 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -422,8 +422,12 @@ void Blas::VADD(int n, const T *x, const T *y, #ifdef PADDLE_WITH_MKLML CBlas::VADD(n, x, y, z); #else - this->template VCOPY(n, y, z); - this->template AXPY(n, 1., x, z); + if (x == z) { + this->template AXPY(n, 1., y, z); + } else { + this->template VCOPY(n, y, z); + this->template AXPY(n, 1., x, z); + } #endif }