未验证 提交 f5641000 编写于 作者: Z Zeng Jinle 提交者: GitHub

Add a unittest to inplace elementwise_add (#18385)

* add_elementwise_add_inplace_test,test=develop

* rename file, test=develop
上级 43f64a17
# Pull in the shared operator-registration helpers and register every
# operator defined in this directory with the framework.
include(operators)
register_operators()
# Standalone C++ unit test exercising elementwise_add both with Out aliasing
# X (in-place) and with a separate output variable (non-inplace).
cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <random>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
USE_OP(elementwise_add);
namespace paddle {
namespace operators {
// Copies `n` bytes from host buffer `src` into `dst`.  When `copy_to_gpu` is
// true the destination is device memory (CUDA builds only; non-CUDA builds
// throw); otherwise this is a plain host-to-host copy.
static void Memcpy(void *dst, const void *src, size_t n, bool copy_to_gpu) {
  if (!copy_to_gpu) {
    // Host-side destination: ordinary memcpy suffices.
    std::memcpy(dst, src, n);
    return;
  }
#ifdef PADDLE_WITH_CUDA
  PADDLE_ENFORCE(cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice));
#else
  PADDLE_THROW("Not compiled with cuda");
#endif
}
// Runs a single elementwise_add on `place` over tensors of shape `dims` and
// compares the op output against a host-side reference sum.
//
// When `inplace` is true the op's Out is bound to the same variable as X
// ("x"), exercising the in-place code path; otherwise Out is a distinct
// variable "z".  Returns true iff every output element equals x[i] + y[i].
template <typename T>
bool TestMain(const platform::Place &place, const framework::DDim &dims,
bool inplace) {
framework::Scope scope;
// Create the three operator variables and size them identically.
auto *x = scope.Var("x")->GetMutable<framework::LoDTensor>();
auto *y = scope.Var("y")->GetMutable<framework::LoDTensor>();
auto *z = scope.Var("z")->GetMutable<framework::LoDTensor>();
x->Resize(dims);
y->Resize(dims);
z->Resize(dims);
size_t numel = static_cast<size_t>(framework::product(dims));
// Allocate device/host buffers on the target place before filling them.
auto x_ptr = x->mutable_data<T>(place);
auto y_ptr = y->mutable_data<T>(place);
auto z_ptr = z->mutable_data<T>(place);
// Deterministic inputs: default-seeded mt19937, values in [10, 20).
std::uniform_real_distribution<T> dist(static_cast<T>(10.0),
static_cast<T>(20.0));
std::mt19937 engine;
std::vector<T> x_data(numel), y_data(numel), z_data(numel);
std::vector<T> sum_result(numel);
for (size_t i = 0; i < numel; ++i) {
x_data[i] = dist(engine);
y_data[i] = dist(engine);
// Host-side reference result to compare the op output against.
sum_result[i] = x_data[i] + y_data[i];
z_data[i] = -1.0; // poison the output buffer so stale data cannot fake a pass
}
auto bytes = sizeof(T) * numel;
bool is_gpu_place = platform::is_gpu_place(place);
Memcpy(x_ptr, x_data.data(), bytes, is_gpu_place);
Memcpy(y_ptr, y_data.data(), bytes, is_gpu_place);
Memcpy(z_ptr, z_data.data(), bytes, is_gpu_place);
// In-place mode aliases Out to the input variable "x".
const char *out_name = inplace ? "x" : "z";
auto op = framework::OpRegistry::CreateOp("elementwise_add",
{{"X", {"x"}}, {"Y", {"y"}}},
{{"Out", {out_name}}}, {});
op->Run(scope, place);
// Make sure any asynchronous (GPU) work has finished before reading back.
platform::DeviceContextPool::Instance().Get(place)->Wait();
framework::LoDTensor cpu_out;
auto &out_tensor = scope.FindVar(out_name)->Get<framework::LoDTensor>();
PADDLE_ENFORCE(scope.kids().empty());
// The op must have written into exactly the tensor the binding implies:
// x itself when in-place, the separate z otherwise.
if (inplace) {
PADDLE_ENFORCE_EQ(&out_tensor, x);
} else {
PADDLE_ENFORCE_EQ(&out_tensor, z);
}
// Bring the result to host memory for comparison if it lives on the GPU.
if (is_gpu_place) {
framework::TensorCopySync(out_tensor, platform::CPUPlace(), &cpu_out);
} else {
cpu_out = out_tensor;
}
auto *out_ptr = cpu_out.data<T>();
// Exact equality is safe here: both sides perform the same single T + T.
bool is_equal = std::equal(out_ptr, out_ptr + numel, sum_result.data());
return is_equal;
}
// In-place elementwise_add (Out aliases X) must produce the correct sum on CPU.
TEST(test_elementwise_add_inplace, cpu_place) {
  platform::CPUPlace cpu_place;
  const framework::DDim shape({32, 64});
  ASSERT_TRUE(TestMain<float>(cpu_place, shape, true));
}
// Regular (separate-output) elementwise_add must also be correct on CPU.
TEST(test_elementwise_add_not_inplace, cpu_place) {
  platform::CPUPlace cpu_place;
  const framework::DDim shape({32, 64});
  ASSERT_TRUE(TestMain<float>(cpu_place, shape, false));
}
#ifdef PADDLE_WITH_CUDA
// Same pair of checks as the CPU tests, executed on CUDA device 0.
TEST(test_elementwise_add_inplace, gpu_place) {
  platform::CUDAPlace gpu_place(0);
  const framework::DDim shape({32, 64});
  ASSERT_TRUE(TestMain<float>(gpu_place, shape, true));
}
TEST(test_elementwise_add_not_inplace, gpu_place) {
  platform::CUDAPlace gpu_place(0);
  const framework::DDim shape({32, 64});
  ASSERT_TRUE(TestMain<float>(gpu_place, shape, false));
}
#endif
} // namespace operators
} // namespace paddle
......@@ -422,8 +422,12 @@ void Blas<platform::CPUDeviceContext>::VADD(int n, const T *x, const T *y,
#ifdef PADDLE_WITH_MKLML
CBlas<T>::VADD(n, x, y, z);
#else
this->template VCOPY<T>(n, y, z);
this->template AXPY<T>(n, 1., x, z);
if (x == z) {
this->template AXPY<T>(n, 1., y, z);
} else {
this->template VCOPY<T>(n, y, z);
this->template AXPY<T>(n, 1., x, z);
}
#endif
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册