未验证 提交 69c7edcf 编写于 作者: H HongyuJia 提交者: GitHub

[Custom device] Add custom_cpu testcase of custom_relu (#49300)

* add custom_cpu testcase

* update test_custom_device_setup

* update path to custom_runtime

* fix cmd wait

* test Linux only

* setup once

* integrate to one run_cmd

* add pip install

* change timeout

* add debug string

* add debug string

* add debug string

* use os.system and change module name

* add runtime

* add more debug message

* continue debug

* timestamp

* fix testcase import bug

* remove error message

* set TIMEOUT property
上级 a4b4343f
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
// Note(chenweihang): In order to be compatible with the original custom // Note(chenweihang): In order to be compatible with the original custom
// operator Tensor interface, only available to external users, the file // operator Tensor interface, only available to external users, the file
// cannot be includeed in paddle // cannot be included in paddle
namespace paddle { namespace paddle {
using Tensor = experimental::Tensor; using Tensor = experimental::Tensor;
......
...@@ -28,4 +28,5 @@ if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU) ...@@ -28,4 +28,5 @@ if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU)
set_tests_properties(test_custom_cpu_profiler_plugin PROPERTIES TIMEOUT 120) set_tests_properties(test_custom_cpu_profiler_plugin PROPERTIES TIMEOUT 120)
set_tests_properties(test_fleet_launch_custom_device PROPERTIES TIMEOUT 120) set_tests_properties(test_fleet_launch_custom_device PROPERTIES TIMEOUT 120)
set_tests_properties(test_custom_cpu_to_static PROPERTIES TIMEOUT 120) set_tests_properties(test_custom_cpu_to_static PROPERTIES TIMEOUT 120)
set_tests_properties(test_custom_device_relu_setup PROPERTIES TIMEOUT 120)
endif() endif()
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
// Input-placement guards built on PD_CHECK. Each one evaluates a placement
// predicate on tensor `x` and, when it is false, reports a message that starts
// with the stringified argument name (via `#x`).
//
// CHECK_CPU_INPUT: requires `x.is_cpu()`.
#define CHECK_CPU_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
// CHECK_CUSTOM_INPUT: requires `x.is_custom_device()`.
#define CHECK_CUSTOM_INPUT(x) \
PD_CHECK(x.is_custom_device(), #x " must be a custom Tensor.")
// CPU kernel for the ReLU forward pass.
//
// Writes max(0, x_data[i]) into out_data[i] for every i in [0, x_numel).
// Both pointers are validated with PD_CHECK before any element is touched.
template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x_data,
                             data_t* out_data,
                             int64_t x_numel) {
  PD_CHECK(x_data != nullptr, "x_data is nullptr.");
  PD_CHECK(out_data != nullptr, "out_data is nullptr.");
  const data_t zero = static_cast<data_t>(0.);
  const data_t* src = x_data;
  data_t* dst = out_data;
  // Walk both buffers in lock-step; a non-positive count touches nothing.
  for (int64_t remaining = x_numel; remaining > 0; --remaining) {
    *dst++ = std::max(zero, *src++);
  }
}
// CPU kernel for the first-order ReLU gradient.
//
// For each i in [0, out_numel), grad_x_data[i] receives grad_out_data[i]
// multiplied by 1 where out_data[i] > 0 and by 0 everywhere else.
template <typename data_t>
void relu_cpu_backward_kernel(const data_t* grad_out_data,
                              const data_t* out_data,
                              data_t* grad_x_data,
                              int64_t out_numel) {
  const data_t zero = static_cast<data_t>(0);
  int64_t idx = 0;
  while (idx < out_numel) {
    // The gate is applied by multiplication (not by selecting 0 directly) so
    // NaN / infinite / signed-zero results follow ordinary arithmetic rules.
    const double gate = out_data[idx] > zero ? 1. : 0.;
    grad_x_data[idx] = grad_out_data[idx] * gate;
    ++idx;
  }
}
// CPU kernel for the second-order ReLU gradient.
//
// For each i in [0, ddout_numel), ddout_data[i] receives ddx_data[i]
// multiplied by 1 where out_data[i] > 0 and by 0 everywhere else.
template <typename data_t>
void relu_cpu_double_backward_kernel(const data_t* out_data,
                                     const data_t* ddx_data,
                                     data_t* ddout_data,
                                     int64_t ddout_numel) {
  const data_t threshold = static_cast<data_t>(0);
  for (int64_t pos = 0; pos != ddout_numel && pos < ddout_numel; ++pos) {
    // Multiply by a 0/1 mask so the result follows normal arithmetic rules
    // for non-finite inputs.
    double mask = 0.;
    if (out_data[pos] > threshold) {
      mask = 1.;
    }
    ddout_data[pos] = ddx_data[pos] * mask;
  }
}
// CPU implementation of the custom ReLU forward op.
//
// Verifies that `x` is a CPU tensor, allocates the result with
// paddle::empty_like(x) and fills it through relu_cpu_forward_kernel,
// dispatching on the floating-point type reported by `x.type()`.
// Returns a one-element vector holding the output tensor.
std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
  CHECK_CPU_INPUT(x);
  auto out = paddle::empty_like(x);
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "relu_cpu_forward", ([&] {
        const auto* src = x.data<data_t>();
        auto* dst = out.data<data_t>();
        relu_cpu_forward_kernel<data_t>(src, dst, x.numel());
      }));
  std::vector<paddle::Tensor> outputs;
  outputs.push_back(out);
  return outputs;
}
// CPU implementation of the custom ReLU backward op.
//
// Inputs:
//   x        - forward input; only used to shape/type the result.
//   out      - forward output; selects which gradient entries pass through.
//   grad_out - gradient flowing in from downstream.
// Returns a one-element vector holding the gradient w.r.t. `x`.
//
// All three tensors are read through raw host pointers below, so each one is
// validated with CHECK_CPU_INPUT first, matching the checks performed by the
// CPU forward and double-backward entry points.
std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
                                              const paddle::Tensor& out,
                                              const paddle::Tensor& grad_out) {
  CHECK_CPU_INPUT(x);
  CHECK_CPU_INPUT(out);
  CHECK_CPU_INPUT(grad_out);
  auto grad_x = paddle::empty_like(x);
  PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
                               relu_cpu_backward_kernel<data_t>(
                                   grad_out.data<data_t>(),
                                   out.data<data_t>(),
                                   grad_x.data<data_t>(),
                                   out.size());
                             }));
  return {grad_x};
}
// CPU implementation of the custom ReLU double-backward op.
//
// Verifies that `out` and `ddx` are CPU tensors, allocates `ddout` with the
// shape, dtype and place of `out`, and fills it through
// relu_cpu_double_backward_kernel, dispatching on `out.type()`.
// Returns a one-element vector holding `ddout`.
std::vector<paddle::Tensor> relu_cpu_double_backward(
    const paddle::Tensor& out, const paddle::Tensor& ddx) {
  CHECK_CPU_INPUT(out);
  CHECK_CPU_INPUT(ddx);
  auto ddout = paddle::empty(out.shape(), out.dtype(), out.place());
  PD_DISPATCH_FLOATING_TYPES(
      out.type(), "relu_cpu_double_backward", ([&] {
        const auto* out_ptr = out.data<data_t>();
        const auto* ddx_ptr = ddx.data<data_t>();
        auto* ddout_ptr = ddout.mutable_data<data_t>(out.place());
        relu_cpu_double_backward_kernel<data_t>(
            out_ptr, ddx_ptr, ddout_ptr, ddout.size());
      }));
  std::vector<paddle::Tensor> outputs;
  outputs.push_back(ddout);
  return outputs;
}
// Custom-device implementation of the ReLU forward op.
//
// Verifies that `x` is a custom-device tensor and delegates the computation
// to paddle::relu. Returns a one-element vector holding the result.
std::vector<paddle::Tensor> relu_custom_forward(const paddle::Tensor& x) {
  CHECK_CUSTOM_INPUT(x);
  std::vector<paddle::Tensor> outputs;
  outputs.push_back(paddle::relu(x));
  return outputs;
}
// Custom-device implementation of the ReLU backward op.
//
// Builds a 0/1 mask from `x > 0` out of Paddle tensor ops and multiplies it
// with `grad_out`. `x` and `out` are validated as custom-device tensors.
// Returns a one-element vector holding the gradient w.r.t. `x`.
//
// The result is created directly by paddle::multiply; no placeholder tensor
// is allocated up front, since it would be overwritten before being read.
std::vector<paddle::Tensor> relu_custom_backward(
    const paddle::Tensor& x,
    const paddle::Tensor& out,
    const paddle::Tensor& grad_out) {
  CHECK_CUSTOM_INPUT(x);
  CHECK_CUSTOM_INPUT(out);
  auto ones = paddle::experimental::full_like(x, 1.0, x.dtype(), x.place());
  auto zeros = paddle::experimental::full_like(x, 0.0, x.dtype(), x.place());
  auto condition = paddle::experimental::greater_than(x, zeros);
  auto grad_x =
      paddle::multiply(grad_out, paddle::where(condition, ones, zeros));
  return {grad_x};
}
// Custom-device implementation of the ReLU double-backward op.
//
// Builds a 0/1 mask from `out > 0` out of Paddle tensor ops and multiplies it
// with `ddx`. `out` is validated as a custom-device tensor.
// Returns a one-element vector holding `ddout`.
//
// The result is created directly by paddle::multiply; no placeholder tensor
// is allocated up front, since it would be overwritten before being read.
std::vector<paddle::Tensor> relu_custom_double_backward(
    const paddle::Tensor& out, const paddle::Tensor& ddx) {
  CHECK_CUSTOM_INPUT(out);
  auto ones =
      paddle::experimental::full_like(out, 1.0, out.dtype(), out.place());
  auto zeros =
      paddle::experimental::full_like(out, 0.0, out.dtype(), out.place());
  auto condition = paddle::experimental::greater_than(out, zeros);
  auto ddout = paddle::multiply(ddx, paddle::where(condition, ones, zeros));
  return {ddout};
}
// Forward kernel entry point: routes to the CPU or custom-device
// implementation according to where `x` lives, and reports
// "Not implemented." through PD_THROW for any other placement.
std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x) {
  if (x.is_cpu()) {
    return relu_cpu_forward(x);
  }
  if (x.is_custom_device()) {
    return relu_custom_forward(x);
  }
  PD_THROW("Not implemented.");
}
// Backward kernel entry point: routes to the CPU or custom-device
// implementation according to where `x` lives, and reports
// "Not implemented." through PD_THROW for any other placement.
std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
                                         const paddle::Tensor& out,
                                         const paddle::Tensor& grad_out) {
  if (x.is_cpu()) {
    return relu_cpu_backward(x, out, grad_out);
  }
  if (x.is_custom_device()) {
    return relu_custom_backward(x, out, grad_out);
  }
  PD_THROW("Not implemented.");
}
// Double-backward kernel entry point: routes to the CPU or custom-device
// implementation according to where `out` lives, and reports
// "Not implemented." through PD_THROW for any other placement.
std::vector<paddle::Tensor> ReluDoubleBackward(const paddle::Tensor& out,
                                               const paddle::Tensor& ddx) {
  if (out.is_cpu()) {
    return relu_cpu_double_backward(out, ddx);
  }
  if (out.is_custom_device()) {
    return relu_custom_double_backward(out, ddx);
  }
  PD_THROW("Not implemented.");
}
// Shape inference for the double-backward op: the single output has exactly
// the shape of `out`. `ddx_shape` is accepted but not consulted.
std::vector<std::vector<int64_t>> ReluDoubleBackwardInferShape(
    const std::vector<int64_t>& out_shape,
    const std::vector<int64_t>& ddx_shape) {
  std::vector<std::vector<int64_t>> inferred;
  inferred.push_back(out_shape);
  return inferred;
}
// Operator registration for `custom_relu`.
//
// Forward op: input "X", output "Out", kernel ReluForward.
PD_BUILD_OP(custom_relu)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ReluForward));
// First-order gradient op: inputs "X", "Out" and the gradient of "Out";
// output is the gradient of "X"; kernel ReluBackward.
PD_BUILD_GRAD_OP(custom_relu)
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.SetKernelFn(PD_KERNEL(ReluBackward));
// Second-order gradient op: inputs "Out" and the grad-of-grad of "X"; output
// is the grad-of-grad of "Out"; kernel ReluDoubleBackward. This is the only
// registration here that also installs an explicit shape-inference function.
PD_BUILD_DOUBLE_GRAD_OP(custom_relu)
.Inputs({"Out", paddle::Grad(paddle::Grad("X"))})
.Outputs({paddle::Grad(paddle::Grad("Out"))})
.SetKernelFn(PD_KERNEL(ReluDoubleBackward))
.SetInferShapeFn(PD_INFER_SHAPE(ReluDoubleBackwardInferShape));
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tempfile
import unittest
from site import getsitepackages
import numpy as np
def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
    """Run one ReLU forward/backward pass in dynamic-graph mode.

    Args:
        func: Callable applied to the input tensor when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype used when converting ``np_x`` with ``paddle.to_tensor``.
        np_x: Input data.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of
            ``func``.

    Returns:
        A tuple ``(out, x_grad)``: the forward result as a numpy array and the
        gradient of the input as a numpy array, or ``None`` when the input
        tensor carries no gradient after ``backward()``.
    """
    import paddle

    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    try:
        paddle.set_device(device)
        t = paddle.to_tensor(np_x, dtype=dtype)
        t.stop_gradient = False
        sys.stdout.flush()
        out = func(t) if use_func else paddle.nn.functional.relu(t)
        out.stop_gradient = False
        out.backward()
    finally:
        # The flag is process-global: reset it even when the op or backward()
        # raises, so a failure here cannot change the behaviour of later tests.
        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

    if t.grad is None:
        return out.numpy(), None
    return out.numpy(), t.grad.numpy()
def custom_relu_static(func, device, dtype, np_x, use_func=True):
    """Run ReLU once through a static-graph program and fetch its output.

    Args:
        func: Callable applied to the graph input when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype of the ``[None, 8]`` graph input named ``"X"``.
        np_x: Data fed as ``"X"``.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of
            ``func``.

    Returns:
        Whatever ``Executor.run`` returns for ``fetch_list=[out.name]``.
    """
    import paddle
    import paddle.static as static

    paddle.enable_static()
    try:
        paddle.set_device(device)

        with static.scope_guard(static.Scope()):
            with static.program_guard(static.Program()):
                x = static.data(name="X", shape=[None, 8], dtype=dtype)
                x.stop_gradient = False
                out = func(x) if use_func else paddle.nn.functional.relu(x)
                static.append_backward(out)

                exe = static.Executor()
                exe.run(static.default_startup_program())
                # in static mode, x data has been covered by out
                out_v = exe.run(
                    static.default_main_program(),
                    feed={"X": np_x},
                    fetch_list=[out.name],
                )
    finally:
        # Static mode is a process-wide switch: always turn it back off so a
        # failure here does not leave later dynamic-graph code in static mode.
        paddle.disable_static()
    return out_v
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    """Run ReLU once through a data-parallel compiled static program.

    Same flow as the plain static runner, except that the main program is
    wrapped in ``static.CompiledProgram(...).with_data_parallel`` targeting
    ``paddle.CustomPlace("custom_cpu", 0)`` before execution.

    Args:
        func: Callable applied to the graph input when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype of the ``[None, 8]`` graph input named ``"X"``.
        np_x: Data fed as ``"X"``.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of
            ``func``.

    Returns:
        Whatever ``Executor.run`` returns for ``fetch_list=[out.name]``.
    """
    import paddle
    import paddle.static as static

    paddle.enable_static()
    try:
        paddle.set_device(device)

        places = paddle.CustomPlace("custom_cpu", 0)
        with static.scope_guard(static.Scope()):
            with static.program_guard(static.Program()):
                x = static.data(name="X", shape=[None, 8], dtype=dtype)
                x.stop_gradient = False
                out = func(x) if use_func else paddle.nn.functional.relu(x)
                static.append_backward(out)

                exe = static.Executor()
                exe.run(static.default_startup_program())

                # in static mode, x data has been covered by out
                compiled_prog = static.CompiledProgram(
                    static.default_main_program()
                ).with_data_parallel(loss_name=out.name, places=places)
                out_v = exe.run(
                    compiled_prog, feed={"X": np_x}, fetch_list=[out.name]
                )
    finally:
        # Static mode is a process-wide switch: always turn it back off so a
        # failure here does not leave later dynamic-graph code in static mode.
        paddle.disable_static()
    return out_v
def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
    """Compute first- and second-order ReLU gradients in dynamic-graph mode.

    Args:
        func: Callable applied to the input tensor when ``use_func`` is true.
        device: Device string handed to ``paddle.set_device``.
        dtype: dtype used when converting ``np_x`` with ``paddle.to_tensor``.
        np_x: Input data.
        use_func: When false, ``paddle.nn.functional.relu`` is used instead of
            ``func``.

    Returns:
        A tuple ``(dx, ddout)`` of numpy arrays: the first ``paddle.grad``
        result (output w.r.t. input, with the graph kept) and the second
        ``paddle.grad`` result (``dx`` w.r.t. ``out.grad``).
    """
    import paddle

    paddle.set_device(device)
    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    try:
        t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)

        out = func(t) if use_func else paddle.nn.functional.relu(t)
        dx = paddle.grad(
            outputs=out,
            inputs=t,
            grad_outputs=paddle.ones_like(t),
            create_graph=True,
            retain_graph=True,
        )

        ddout = paddle.grad(
            outputs=dx[0],
            inputs=out.grad,
            grad_outputs=paddle.ones_like(t),
            create_graph=False,
        )
    finally:
        # The flag is process-global: reset it even when one of the gradient
        # computations raises, so later tests start from a known state.
        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

    assert ddout[0].numpy() is not None
    return dx[0].numpy(), ddout[0].numpy()
class TestNewCustomOpSetUpInstall(unittest.TestCase):
def setUp(self):
# compile so and set to current path
self.cur_dir = os.path.dirname(os.path.abspath(__file__))
self.temp_dir = tempfile.TemporaryDirectory()
cmd = 'cd {} \
&& git clone {} \
&& cd PaddleCustomDevice \
&& git fetch origin \
&& git checkout {} -b dev \
&& cd backends/custom_cpu \
&& mkdir build && cd build && cmake .. && make -j8 \
&& cd {}'.format(
self.temp_dir.name,
os.getenv('PLUGIN_URL'),
os.getenv('PLUGIN_TAG'),
self.cur_dir,
)
os.system(cmd)
# set environment for loading and registering compiled custom kernels
# only valid in current process
os.environ['CUSTOM_DEVICE_ROOT'] = os.path.join(
self.cur_dir,
'{}/PaddleCustomDevice/backends/custom_cpu/build'.format(
self.temp_dir.name
),
)
# `import paddle` loads custom_cpu.so, hence we must import paddle after finishing build PaddleCustomDevice
import paddle
# [Why specific paddle_includes directory?]
# Add paddle_includes to pass CI, for more details,
# please refer to the comments in `paddle/fluid/tests/custom_op/utils.py``
paddle_includes = []
for site_packages_path in getsitepackages():
paddle_includes.append(
os.path.join(site_packages_path, 'paddle', 'include')
)
paddle_includes.append(
os.path.join(
site_packages_path, 'paddle', 'include', 'third_party'
)
)
custom_module = paddle.utils.cpp_extension.load(
name='custom_device_relu',
sources=['custom_relu_op.cc'],
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=["-w", "-g"], # test for cc flags
# build_directory=self.cur_dir,
verbose=True,
)
self.custom_op = custom_module.custom_relu
self.dtypes = ["float32", "float64"]
self.device = "custom_cpu"
# config seed
SEED = 2021
paddle.seed(SEED)
paddle.framework.random._manual_program_seed(SEED)
def tearDown(self):
self.temp_dir.cleanup()
del os.environ['CUSTOM_DEVICE_ROOT']
def test_custom_device(self):
self._test_static()
self._test_static_pe()
self._test_dynamic()
self._test_double_grad_dynamic()
self._test_with_dataloader()
def _test_static(self):
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
out = custom_relu_static(self.custom_op, self.device, dtype, x)
pd_out = custom_relu_static(
self.custom_op, self.device, dtype, x, False
)
np.testing.assert_array_equal(
out,
pd_out,
err_msg="custom op out: {},\n paddle api out: {}".format(
out, pd_out
),
)
def _test_static_pe(self):
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
out = custom_relu_static_pe(self.custom_op, self.device, dtype, x)
pd_out = custom_relu_static_pe(
self.custom_op, self.device, dtype, x, False
)
np.testing.assert_array_equal(
out,
pd_out,
err_msg="custom op out: {},\n paddle api out: {}".format(
out, pd_out
),
)
def _test_dynamic(self):
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
out, x_grad = custom_relu_dynamic(
self.custom_op, self.device, dtype, x
)
pd_out, pd_x_grad = custom_relu_dynamic(
self.custom_op, self.device, dtype, x, False
)
np.testing.assert_array_equal(
out,
pd_out,
err_msg="custom op out: {},\n paddle api out: {}".format(
out, pd_out
),
)
np.testing.assert_array_equal(
x_grad,
pd_x_grad,
err_msg="custom op x grad: {},\n paddle api x grad: {}".format(
x_grad, pd_x_grad
),
)
def _test_double_grad_dynamic(self):
for dtype in self.dtypes:
x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
out, dx_grad = custom_relu_double_grad_dynamic(
self.custom_op, self.device, dtype, x
)
pd_out, pd_dx_grad = custom_relu_double_grad_dynamic(
self.custom_op, self.device, dtype, x, False
)
np.testing.assert_array_equal(
out,
pd_out,
err_msg="custom op out: {},\n paddle api out: {}".format(
out, pd_out
),
)
np.testing.assert_array_equal(
dx_grad,
pd_dx_grad,
err_msg="custom op dx grad: {},\n paddle api dx grad: {}".format(
dx_grad, pd_dx_grad
),
)
def _test_with_dataloader(self):
import paddle
from paddle.vision.transforms import Compose, Normalize
paddle.set_device(self.device)
# data loader
transform = Compose(
[Normalize(mean=[127.5], std=[127.5], data_format="CHW")]
)
train_dataset = paddle.vision.datasets.MNIST(
mode="train", transform=transform
)
train_loader = paddle.io.DataLoader(
train_dataset,
batch_size=64,
shuffle=True,
drop_last=True,
num_workers=0,
)
for batch_id, (image, _) in enumerate(train_loader()):
out = self.custom_op(image)
pd_out = paddle.nn.functional.relu(image)
np.testing.assert_array_equal(
out,
pd_out,
err_msg="custom op out: {},\n paddle api out: {}".format(
out, pd_out
),
)
if batch_id == 5:
break
if __name__ == "__main__":
    if os.name == 'nt' or sys.platform.startswith('darwin'):
        # only support Linux now
        # Use sys.exit(): the bare `exit` name is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册