Unverified · Commit aeb33958 · Authored by Chen Weihang · Committed by GitHub

add custom linear backward test (#42740)

Parent 5198a497
@@ -23,6 +23,16 @@ std::vector<paddle::Tensor> PhiLinearForward(const paddle::Tensor& x,
   return {paddle::add(paddle::matmul(x, weight), bias)};
 }
 
+std::vector<paddle::Tensor> PhiLinearBackward(const paddle::Tensor& x,
+                                              const paddle::Tensor& weight,
+                                              const paddle::Tensor& bias,
+                                              const paddle::Tensor& out_grad) {
+  auto x_grad = paddle::matmul(out_grad, weight, false, true);
+  auto weight_grad = paddle::matmul(x, out_grad, true, false);
+  auto bias_grad = paddle::experimental::sum(out_grad, {0});
+  return {x_grad, weight_grad, bias_grad};
+}
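The backward kernel above computes the standard gradients of out = x·W + b: x_grad = out_grad·Wᵀ, weight_grad = xᵀ·out_grad, and bias_grad is out_grad summed over the batch dimension. A minimal NumPy sketch of the same formulas, assuming a 2-D x and an all-ones out_grad (the seed used by the dynamic-graph test below); illustrative only, not part of the patch:

```python
import numpy as np

# Shapes match the test data defined in setUp() further down.
x = np.random.random((3, 2)).astype("float32")
weight = np.full((2, 4), 0.5, dtype="float32")
out_grad = np.ones((3, 4), dtype="float32")      # assumed all-ones seed

x_grad = out_grad @ weight.T      # paddle::matmul(out_grad, weight, false, true)
weight_grad = x.T @ out_grad      # paddle::matmul(x, out_grad, true, false)
bias_grad = out_grad.sum(axis=0)  # paddle::experimental::sum(out_grad, {0})
```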
 std::vector<std::vector<int64_t>> LinearInferShape(
     const std::vector<int64_t>& x_shape,
     const std::vector<int64_t>& weight_shape,
@@ -86,9 +96,14 @@ std::vector<paddle::DataType> LinearInferDtype(
   return {x_dtype};
 }
 
-PD_BUILD_OP(pten_linear)
+PD_BUILD_OP(phi_linear)
     .Inputs({"X", "Weight", "Bias"})
     .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(PhiLinearForward))
     .SetInferShapeFn(PD_INFER_SHAPE(LinearInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(LinearInferDtype));
+
+PD_BUILD_GRAD_OP(phi_linear)
+    .Inputs({"X", "Weight", "Bias", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X"), paddle::Grad("Weight"), paddle::Grad("Bias")})
+    .SetKernelFn(PD_KERNEL(PhiLinearBackward));
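PD_BUILD_GRAD_OP attaches the backward kernel to the forward op: its Inputs are the forward inputs plus paddle::Grad("Out"), and its Outputs are the gradients of the forward inputs, in the same order as the PhiLinearBackward signature. For reference, a hedged sketch of how such an op is typically JIT-compiled and exposed to Python via paddle.utils.cpp_extension.load; the module and source-file names here are assumptions, and the test's real load(...) call sits just above the next hunk:

```python
from paddle.utils.cpp_extension import load

# Assumed names for illustration only; the actual call lives in the test file.
custom_ops = load(
    name="custom_linear_jit",          # assumed extension module name
    sources=["custom_linear_op.cc"],   # assumed source file with the kernels above
    verbose=True)

# The forward op is then callable as custom_ops.phi_linear(x, weight, bias),
# and autograd dispatches its backward pass to PhiLinearBackward.
```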
@@ -40,43 +40,56 @@ custom_ops = load(
     verbose=True)
 
 
-def linear_dynamic(func, dtype, np_x, np_weight, np_bias):
-    paddle.set_device("cpu")
-    x = paddle.to_tensor(np_x, dtype=dtype)
-    weight = paddle.to_tensor(np_weight, dtype=dtype)
-    bias = paddle.to_tensor(np_bias, dtype=dtype)
+def linear_dynamic(func, device, dtype, np_x, np_weight, np_bias):
+    paddle.set_device(device)
+    x = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    weight = paddle.to_tensor(np_weight, dtype=dtype, stop_gradient=False)
+    bias = paddle.to_tensor(np_bias, dtype=dtype, stop_gradient=False)
     out = func(x, weight, bias)
-    return out.numpy()
+    out.backward()
+    return out.numpy(), x.grad.numpy(), weight.grad.numpy(), bias.grad.numpy()
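In the dynamic-graph helper, stop_gradient=False marks x, weight, and bias as trainable leaves, and since out is not a scalar, out.backward() seeds the backward pass with an all-ones gradient. An illustrative call, assuming custom_ops has been loaded as sketched above and using the same data as setUp():

```python
np_x = np.random.random((3, 2)).astype("float32")
np_weight = np.full([2, 4], fill_value=0.5, dtype="float32")
np_bias = np.ones([4], dtype="float32")

out, x_grad, w_grad, b_grad = linear_dynamic(
    custom_ops.phi_linear, "cpu", "float32", np_x, np_weight, np_bias)
# out has shape (3, 4); the gradients follow the NumPy formulas sketched
# earlier because the implicit out_grad is all ones.
```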
-def linear_static(func, dtype, np_x, np_weight, np_bias):
+def linear_static(func, device, dtype, np_x, np_weight, np_bias):
     paddle.enable_static()
-    paddle.set_device("cpu")
+    paddle.set_device(device)
     with static.scope_guard(static.Scope()):
         with static.program_guard(static.Program()):
-            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
+            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
             weight = static.data(
                 name="weight", shape=np_weight.shape, dtype=dtype)
             bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
+            x.stop_gradient = False
+            weight.stop_gradient = False
+            bias.stop_gradient = False
             out = func(x, weight, bias)
+            mean_out = paddle.mean(out)
+            static.append_backward(mean_out)
 
             exe = static.Executor()
             exe.run(static.default_startup_program())
 
-            out_v, = exe.run(static.default_main_program(),
+            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
+                static.default_main_program(),
                 feed={
                     "x": np_x.astype(dtype),
                     "weight": np_weight.astype(dtype),
                     "bias": np_bias.astype(dtype)
                 },
-                fetch_list=[out.name])
+                fetch_list=[
+                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
+                    bias.name + "@GRAD"
+                ])
     paddle.disable_static()
-    return out_v
+    return out_v, x_grad_v, weight_grad_v, bias_grad_v
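static.append_backward needs a scalar loss, hence the paddle.mean(out) above; as a result the static path seeds the backward pass with 1/out.size instead of the all-ones seed of the dynamic path, which is fine because each mode compares the custom op against F.linear under the same seed. The gradients are fetched through Paddle's "<var name>@GRAD" naming convention, shown in this self-contained sketch (variable names here are illustrative, not from the test):

```python
import paddle
import paddle.static as static

paddle.enable_static()
with static.program_guard(static.Program()):
    x = static.data(name="x", shape=[None, 2], dtype="float32")
    x.stop_gradient = False
    loss = paddle.mean(paddle.matmul(x, x, transpose_y=True))
    static.append_backward(loss)
    # append_backward creates a gradient variable named "<forward name>@GRAD",
    # which is why the fetch_list above uses x.name + "@GRAD" and friends.
    print(x.name + "@GRAD")  # -> "x@GRAD"
paddle.disable_static()
```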
 class TestCustomLinearJit(unittest.TestCase):
     def setUp(self):
         self.dtypes = ['float32', 'float64']
+        self.devices = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
         self.np_x = np.random.random((3, 2)).astype("float32")
         self.np_weight = np.full([2, 4], fill_value=0.5, dtype="float32")
         self.np_bias = np.ones([4], dtype="float32")
@@ -88,20 +101,34 @@ class TestCustomLinearJit(unittest.TestCase):
                 pd_out))
 
     def test_static(self):
+        for device in self.devices:
             for dtype in self.dtypes:
-            pten_out = linear_static(custom_ops.pten_linear, dtype, self.np_x,
-                                      self.np_weight, self.np_bias)
+                phi_out, phi_x_grad, phi_weight_grad, phi_bias_grad = linear_static(
+                    custom_ops.phi_linear, device, dtype, self.np_x,
+                    self.np_weight, self.np_bias)
-            pd_out = linear_static(F.linear, dtype, self.np_x, self.np_weight,
-                                   self.np_bias)
+                pd_out, pd_x_grad, pd_weight_grad, pd_bias_grad = linear_static(
+                    F.linear, device, dtype, self.np_x, self.np_weight,
+                    self.np_bias)
-            self.check_output(pten_out, pd_out, "pten_out")
+                self.check_output(phi_out, pd_out, "out")
+                self.check_output(phi_x_grad, pd_x_grad, "x_grad")
+                self.check_output(phi_weight_grad, pd_weight_grad,
+                                  "weight_grad")
+                self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
     def func_dynamic(self):
+        for device in self.devices:
             for dtype in self.dtypes:
-            pten_out = linear_dynamic(custom_ops.pten_linear, dtype, self.np_x,
-                                      self.np_weight, self.np_bias)
+                phi_out, phi_x_grad, phi_weight_grad, phi_bias_grad = linear_dynamic(
+                    custom_ops.phi_linear, device, dtype, self.np_x,
+                    self.np_weight, self.np_bias)
-            pd_out = linear_dynamic(F.linear, dtype, self.np_x, self.np_weight,
-                                   self.np_bias)
+                pd_out, pd_x_grad, pd_weight_grad, pd_bias_grad = linear_dynamic(
+                    F.linear, device, dtype, self.np_x, self.np_weight,
+                    self.np_bias)
-            self.check_output(pten_out, pd_out, "pten_out")
+                self.check_output(phi_out, pd_out, "phi_out")
+                self.check_output(phi_x_grad, pd_x_grad, "x_grad")
+                self.check_output(phi_weight_grad, pd_weight_grad,
+                                  "weight_grad")
+                self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
     def test_dynamic(self):
         with _test_eager_guard():
......