Unverified commit aeb33958, authored by Chen Weihang, committed by GitHub

add custom linear backward test (#42740)

Parent 5198a497
@@ -23,6 +23,16 @@ std::vector<paddle::Tensor> PhiLinearForward(const paddle::Tensor& x,
   return {paddle::add(paddle::matmul(x, weight), bias)};
 }
 
+std::vector<paddle::Tensor> PhiLinearBackward(const paddle::Tensor& x,
+                                              const paddle::Tensor& weight,
+                                              const paddle::Tensor& bias,
+                                              const paddle::Tensor& out_grad) {
+  auto x_grad = paddle::matmul(out_grad, weight, false, true);
+  auto weight_grad = paddle::matmul(x, out_grad, true, false);
+  auto bias_grad = paddle::experimental::sum(out_grad, {0});
+  return {x_grad, weight_grad, bias_grad};
+}
+
 std::vector<std::vector<int64_t>> LinearInferShape(
     const std::vector<int64_t>& x_shape,
     const std::vector<int64_t>& weight_shape,
@@ -86,9 +96,14 @@ std::vector<paddle::DataType> LinearInferDtype(
   return {x_dtype};
 }
 
-PD_BUILD_OP(pten_linear)
+PD_BUILD_OP(phi_linear)
     .Inputs({"X", "Weight", "Bias"})
     .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(PhiLinearForward))
     .SetInferShapeFn(PD_INFER_SHAPE(LinearInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(LinearInferDtype));
+
+PD_BUILD_GRAD_OP(phi_linear)
+    .Inputs({"X", "Weight", "Bias", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X"), paddle::Grad("Weight"), paddle::Grad("Bias")})
+    .SetKernelFn(PD_KERNEL(PhiLinearBackward));
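
The gradient rules in PhiLinearBackward follow directly from out = x · W + b: x_grad = out_grad · Wᵀ, weight_grad = xᵀ · out_grad, and bias_grad sums out_grad over the batch dimension (the two boolean arguments to paddle::matmul select whether each operand is transposed). The NumPy sketch below is illustrative only, not part of the patch; it checks the same formulas against a finite-difference estimate.

# Standalone NumPy check (illustrative, not part of the patch) of the
# gradient formulas implemented in PhiLinearBackward for out = x @ w + b.
import numpy as np

x = np.random.random((3, 2))
w = np.full((2, 4), 0.5)
b = np.ones(4)
out_grad = np.ones((3, 4))      # the all-ones seed that out.backward() uses

x_grad = out_grad @ w.T         # paddle::matmul(out_grad, weight, false, true)
w_grad = x.T @ out_grad         # paddle::matmul(x, out_grad, true, false)
b_grad = out_grad.sum(axis=0)   # paddle::experimental::sum(out_grad, {0})

# Finite-difference spot check on x[0, 0].
eps = 1e-6
x_pert = x.copy()
x_pert[0, 0] += eps
numeric = ((x_pert @ w + b).sum() - (x @ w + b).sum()) / eps
assert np.isclose(numeric, x_grad[0, 0], atol=1e-4)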
@@ -40,43 +40,56 @@ custom_ops = load(
     verbose=True)
 
 
-def linear_dynamic(func, dtype, np_x, np_weight, np_bias):
-    paddle.set_device("cpu")
-    x = paddle.to_tensor(np_x, dtype=dtype)
-    weight = paddle.to_tensor(np_weight, dtype=dtype)
-    bias = paddle.to_tensor(np_bias, dtype=dtype)
+def linear_dynamic(func, device, dtype, np_x, np_weight, np_bias):
+    paddle.set_device(device)
+    x = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    weight = paddle.to_tensor(np_weight, dtype=dtype, stop_gradient=False)
+    bias = paddle.to_tensor(np_bias, dtype=dtype, stop_gradient=False)
     out = func(x, weight, bias)
-    return out.numpy()
+    out.backward()
+    return out.numpy(), x.grad.numpy(), weight.grad.numpy(), bias.grad.numpy()
 
 
-def linear_static(func, dtype, np_x, np_weight, np_bias):
+def linear_static(func, device, dtype, np_x, np_weight, np_bias):
     paddle.enable_static()
-    paddle.set_device("cpu")
+    paddle.set_device(device)
     with static.scope_guard(static.Scope()):
         with static.program_guard(static.Program()):
-            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
+            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
             weight = static.data(
                 name="weight", shape=np_weight.shape, dtype=dtype)
             bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
+            x.stop_gradient = False
+            weight.stop_gradient = False
+            bias.stop_gradient = False
             out = func(x, weight, bias)
+            mean_out = paddle.mean(out)
+            static.append_backward(mean_out)
 
             exe = static.Executor()
             exe.run(static.default_startup_program())
 
-            out_v, = exe.run(static.default_main_program(),
-                             feed={
-                                 "x": np_x.astype(dtype),
-                                 "weight": np_weight.astype(dtype),
-                                 "bias": np_bias.astype(dtype)
-                             },
-                             fetch_list=[out.name])
+            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
+                static.default_main_program(),
+                feed={
+                    "x": np_x.astype(dtype),
+                    "weight": np_weight.astype(dtype),
+                    "bias": np_bias.astype(dtype)
+                },
+                fetch_list=[
+                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
+                    bias.name + "@GRAD"
+                ])
     paddle.disable_static()
-    return out_v
+    return out_v, x_grad_v, weight_grad_v, bias_grad_v
 
 
 class TestCustomLinearJit(unittest.TestCase):
     def setUp(self):
         self.dtypes = ['float32', 'float64']
+        self.devices = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
         self.np_x = np.random.random((3, 2)).astype("float32")
         self.np_weight = np.full([2, 4], fill_value=0.5, dtype="float32")
         self.np_bias = np.ones([4], dtype="float32")
@@ -88,20 +101,34 @@ class TestCustomLinearJit(unittest.TestCase):
                 pd_out))
 
     def test_static(self):
-        for dtype in self.dtypes:
-            pten_out = linear_static(custom_ops.pten_linear, dtype, self.np_x,
-                                     self.np_weight, self.np_bias)
-            pd_out = linear_static(F.linear, dtype, self.np_x, self.np_weight,
-                                   self.np_bias)
-            self.check_output(pten_out, pd_out, "pten_out")
+        for device in self.devices:
+            for dtype in self.dtypes:
+                phi_out, phi_x_grad, phi_weight_grad, phi_bias_grad = linear_static(
+                    custom_ops.phi_linear, device, dtype, self.np_x,
+                    self.np_weight, self.np_bias)
+                pd_out, pd_x_grad, pd_weight_grad, pd_bias_grad = linear_static(
+                    F.linear, device, dtype, self.np_x, self.np_weight,
+                    self.np_bias)
+                self.check_output(phi_out, pd_out, "out")
+                self.check_output(phi_x_grad, pd_x_grad, "x_grad")
+                self.check_output(phi_weight_grad, pd_weight_grad,
+                                  "weight_grad")
+                self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
     def func_dynamic(self):
-        for dtype in self.dtypes:
-            pten_out = linear_dynamic(custom_ops.pten_linear, dtype, self.np_x,
-                                      self.np_weight, self.np_bias)
-            pd_out = linear_dynamic(F.linear, dtype, self.np_x, self.np_weight,
-                                    self.np_bias)
-            self.check_output(pten_out, pd_out, "pten_out")
+        for device in self.devices:
+            for dtype in self.dtypes:
+                phi_out, phi_x_grad, phi_weight_grad, phi_bias_grad = linear_dynamic(
+                    custom_ops.phi_linear, device, dtype, self.np_x,
+                    self.np_weight, self.np_bias)
+                pd_out, pd_x_grad, pd_weight_grad, pd_bias_grad = linear_dynamic(
+                    F.linear, device, dtype, self.np_x, self.np_weight,
+                    self.np_bias)
+                self.check_output(phi_out, pd_out, "phi_out")
+                self.check_output(phi_x_grad, pd_x_grad, "x_grad")
+                self.check_output(phi_weight_grad, pd_weight_grad,
+                                  "weight_grad")
+                self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
     def test_dynamic(self):
         with _test_eager_guard():
...
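
For context, here is a minimal dynamic-graph sketch of how the built extension is exercised end to end. The module name "custom_linear_jit" and the source file name are assumptions, since the diff truncates the load(...) call; everything else mirrors the test above.

# Hypothetical build-and-run sketch; the name and sources arguments are
# assumed, as the diff truncates the load(...) call.
import numpy as np
import paddle
from paddle.utils.cpp_extension import load

custom_ops = load(
    name="custom_linear_jit",
    sources=["custom_linear_op.cc"],  # assumed file name
    verbose=True)

x = paddle.to_tensor(np.random.random((3, 2)).astype("float32"),
                     stop_gradient=False)
weight = paddle.to_tensor(np.full([2, 4], 0.5, dtype="float32"),
                          stop_gradient=False)
bias = paddle.to_tensor(np.ones([4], dtype="float32"), stop_gradient=False)

out = custom_ops.phi_linear(x, weight, bias)  # runs PhiLinearForward
out.backward()                                # runs PhiLinearBackward
print(x.grad.shape, weight.grad.shape, bias.grad.shape)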