Unverified · Commit 13c4fd59 authored by Charles-hit, committed by GitHub

fix matmul double and triple grad (#48779)

* fix matmul double and triple grad

* remove some comments

* add matmul_double_grad unit test

* fix matmul triple grad

* fix dot triple grad and add unit test

* modify codestyle

* fix dot_grad

* refactor dot triple grad

* disable some unit tests

* fix unit tests

* fix unit tests in double grad
Parent commit: a1319074
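This change makes the higher-order gradient inputs optional in the dot and matmul grad kernels (ddx/ddy for double grad; additionally d_dx/d_dy/d_ddout for triple grad). Instead of materializing zero-valued replacements for missing inputs up front, the kernels now fill the affected outputs with zeros directly. The snippet below is a minimal dynamic-graph sketch of the double-grad scenario being fixed; it mirrors case 1 of the TestDygraphDoubleGradMatmul tests added further down, and the shapes, all-ones gradient values, and expected results are taken from that test.

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(3, 3).astype('float32'), stop_gradient=False)
y = paddle.to_tensor(np.random.rand(3, 3).astype('float32'), stop_gradient=False)
out = paddle.matmul(x, y)

# First-order grad w.r.t. x only; keep the graph so we can differentiate again.
dout = paddle.to_tensor(np.ones([3, 3], dtype='float32'), stop_gradient=False)
(dx,) = paddle.grad([out], [x], [dout], retain_graph=True, create_graph=True)

# Second-order grad: only ddx (the grad of dx) is supplied, so the matmul
# double-grad kernel receives ddy as an empty optional.
ddx = paddle.to_tensor(np.ones([3, 3], dtype='float32'), stop_gradient=False)
dx2, dy2, ddout = paddle.grad(
    [dx], [x, y, dout], [ddx], retain_graph=True, create_graph=True
)
# With the fix, dx2 is all zeros, dy2 equals ddx^T @ dout, and ddout equals
# ddx @ y (cf. the expected values in case 1 of the new tests).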
@@ -30,9 +30,9 @@ template <typename T, typename Context>
 void DotDoubleGradKernel(const Context& dev_ctx,
                          const DenseTensor& x,
                          const DenseTensor& y,
-                         const DenseTensor& ddx,
-                         const DenseTensor& ddy,
                          const DenseTensor& dout,
+                         const paddle::optional<DenseTensor>& ddx_opt,
+                         const paddle::optional<DenseTensor>& ddy_opt,
                          DenseTensor* dx,
                          DenseTensor* dy,
                          DenseTensor* ddout);
@@ -41,12 +41,12 @@ template <typename T, typename Context>
 void DotTripleGradKernel(const Context& dev_ctx,
                          const DenseTensor& x,
                          const DenseTensor& y,
-                         const DenseTensor& ddx,
-                         const DenseTensor& ddy,
-                         const DenseTensor& d_dx,
-                         const DenseTensor& d_dy,
                          const DenseTensor& dout,
-                         const DenseTensor& d_ddout,
+                         const paddle::optional<DenseTensor>& ddx,
+                         const paddle::optional<DenseTensor>& ddy,
+                         const paddle::optional<DenseTensor>& d_dx,
+                         const paddle::optional<DenseTensor>& d_dy,
+                         const paddle::optional<DenseTensor>& d_ddout,
                          DenseTensor* d_x,
                          DenseTensor* d_y,
                          DenseTensor* d_ddx,
......
@@ -473,27 +473,13 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
                             const DenseTensor& x,
                             const DenseTensor& y,
                             const DenseTensor& dout,
-                            const paddle::optional<DenseTensor>& ddx_opt,
-                            const paddle::optional<DenseTensor>& ddy_opt,
+                            const paddle::optional<DenseTensor>& ddx,
+                            const paddle::optional<DenseTensor>& ddy,
                             bool transpose_x,
                             bool transpose_y,
                             DenseTensor* dx,
                             DenseTensor* dy,
                             DenseTensor* ddout) {
-  paddle::optional<DenseTensor> ddx;
-  paddle::optional<DenseTensor> ddy;
-  if (!ddx_opt && (dy || ddout)) {
-    DenseTensor ddx_tmp = phi::FullLike<T, Context>(dev_ctx, x, Scalar(0.0));
-    ddx = paddle::make_optional<DenseTensor>(ddx_tmp);
-  } else {
-    ddx = ddx_opt;
-  }
-  if (!ddy_opt && (dx || ddout)) {
-    DenseTensor ddy_tmp = phi::FullLike<T, Context>(dev_ctx, y, Scalar(0.0));
-    ddy = paddle::make_optional<DenseTensor>(ddy_tmp);
-  } else {
-    ddy = ddy_opt;
-  }
   // Get dims from the input x, y, output_grad
   std::vector<std::int64_t> x_dims = vectorize(x.dims());
   std::vector<std::int64_t> y_dims = vectorize(y.dims());
@@ -506,7 +492,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
   // Case1 : x's or y's dim = 1
   if (x_ndim == 1 && y_ndim == 1) {
     DotDoubleGradFunction<Context, T>()(
-        dev_ctx, &x, &y, &dout, ddx.get_ptr(), ddy.get_ptr(), dx, dy, ddout);
+        dev_ctx, &x, &y, &dout, &ddx, &ddy, dx, dy, ddout);
     return;
   }
@@ -608,6 +594,8 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
                     ddout_flag);
         ddout_flag = true;
       }
+    } else if (!ddx && dy) {
+      FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), dy);
     }
     if (ddy) {
       auto ddy_mat = ddy.get();
@@ -666,6 +654,12 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
                     ddout,
                     ddout_flag);
       }
+    } else if (!ddy && dx) {
+      FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), dx);
+    }
+    if (ddout && !ddx && !ddy) {
+      FullLikeKernel<T, Context>(
+          dev_ctx, dout, Scalar(0.0), dout.dtype(), ddout);
     }
     if (dx) {
@@ -821,7 +815,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
       }
     }
     // Reduce sum to get grad by ReduceSum
-    if (dx) {
+    if (dx && dx_help.initialized()) {
       if (dx_reduce_dims.empty()) {
         *dx = std::move(dx_help);
       } else {
@@ -829,8 +823,10 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
             dev_ctx, dx_help, dx, dx_reduce_dims);
       }
       dx->Resize(x.dims());
+    } else if (dx && !dx_help.initialized()) {
+      FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), dx);
     }
-    if (dy) {
+    if (dy && dy_help.initialized()) {
       if (dy_reduce_dims.empty()) {
         *dy = std::move(dy_help);
       } else {
@@ -838,6 +834,8 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
             dev_ctx, dy_help, dy, dy_reduce_dims);
       }
       dy->Resize(y.dims());
+    } else if (dy && !dy_help.initialized()) {
+      FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), dy);
     }
     if (ddout) {
@@ -873,11 +871,11 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                             const DenseTensor& x,
                             const DenseTensor& y,
                             const DenseTensor& dout,
-                            const paddle::optional<DenseTensor>& ddx_opt,
-                            const paddle::optional<DenseTensor>& ddy_opt,
-                            const paddle::optional<DenseTensor>& d_dx_opt,
-                            const paddle::optional<DenseTensor>& d_dy_opt,
-                            const paddle::optional<DenseTensor>& d_ddout_opt,
+                            const paddle::optional<DenseTensor>& ddx,
+                            const paddle::optional<DenseTensor>& ddy,
+                            const paddle::optional<DenseTensor>& d_dx,
+                            const paddle::optional<DenseTensor>& d_dy,
+                            const paddle::optional<DenseTensor>& d_ddout,
                             bool transpose_x,
                             bool transpose_y,
                             DenseTensor* out_d_x,
@@ -885,50 +883,6 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                             DenseTensor* out_d_dout,
                             DenseTensor* out_d_ddx,
                             DenseTensor* out_d_ddy) {
-  paddle::optional<DenseTensor> ddx;
-  paddle::optional<DenseTensor> ddy;
-  paddle::optional<DenseTensor> d_dx;
-  paddle::optional<DenseTensor> d_dy;
-  paddle::optional<DenseTensor> d_ddout;
-  if (!ddx_opt && (out_d_y || out_d_dout)) {
-    DenseTensor ddx_tmp =
-        phi::FullLike<T, Context>(dev_ctx, x, static_cast<T>(0.0));
-    ddx = paddle::make_optional<DenseTensor>(ddx_tmp);
-  } else {
-    ddx = ddx_opt;
-  }
-  if (!ddy_opt && (out_d_x || out_d_dout)) {
-    DenseTensor ddy_tmp =
-        phi::FullLike<T, Context>(dev_ctx, y, static_cast<T>(0.0));
-    ddy = paddle::make_optional<DenseTensor>(ddy_tmp);
-  } else {
-    ddy = ddy_opt;
-  }
-  if (!d_ddout_opt && (out_d_y || out_d_x || out_d_ddy || out_d_ddx)) {
-    DenseTensor d_ddout_tmp =
-        phi::FullLike<T, Context>(dev_ctx, dout, static_cast<T>(0.0));
-    d_ddout = paddle::make_optional<DenseTensor>(d_ddout_tmp);
-  } else {
-    d_ddout = d_ddout_opt;
-  }
-  if (!d_dx_opt && (out_d_ddy || out_d_dout)) {
-    DenseTensor d_dx_tmp =
-        phi::FullLike<T, Context>(dev_ctx, x, static_cast<T>(0.0));
-    d_dx = paddle::make_optional<DenseTensor>(d_dx_tmp);
-  } else {
-    d_dx = d_dx_opt;
-  }
-  if (!d_dy_opt && (out_d_ddx || out_d_dout)) {
-    DenseTensor d_dy_tmp =
-        phi::FullLike<T, Context>(dev_ctx, y, static_cast<T>(0.0));
-    d_dy = paddle::make_optional<DenseTensor>(d_dy_tmp);
-  } else {
-    d_dy = d_dy_opt;
-  }
   // Get dims from the input x, y, output_grad
   std::vector<std::int64_t> x_dims = vectorize(x.dims());
   std::vector<std::int64_t> y_dims = vectorize(y.dims());
@@ -944,12 +898,12 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
     DotTripleGradFunction<Context, T>()(dev_ctx,
                                         &x,
                                         &y,
-                                        ddx.get_ptr(),
-                                        ddy.get_ptr(),
-                                        d_dx.get_ptr(),
-                                        d_dy.get_ptr(),
                                         &dout,
-                                        d_ddout.get_ptr(),
+                                        &ddx,
+                                        &ddy,
+                                        &d_dx,
+                                        &d_dy,
+                                        &d_ddout,
                                         out_d_x,
                                         out_d_y,
                                         out_d_dout,
@@ -1047,7 +1001,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
     if (out_d_ddy_dims != y_help.dims()) {
       out_d_ddy->Resize(y_help.dims());
     }
-    if (dout_conj.IsInitialized()) {
+    if (!dout_conj.IsInitialized()) {
       dout_conj = Conj<T>(dev_ctx, dout_help);
     }
     x_conj = Conj<T>(dev_ctx, x_help);
@@ -1108,6 +1062,8 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                           out_d_y,
                           false);
       }
+    } else if (out_d_y) {
+      FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), out_d_y);
     }
     if (out_d_x && ddy) {
       if (transpose_x && transpose_y) {
@@ -1155,6 +1111,8 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                           out_d_x,
                           false);
       }
+    } else if (out_d_x) {
+      FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), out_d_x);
     }
     // equations:
@@ -1269,6 +1227,15 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
         }
         d_ddy_flag = true;
       }
+    } else {
+      // d_ddout is none
+      if (out_d_x) {
+        FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), out_d_x);
+      }
+      if (out_d_y) {
+        FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), out_d_y);
+      }
     }
     if (d_dy) {
@@ -1439,6 +1406,19 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
         out_d_ddy->Resize(out_d_ddy_dims);
       }
     }
+    if (out_d_dout && !out_d_dout->IsInitialized()) {
+      FullLikeKernel<T, Context>(
+          dev_ctx, dout, Scalar(0.0), dout.dtype(), out_d_dout);
+    }
+    if (out_d_ddx && !out_d_ddx->IsInitialized()) {
+      FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), out_d_ddx);
+    }
+    if (out_d_ddy && !out_d_ddy->IsInitialized()) {
+      FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), out_d_ddy);
+    }
   } else {
     // Case3: broadcast. It need cost much time to reduce sum for the
     // broadcast and wastes the memory.
@@ -1585,7 +1565,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
       }
     }
     // Reduce sum to get grad by ReduceSum
-    if (out_d_x) {
+    if (out_d_x && out_dx_help.initialized()) {
       if (dx_reduce_dims.empty()) {
         *out_d_x = std::move(out_dx_help);
       } else {
@@ -1593,9 +1573,11 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
             dev_ctx, out_dx_help, out_d_x, dx_reduce_dims);
       }
       out_d_x->Resize(x.dims());
+    } else if (out_d_x) {
+      FullLikeKernel<T, Context>(dev_ctx, x, Scalar(0.0), x.dtype(), out_d_x);
     }
-    if (out_d_y) {
+    if (out_d_y && out_dy_help.initialized()) {
       if (dy_reduce_dims.empty()) {
         *out_d_y = std::move(out_dy_help);
       } else {
@@ -1603,6 +1585,8 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
             dev_ctx, out_dy_help, out_d_y, dy_reduce_dims);
       }
       out_d_y->Resize(y.dims());
+    } else if (out_d_y) {
+      FullLikeKernel<T, Context>(dev_ctx, y, Scalar(0.0), y.dtype(), out_d_y);
     }
     // compute d_dout
@@ -1628,6 +1612,11 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                                       transpose_y,
                                       true);
       }
+      if (!out_d_dout->initialized()) {
+        FullLikeKernel<T, Context>(
+            dev_ctx, dout, Scalar(0.0), dout.dtype(), out_d_dout);
+      }
     }
     // compute d_ddx
@@ -1735,13 +1724,18 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
              true);
         }
       }
+      if (out_d_ddx_help.initialized()) {
         if (dx_reduce_dims.empty()) {
           *out_d_ddx = std::move(out_d_ddx_help);
+        } else {
+          ReduceSumForMatmulGrad<Context, T>()(
+              dev_ctx, out_d_ddx_help, out_d_ddx, dx_reduce_dims);
+        }
       } else {
-        ReduceSumForMatmulGrad<Context, T>()(
-            dev_ctx, out_d_ddx_help, out_d_ddx, dx_reduce_dims);
+        FullLikeKernel<T, Context>(
+            dev_ctx, x, Scalar(0.0), x.dtype(), out_d_ddx);
       }
       out_d_ddx->Resize(x.dims());
     }
@@ -1852,12 +1846,18 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
         }
       }
-      if (dy_reduce_dims.empty()) {
-        *out_d_ddy = std::move(out_d_ddy_help);
+      if (out_d_ddy_help.initialized()) {
+        if (dy_reduce_dims.empty()) {
+          *out_d_ddy = std::move(out_d_ddy_help);
+        } else {
+          ReduceSumForMatmulGrad<Context, T>()(
+              dev_ctx, out_d_ddy_help, out_d_ddy, dy_reduce_dims);
+        }
       } else {
-        ReduceSumForMatmulGrad<Context, T>()(
-            dev_ctx, out_d_ddy_help, out_d_ddy, dy_reduce_dims);
+        FullLikeKernel<T, Context>(
+            dev_ctx, y, Scalar(0.0), y.dtype(), out_d_ddy);
       }
       out_d_ddy->Resize(y.dims());
     }
   }
......
@@ -688,5 +688,489 @@ class TestDoubleGradBasics(TestCase):
         np.testing.assert_array_equal(grad_out.grad.numpy(), grad_out_grad_ref)
+
+
+class TestDygraphDoubleGradMatmul(TestCase):
+    # case1: ddy is none, no broadcast, dims != 1
+    def test_matmul_double_grad_case1(self):
+        input_numpy_x = np.random.random([3, 3]).astype('float32')
+        input_numpy_y = np.random.random([3, 3]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([3, 3]), stop_gradient=False, dtype='float32'
+            )
+            (dx,) = paddle.grad(
+                [out], [x], [dout], retain_graph=True, create_graph=True
+            )
+            ddx = paddle.to_tensor(
+                np.ones([3, 3]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dx],
+                [x, y, dout],
+                [ddx],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.zeros([3, 3], dtype="float32")
+            dy_double_grad_expected = np.matmul(
+                np.ones([3, 3], dtype="float32"),
+                np.ones([3, 3], dtype="float32"),
+            )
+            ddout_expected = np.matmul(
+                np.ones([3, 3], dtype="float32"), input_numpy_y
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case2: ddx is none, no broadcast, dims != 1
+    def test_matmul_double_grad_case2(self):
+        input_numpy_x = np.random.random([3, 3]).astype('float32')
+        input_numpy_y = np.random.random([3, 3]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([3, 3]), stop_gradient=False, dtype='float32'
+            )
+            (dy,) = paddle.grad(
+                [out], [y], [dout], retain_graph=True, create_graph=True
+            )
+            ddy = paddle.to_tensor(
+                np.ones([3, 3]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dy],
+                [x, y, dout],
+                [ddy],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.matmul(
+                np.ones([3, 3], dtype="float32"),
+                np.ones([3, 3], dtype="float32"),
+            )
+            dy_double_grad_expected = np.zeros([3, 3], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_x, np.ones([3, 3], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case3: ddx is none, dims = 1
+    def test_matmul_double_grad_case3(self):
+        input_numpy_x = np.random.random([3]).astype('float32')
+        input_numpy_y = np.random.random([3]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([1]), stop_gradient=False, dtype='float32'
+            )
+            (dy,) = paddle.grad(
+                [out], [y], [dout], retain_graph=True, create_graph=True
+            )
+            ddy = paddle.to_tensor(
+                np.ones([3]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dy],
+                [x, y, dout],
+                [ddy],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.ones([3], dtype="float32")
+            dy_double_grad_expected = np.zeros([3], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_x, np.ones([3], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case4: ddy is none, dims = 1
+    def test_matmul_double_grad_case4(self):
+        input_numpy_x = np.random.random([3]).astype('float32')
+        input_numpy_y = np.random.random([3]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([1]), stop_gradient=False, dtype='float32'
+            )
+            (dx,) = paddle.grad(
+                [out], [x], [dout], retain_graph=True, create_graph=True
+            )
+            ddx = paddle.to_tensor(
+                np.ones([3]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dx],
+                [x, y, dout],
+                [ddx],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.zeros([3], dtype="float32")
+            dy_double_grad_expected = np.ones([3], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_y, np.ones([3], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case5: ddx is none, broadcast, dims != 1
+    def test_matmul_double_grad_case5(self):
+        input_numpy_x = np.random.random([2, 1]).astype('float32')
+        input_numpy_y = np.random.random([1]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([2]), stop_gradient=False, dtype='float32'
+            )
+            (dy,) = paddle.grad(
+                [out], [y], [dout], retain_graph=True, create_graph=True
+            )
+            ddy = paddle.to_tensor(
+                np.ones([1]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dy],
+                [x, y, dout],
+                [ddy],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.ones([2, 1], dtype="float32")
+            dy_double_grad_expected = np.zeros([1], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_x, np.ones([1], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case6: ddy is none, broadcast, dims != 1
+    def test_matmul_double_grad_case6(self):
+        input_numpy_x = np.random.random([2, 1]).astype('float32')
+        input_numpy_y = np.random.random([1]).astype('float32')
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='float32'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='float32'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([2]), stop_gradient=False, dtype='float32'
+            )
+            (dx,) = paddle.grad(
+                [out], [x], [dout], retain_graph=True, create_graph=True
+            )
+            ddx = paddle.to_tensor(
+                np.ones([2, 1]), stop_gradient=False, dtype='float32'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dx],
+                [x, y, dout],
+                [ddx],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.zeros([2, 1], dtype="float32")
+            dy_double_grad_expected = np.ones([1], dtype="float32") * 2
+            ddout_expected = np.ones([2], dtype="float32") * input_numpy_y[0]
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case7: ddy is none, dims = 1, complex dtype
+    def test_matmul_double_grad_case7(self):
+        input_numpy_x = np.random.random([3]).astype(
+            'float32'
+        ) + 1j * np.random.random([3]).astype('float32')
+        input_numpy_y = np.random.random([3]).astype(
+            'float32'
+        ) + 1j * np.random.random([3]).astype('float32')
+        input_numpy_y_conj = np.conjugate(input_numpy_y)
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='complex64'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='complex64'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([1]), stop_gradient=False, dtype='complex64'
+            )
+            (dx,) = paddle.grad(
+                [out], [x], [dout], retain_graph=True, create_graph=True
+            )
+            ddx = paddle.to_tensor(
+                np.ones([3]), stop_gradient=False, dtype='complex64'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dx],
+                [x, y, dout],
+                [ddx],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.zeros(
+                [3], dtype="float32"
+            ) + 0j * np.zeros([3], dtype="float32")
+            dy_double_grad_expected = np.ones(
+                [3], dtype="float32"
+            ) + 0j * np.ones([3], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_y_conj, np.ones([3], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
+    # case8: ddx is none, dims = 1, complex dtype
+    def test_matmul_double_grad_case8(self):
+        input_numpy_x = np.random.random([3]).astype(
+            'float32'
+        ) + 1j * np.random.random([3]).astype('float32')
+        input_numpy_y = np.random.random([3]).astype(
+            'float32'
+        ) + 1j * np.random.random([3]).astype('float32')
+        input_numpy_x_conj = np.conjugate(input_numpy_x)
+
+        def actual():
+            x = paddle.to_tensor(
+                input_numpy_x, stop_gradient=False, dtype='complex64'
+            )
+            y = paddle.to_tensor(
+                input_numpy_y, stop_gradient=False, dtype='complex64'
+            )
+            out = paddle.matmul(x, y, False, False)
+            dout = paddle.to_tensor(
+                np.ones([1]), stop_gradient=False, dtype='complex64'
+            )
+            (dy,) = paddle.grad(
+                [out], [y], [dout], retain_graph=True, create_graph=True
+            )
+            ddy = paddle.to_tensor(
+                np.ones([3]), stop_gradient=False, dtype='complex64'
+            )
+            dx_double_grad, dy_double_grad, ddout = paddle.grad(
+                [dy],
+                [x, y, dout],
+                [ddy],
+                retain_graph=True,
+                create_graph=True,
+            )
+            return dx_double_grad, dy_double_grad, ddout
+
+        def expected():
+            dx_double_grad_expected = np.ones([3], dtype="float32")
+            dy_double_grad_expected = np.zeros([3], dtype="float32")
+            ddout_expected = np.matmul(
+                input_numpy_x_conj, np.ones([3], dtype="float32")
+            )
+            return (
+                dx_double_grad_expected,
+                dy_double_grad_expected,
+                ddout_expected,
+            )
+
+        expected_results = expected()
+        places = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            places.append("gpu")
+        for place in places:
+            paddle.device.set_device(place)
+            actual_results = actual()
+            for expected_result, actual_result in zip(
+                expected_results, actual_results
+            ):
+                np.testing.assert_allclose(
+                    expected_result, actual_result, rtol=1e-6
+                )
+
 if __name__ == '__main__':
     unittest.main()
@@ -179,7 +179,9 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\
 ^test_parallel_executor_seresnext_with_reduce_gpu$|\
 ^test_api_impl$|\
 ^test_tensordot$|\
-^disable_win_inference_test$"
+^disable_win_inference_test$|\
+^test_imperative_double_grad$|\
+^test_imperative_triple_grad$"
 # /*==========Fixed Disabled Windows CPU OPENBLAS((PR-CI-Windows-OPENBLAS)) unittests==============================*/
......