【论文复现】部分op不可计算二阶导数
Created by: leeacord
- 版本、环境信息: 1)PaddlePaddle版本:1.8.4post97 2)CPU:aistudio 1.8.0 py37 3)GPU:aistudio 1.8.0 py37 4)系统环境:aistudio 1.8.0 py37
- 复现代码:
import paddle
import numpy as np
from paddle import fluid
def warp_coordinates(coordinates_in, tps=False):
    """Warp a batch of 2-D coordinates with a fixed affine transform and,
    optionally, a thin-plate-spline (TPS) term.

    Reproduction helper for the double-grad bug report: it mirrors the
    upstream torch implementation with fluid ops so that the second-order
    gradient path can be exercised.

    Args:
        coordinates_in: variable of shape (1, N, 2) — TODO confirm against caller.
        tps: when True, add the TPS radial-basis contribution.

    Returns:
        The transformed coordinates variable.
    """
    # Constant affine parameters (5 transforms of shape 2x3), ones for the repro.
    theta = paddle.fluid.dygraph.to_variable(np.ones((5, 2, 3)).astype(np.float32))
    theta = fluid.layers.unsqueeze(theta, 1)
    coords = fluid.layers.unsqueeze(coordinates_in, -1)
    linear_part = theta[:, :, :, :2]   # 2x2 linear block of each affine matrix
    offset_part = theta[:, :, :, 2:]   # translation column
    print(linear_part.shape, '@', coords.shape, '+', offset_part.shape)
    # fluid.layers.matmul does not broadcast batch dims here, so expand first.
    lhs, rhs = broadcast_v1(linear_part, coords)
    transformed = fluid.layers.matmul(lhs, rhs) + offset_part
    print('=', transformed.shape)
    transformed = fluid.layers.squeeze(transformed, [-1])
    if tps:
        control_points = paddle.fluid.dygraph.to_variable(np.ones([1, 5, 5, 2]).astype(np.float32) * 3)
        control_params = paddle.fluid.dygraph.to_variable(np.ones([5, 1, 25]).astype(np.float32) * 4)
        flat_coords = fluid.layers.reshape(coords, (coords.shape[0], -1, 1, 2))
        diffs = flat_coords - fluid.layers.reshape(control_points, (1, 1, -1, 2))
        # L1 distance of each coordinate to every control point.
        distances = fluid.layers.reduce_sum(fluid.layers.abs(diffs), -1)
        # TPS radial basis r^2 * log(r); 1e-6 keeps log finite at r == 0.
        result = distances ** 2
        result = result * fluid.layers.log(distances + 1e-6)
        print(result.shape, '*', control_params.shape)
        result = result * control_params
        print('=', result.shape)
        result = fluid.layers.reshape(
            fluid.layers.reduce_sum(result, 2), (5, coords.shape[1], 1))
        print(transformed.shape, '+', result.shape)
        transformed = transformed + result
        print('=', transformed.shape)
    return transformed
def broadcast_v1(x, y):
    """Tile the batch dims of `x` and `y` to a common shape before matmul.

    The trailing two (matrix) dims are left untouched; every leading dim
    is expanded up to the elementwise maximum of the two batch shapes.

    Raises:
        ValueError: if the ranks differ, or a target dim is not an
            integer multiple of the corresponding source dim (so
            `fluid.layers.expand` could not tile it).
    """
    if len(x.shape) != len(y.shape):
        raise ValueError()
    *batch_x, _, _ = x.shape
    *batch_y, _, _ = y.shape
    arr_x = np.array(batch_x)
    arr_y = np.array(batch_y)
    target = np.max(np.stack([batch_x, batch_y], axis=0), axis=0)
    # expand() repeats a dim an integer number of times, so each target
    # dim must divide evenly by the source dim.
    if np.count_nonzero(target % arr_x) or np.count_nonzero(target % arr_y):
        raise ValueError()
    times_x = (target / arr_x).astype(np.int32).tolist()
    times_y = (target / arr_y).astype(np.int32).tolist()
    x_bc = fluid.layers.expand(x, (*times_x, 1, 1)).astype('float32')
    y_bc = fluid.layers.expand(y, (*times_y, 1, 1)).astype('float32')
    return x_bc, y_bc
# Reproduction driver: build the input, run the TPS warp, then try to take a
# gradient with create_graph=True so a second backward graph is required.
with paddle.fluid.dygraph.guard():
    inp = paddle.fluid.dygraph.to_variable(np.ones((1, 65536, 2)).astype(np.float32) * 2)
    inp.stop_gradient = False
    warped = warp_coordinates(inp, True)
    loss = fluid.layers.reduce_sum(warped[:, :, 0])
    try:
        second = paddle.fluid.dygraph.grad(loss, inp, create_graph=True)
        print('Grad Pass')
    except Exception as e:
        # Expected failure in the report: reduce_sum has no double-grad op.
        print(e)
        print('Grad Not Pass')
- 错误信息:
W0826 00:53:01.437263 1833 device_context.cc:252] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 9.2, Runtime API Version: 9.0
W0826 00:53:01.441725 1833 device_context.cc:260] device: 0, cuDNN Version: 7.6.
[5, 1, 2, 2] @ [1, 65536, 2, 1] + [5, 1, 2, 1]
= [5, 65536, 2, 1]
[1, 65536, 25] * [5, 1, 25]
= [5, 65536, 25]
[5, 65536, 2] + [5, 65536, 1]
= [5, 65536, 2]
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int)
2 paddle::imperative::PartialGradTask::RunEachOp(paddle::imperative::OpBase*)
3 paddle::imperative::PartialGradTask::Run()
4 paddle::imperative::PartialGradEngine::Execute()
----------------------
Error Message Summary:
----------------------
NotFoundError: The Op reduce_sum_grad doesn't have any grad op. If you don't intend calculating higher order derivatives, please set `create_graph` to False.
[Hint: double_grad_node should not be null.] at (/paddle/paddle/fluid/imperative/partial_grad_engine.cc:894)
Grad Not Pass
- 原仓库（PyTorch）中的原始版本实现：
def warp_coordinates(self, coordinates):
    """Warp `coordinates` with the stored affine `self.theta` and, when
    `self.tps` is set, an additional thin-plate-spline term.

    (Original torch implementation quoted from the upstream repo for
    comparison with the fluid reproduction above.)
    """
    theta = self.theta.type(coordinates.type())
    theta = theta.unsqueeze(1)
    # Affine part: 2x2 linear block times coords, plus translation column.
    transformed = torch.matmul(theta[:, :, :, :2], coordinates.unsqueeze(-1)) + theta[:, :, :, 2:]
    transformed = transformed.squeeze(-1)
    if self.tps:
        control_points = self.control_points.type(coordinates.type())
        control_params = self.control_params.type(coordinates.type())
        distances = coordinates.view(coordinates.shape[0], -1, 1, 2) - control_points.view(1, 1, -1, 2)
        # L1 distance to each control point.
        distances = torch.abs(distances).sum(-1)
        # TPS radial basis r^2 * log(r); eps keeps log finite at r == 0.
        result = distances ** 2
        result = result * torch.log(distances + 1e-6)
        result = result * control_params
        result = result.sum(dim=2).view(self.bs, coordinates.shape[1], 1)
        transformed = transformed + result
# Upstream usage: second-order gradients of the warped coordinates w.r.t.
# the input are required, hence create_graph=True on both grad() calls.
new_coordinates = warp_coordinates(coordinates)
grad_x = grad(new_coordinates[..., 0].sum(), coordinates, create_graph=True)
grad_y = grad(new_coordinates[..., 1].sum(), coordinates, create_graph=True)