提交 b64aac54 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #3857 from qingqing01/grad_test_for_multi_inputs

Enhance the unit test framework to explicitly test whether the operator correctly handles gradients for multiple inputs.
...@@ -286,28 +286,50 @@ class TestMulOp(unittest.TestCase): ...@@ -286,28 +286,50 @@ class TestMulOp(unittest.TestCase):
反向Op单测继承自`GradientChecker`,而`GradientChecker`继承自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 反向Op单测继承自`GradientChecker`,而`GradientChecker`继承自`unittest.TestCase`,所以反向单测函数需要`test_`开头。
``` ```
class MulGradOpTest(GradientChecker): class TestMulGradOp(GradientChecker):
def test_mul(self): def setUp(self):
op = create_op("mul") self.op = create_op("mul")
inputs = { self.inputs = {
'X': np.random.random((32, 84)).astype("float32"), 'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((84, 100)).astype("float32") 'Y': np.random.random((84, 100)).astype("float32")
} }
self.compare_grad(op, inputs)
def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)
def test_normal(self):
# mul op will enlarge the relative error # mul op will enlarge the relative error
self.check_grad( self.check_grad(
op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
```
def test_ignore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})
def test_ignore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
```
下面解释一些关键的地方:
- 调用`create_op("mul")`创建反向Op对应的前向Op。 - 调用`create_op("mul")`创建反向Op对应的前向Op。
- 定义输入`inputs`
- 调用`compare_grad`函数对比CPU、GPU计算结果。 - 调用`compare_grad`函数对比CPU、GPU计算结果。
- 调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。 - `test_normal`调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。
- 第一个参数`op` : 前向op。 - 第一个参数`self.op` : 前向Op。
- 第二个参数`inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 - 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。
- 第三个参数`set(["X", "Y"])` : 指定对输入变量`X`和`Y`做梯度检测。 - 第三个参数`["X", "Y"]` : 指定对输入变量`X`和`Y`做梯度检测。
- 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out`
- `test_ignore_x`和`test_ignore_y`分支测试只需要计算一个输入梯度的情况。
### 编译和执行 ### 编译和执行
......
...@@ -75,8 +75,8 @@ class MulOpGrad : public framework::OperatorWithKernel { ...@@ -75,8 +75,8 @@ class MulOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE(y_dims[1] == out_dims[1], PADDLE_ENFORCE(y_dims[1] == out_dims[1],
"Out@GRAD M X N must equal to Y dims 1, N "); "Out@GRAD M X N must equal to Y dims 1, N ");
x_grad->Resize(x_dims); if (x_grad) x_grad->Resize(x_dims);
y_grad->Resize(y_dims); if (y_grad) y_grad->Resize(y_dims);
} }
}; };
......
...@@ -31,13 +31,13 @@ template <typename Place, typename T> ...@@ -31,13 +31,13 @@ template <typename Place, typename T>
class MulKernel : public framework::OpKernel { class MulKernel : public framework::OpKernel {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<Tensor>("X"); auto* x = context.Input<Tensor>("X");
auto* Y = context.Input<Tensor>("Y"); auto* y = context.Input<Tensor>("Y");
auto* Z = context.Output<Tensor>("Out"); auto* z = context.Output<Tensor>("Out");
Z->mutable_data<T>(context.GetPlace()); z->mutable_data<T>(context.GetPlace());
auto* device_context = auto* device_context =
const_cast<platform::DeviceContext*>(context.device_context_); const_cast<platform::DeviceContext*>(context.device_context_);
math::matmul<Place, T>(*X, false, *Y, false, 1, Z, 0, device_context); math::matmul<Place, T>(*x, false, *y, false, 1, z, 0, device_context);
} }
}; };
...@@ -45,20 +45,24 @@ template <typename Place, typename T> ...@@ -45,20 +45,24 @@ template <typename Place, typename T>
class MulGradKernel : public framework::OpKernel { class MulGradKernel : public framework::OpKernel {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto* X = ctx.Input<Tensor>("X"); auto* x = ctx.Input<Tensor>("X");
auto* Y = ctx.Input<Tensor>("Y"); auto* y = ctx.Input<Tensor>("Y");
auto* dOut = ctx.Input<Tensor>(framework::GradVarName("Out")); auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dX = ctx.Output<Tensor>(framework::GradVarName("X")); auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dY = ctx.Output<Tensor>(framework::GradVarName("Y")); auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
dX->mutable_data<T>(ctx.GetPlace());
dY->mutable_data<T>(ctx.GetPlace());
auto* device_context = auto* device_context =
const_cast<platform::DeviceContext*>(ctx.device_context_); const_cast<platform::DeviceContext*>(ctx.device_context_);
// dX = dOut * Y'. dX: M x K, dOut : M x N, Y : K x N if (dx) {
math::matmul<Place, T>(*dOut, false, *Y, true, 1, dX, 0, device_context); dx->mutable_data<T>(ctx.GetPlace());
// dY = X' * dOut. dY: K x N, dOut : M x N, X : M x K // dx = dout * y'. dx: M x K, dout : M x N, y : K x N
math::matmul<Place, T>(*X, true, *dOut, false, 1, dY, 0, device_context); math::matmul<Place, T>(*dout, false, *y, true, 1, dx, 0, device_context);
}
if (dy) {
dy->mutable_data<T>(ctx.GetPlace());
// dy = x' * dout. dy K x N, dout : M x N, x : M x K
math::matmul<Place, T>(*x, true, *dout, false, 1, dy, 0, device_context);
}
} }
}; };
......
...@@ -64,8 +64,10 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel { ...@@ -64,8 +64,10 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
auto dims0 = ctx.Input<Tensor>("X")->dims(); auto dims0 = ctx.Input<Tensor>("X")->dims();
auto dims1 = ctx.Input<Tensor>("b")->dims(); auto dims1 = ctx.Input<Tensor>("b")->dims();
PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1") PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1")
ctx.Output<Tensor>(framework::GradVarName("X"))->Resize(dims0); auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
ctx.Output<Tensor>(framework::GradVarName("b"))->Resize(dims1); auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
if (dx) dx->Resize(dims0);
if (db) db->Resize(dims1);
} }
}; };
......
...@@ -51,20 +51,24 @@ template <typename Place, typename T> ...@@ -51,20 +51,24 @@ template <typename Place, typename T>
class RowwiseAddGradKernel : public framework::OpKernel { class RowwiseAddGradKernel : public framework::OpKernel {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* dOut = context.Input<Tensor>(framework::GradVarName("Out")); auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dX = context.Output<Tensor>(framework::GradVarName("X")); auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
auto* db = context.Output<Tensor>(framework::GradVarName("b")); auto* db = context.Output<Tensor>(framework::GradVarName("b"));
dX->mutable_data<T>(context.GetPlace());
db->mutable_data<T>(context.GetPlace());
auto OutGrad = EigenMatrix<T>::From(*dOut); auto out_grad = EigenMatrix<T>::From(*dout);
auto place = context.GetEigenDevice<Place>(); auto place = context.GetEigenDevice<Place>();
EigenMatrix<T>::From(*dX).device(place) = OutGrad; if (dx) {
dx->mutable_data<T>(context.GetPlace());
EigenMatrix<T>::From(*dx).device(place) = out_grad;
}
// https://eigen.tuxfamily.org/dox/unsupported/TensorBase_8h_source.html if (db) {
// colwise add db->mutable_data<T>(context.GetPlace());
Eigen::array<int, 1> dims{{0}}; /* dimension to reduce */ // https://eigen.tuxfamily.org/dox/unsupported/TensorBase_8h_source.html
EigenVector<T>::Flatten(*db).device(place) = OutGrad.sum(dims); // colwise add
Eigen::array<int, 1> dims{{0}}; /* dimension to reduce */
EigenVector<T>::Flatten(*db).device(place) = out_grad.sum(dims);
}
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -286,6 +286,9 @@ class GradientChecker(unittest.TestCase): ...@@ -286,6 +286,9 @@ class GradientChecker(unittest.TestCase):
for no_grad in no_grad_set: for no_grad in no_grad_set:
if no_grad not in in_names: if no_grad not in in_names:
raise ValueError("no_grad should be in in_names") raise ValueError("no_grad should be in in_names")
if no_grad in inputs_to_check:
raise ValueError("no_grad should not be in inputs_to_check")
backward_op = core.Operator.backward(forward_op, no_grad_set) backward_op = core.Operator.backward(forward_op, no_grad_set)
places = [core.CPUPlace()] places = [core.CPUPlace()]
...@@ -301,7 +304,6 @@ class GradientChecker(unittest.TestCase): ...@@ -301,7 +304,6 @@ class GradientChecker(unittest.TestCase):
check_names = [grad_var_name(name) for name in inputs_to_check] check_names = [grad_var_name(name) for name in inputs_to_check]
for place in places: for place in places:
# get analytical gradients according to different device
analytic_grads = self.__get_gradient(forward_op, backward_op, analytic_grads = self.__get_gradient(forward_op, backward_op,
input_vars, check_names, place) input_vars, check_names, place)
self.__assert_is_close(numeric_grads, analytic_grads, check_names, self.__assert_is_close(numeric_grads, analytic_grads, check_names,
......
...@@ -16,16 +16,37 @@ class TestMulOp(unittest.TestCase): ...@@ -16,16 +16,37 @@ class TestMulOp(unittest.TestCase):
self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}
class MulGradOpTest(GradientChecker): class TestMulGradOp(GradientChecker):
def test_mul(self): def setUp(self):
op = create_op("mul") self.op = create_op("mul")
inputs = { self.inputs = {
'X': np.random.random((32, 84)).astype("float32"), 'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((84, 100)).astype("float32") 'Y': np.random.random((84, 100)).astype("float32")
} }
def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)
def test_normal(self):
# mul op will enlarge the relative error # mul op will enlarge the relative error
self.check_grad( self.check_grad(
op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
def test_ignore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})
def test_ignore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
# TODO(dzh,qijun) : mulgrad test case need transpose feature of blas library # TODO(dzh,qijun) : mulgrad test case need transpose feature of blas library
......
...@@ -16,14 +16,22 @@ class TestRowwiseAddOp(unittest.TestCase): ...@@ -16,14 +16,22 @@ class TestRowwiseAddOp(unittest.TestCase):
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])} self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
class RowwiseAddGradOpTest(GradientChecker): class TestRowwiseAddGradOp(GradientChecker):
def test_rowwise_add(self): def setUp(self):
op = create_op("rowwise_add") self.op = create_op("rowwise_add")
inputs = { self.inputs = {
"X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"), "X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"),
"b": np.random.uniform(0.1, 1, [10]).astype("float32") "b": np.random.uniform(0.1, 1, [10]).astype("float32")
} }
self.check_grad(op, inputs, set(["X", "b"]), "Out")
def test_normal(self):
self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
def test_ignore_b(self):
self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
def test_ignore_x(self):
self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册