From f07164912bca60a36a72dc6ce22f8e00caa99301 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Wed, 3 Jan 2018 20:00:07 +0800 Subject: [PATCH] fix backward --- paddle/operators/hierarchical_sigmoid_op.cc | 28 +++++++------- paddle/operators/hierarchical_sigmoid_op.h | 38 +++++++++---------- paddle/operators/math/matrix_bit_code.cc | 1 - paddle/pybind/pybind.cc | 2 - python/paddle/v2/fluid/executor.py | 1 - python/paddle/v2/fluid/tests/op_test.py | 2 - .../paddle/v2/fluid/tests/test_hsigmoid_op.py | 16 ++------ 7 files changed, 37 insertions(+), 51 deletions(-) diff --git a/paddle/operators/hierarchical_sigmoid_op.cc b/paddle/operators/hierarchical_sigmoid_op.cc index 4b3487f8b96..bc6ceb98747 100644 --- a/paddle/operators/hierarchical_sigmoid_op.cc +++ b/paddle/operators/hierarchical_sigmoid_op.cc @@ -61,10 +61,8 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Parameters"), - "Input(Parameters)" - "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null."); const int64_t batch_size = ctx->GetInputDim("X")[0]; std::vector output_shape({batch_size, 1}); @@ -84,15 +82,17 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Parameters"), - "Input(Parameters)" - "should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), - "Input(Label)" - "should not be null."); - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Parameters")), - "Input(Parameters@Grad should not be null.)"); + PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("W")), + "Input(W@Grad should not be null.)"); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X"))); + if (ctx->HasOutput(framework::GradVarName("Bias"))) { + ctx->SetOutputDim(framework::GradVarName("Bias"), + ctx->GetInputDim("Bias")); + } + ctx->SetOutputDim(framework::GradVarName("W"), ctx->GetInputDim("W")); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } protected: @@ -112,11 +112,11 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor, required) The input Tensor, which the shape is" "[N * D], which N is the size of mini-batch," "D is the embded size"); - AddInput("Parameters", + AddInput("W", "(Tensor, required), The parameters of hierarchical " "sigmoid operator, each of them is s a 3-D tensor, the shape is" "[N, num_classes - 1, D]"); - AddInput("Label", + AddInput("Ids", "(Tensor, required), The labels of training data. It's a" "1-D tensor, which the shape is [1, N]"); AddInput("Bias", diff --git a/paddle/operators/hierarchical_sigmoid_op.h b/paddle/operators/hierarchical_sigmoid_op.h index 531fd9f7fc0..1b8d21c095e 100644 --- a/paddle/operators/hierarchical_sigmoid_op.h +++ b/paddle/operators/hierarchical_sigmoid_op.h @@ -32,15 +32,14 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* params = ctx.Input("Parameters"); - auto* label = ctx.Input("Label"); + auto* w = ctx.Input("W"); + auto* ids = ctx.Input("Ids"); auto* bias = ctx.Input("Bias"); auto* out = ctx.Output("Out"); size_t num_classes = static_cast(ctx.Attr("num_classes")); int64_t code_length = math::FindLastSet(num_classes - 1); int64_t batch_size = in->dims()[0]; - auto* ids = label->data(); framework::Tensor pre_out; framework::Tensor sum; auto pre_out_data = pre_out.mutable_data( @@ -59,18 +58,19 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { auto out_mat = framework::EigenVector::Flatten(*out); if (bias) { - bit_code.Add(num_classes, ids, pre_out, *bias); + bit_code.Add(num_classes, ids->data(), pre_out, *bias); } for (int i = 0; i < in->dims()[0]; ++i) { - bit_code.Mul(num_classes, ids, pre_out, params->Slice(i, i + 1), - in->Slice(i, i + 1)); + bit_code.Mul(num_classes, ids->data(), pre_out, + w->Slice(i, i + 1), in->Slice(i, i + 1)); } // clip the matrix with (-40, 40) Transform trans; trans(ctx.template device_context(), pre_out_data, pre_out_data + pre_out.numel(), pre_out_data, ClipFunctor(static_cast(-40.0), static_cast(40.0))); - bit_code.Sum(num_classes, ids, pre_out, *out, static_cast(-1)); + bit_code.Sum(num_classes, ids->data(), pre_out, *out, + static_cast(-1)); // softrelu with threshold is 40.0 trans(ctx.template device_context(), pre_out_data, pre_out_data + pre_out.numel(), pre_out_data, @@ -88,10 +88,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto* in_grad = ctx.Output(framework::GradVarName("X")); - auto* params = - ctx.Output(framework::GradVarName("Parameters")); + auto* w = ctx.Output(framework::GradVarName("W")); auto* bias = ctx.Output(framework::GradVarName("Bias")); - auto* label = ctx.Input("Label"); + auto* ids = ctx.Input("Ids"); size_t num_classes = static_cast(ctx.Attr("num_classes")); int64_t code_length = math::FindLastSet(num_classes - 1); int64_t batch_size = in->dims()[0]; @@ -102,8 +101,6 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { auto& place = *ctx.template device_context().eigen_device(); auto& device_ctx = ctx.template device_context(); auto pre_out_mat = EigenMatrix::From(pre_out); - auto* ids = label->data(); - // init pre_out matrix with {1.0} math::SetConstant one; math::MatrixBitCodeFunctor bit_code; @@ -112,19 +109,22 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { pre_out_mat.device(place) = pre_out_mat * (static_cast(1.0) - static_cast(1.0) / pre_out_mat); - bit_code.Sub(num_classes, ids, pre_out); + bit_code.Sub(num_classes, ids->data(), pre_out); if (bias) { - bit_code.AddGrad(num_classes, ids, pre_out, *bias); + bias->mutable_data(ctx.GetPlace()); + bit_code.AddGrad(num_classes, ids->data(), pre_out, *bias); } - + in_grad->mutable_data(ctx.GetPlace()); + w->mutable_data(ctx.GetPlace()); for (int i = 0; i < in_grad->dims()[0]; ++i) { - auto p_sliced = params->Slice(i, i + 1); + auto p_sliced = w->Slice(i, i + 1); auto in_sliced = in->Slice(i, i + 1); auto in_grad_sliced = in_grad->Slice(i, i + 1); - bit_code.MulGradWeight(num_classes, ids, pre_out, p_sliced, in_sliced); - bit_code.MulGradError(num_classes, ids, pre_out, p_sliced, - in_grad_sliced); + bit_code.MulGradWeight(num_classes, ids->data(), pre_out, + p_sliced, in_sliced); + bit_code.MulGradError(num_classes, ids->data(), pre_out, + p_sliced, in_grad_sliced); } } }; diff --git a/paddle/operators/math/matrix_bit_code.cc b/paddle/operators/math/matrix_bit_code.cc index 4ad0a000083..b192183b101 100644 --- a/paddle/operators/math/matrix_bit_code.cc +++ b/paddle/operators/math/matrix_bit_code.cc @@ -56,7 +56,6 @@ static void AddByBitCodeT(Op op, CodeTable code_table, const int64_t* codes, const framework::Tensor& vec) { size_t num_sample = tmat.dims()[0]; size_t width = vec.dims()[1]; - for (size_t i = 0; i < num_sample; ++i) { auto code = code_table(static_cast(codes[i])); int code_length = code.get_length(); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 921b316a695..de6b24f70d8 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -109,8 +109,6 @@ PYBIND11_PLUGIN(core) { .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) .def("set_float_element", TensorSetElement) .def("get_float_element", TensorGetElement) - .def("set_int64_element", TensorSetElement) - .def("get_int64_element", TensorGetElement) .def("set_double_element", TensorSetElement) .def("get_double_element", TensorGetElement) .def("dtype", [](Tensor &self) { return ToDataType(self.type()); }); diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index cdd576294f4..a054d5eafb2 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -148,7 +148,6 @@ class Executor(object): inputs={'X': [var]}, outputs={'Out': [fetch_var]}, attrs={'col': i}) - self.executor.run(program.desc, scope, 0, True, True) outs = [ core.get_fetch_variable(scope, fetch_var_name, i) diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index 287dc298048..0493a0c2061 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -123,8 +123,6 @@ def get_numeric_gradient(scope, def __set_elem__(tensor, i, e): if tensor_to_check_dtype == np.float32: tensor.set_float_element(i, e) - elif tensor_to_check_dtype == np.int64: - tensor.set_int64_element(i, e) else: tensor.set_double_element(i, e) diff --git a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py index 194d5e315fc..b6d961b6318 100644 --- a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py +++ b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py @@ -10,16 +10,11 @@ class TestHSigmoidOp(OpTest): embded_size = 10 batch_size = 5 x = np.random.random((batch_size, embded_size)).astype("float32") - parameter = np.random.random( + w = np.random.random( (batch_size, num_classes - 1, embded_size)).astype("float32") - label = np.random.randint(0, num_classes, batch_size) + ids = np.random.randint(0, num_classes, batch_size) bias = np.random.random((1, num_classes - 1)).astype("float32") - self.inputs = { - 'X': x, - 'Parameters': parameter, - 'Label': label, - 'Bias': bias - } + self.inputs = {'X': x, 'W': w, 'Ids': ids, 'Bias': bias} self.attrs = {'num_classes': num_classes} self.outputs = { 'Out': np.random.random((batch_size, 1)).astype("float32") @@ -29,10 +24,7 @@ class TestHSigmoidOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['X', 'Parameters', 'Label', 'Bias'], - 'Out', - no_grad_set=set(['Label'])) + self.check_grad(['X', 'W', 'Bias'], 'Out', no_grad_set=set('Ids')) if __name__ == '__main__': -- GitLab