diff --git a/paddle/operators/hierarchical_sigmoid_op.cc b/paddle/operators/hierarchical_sigmoid_op.cc
index 4b3487f8b96683e436227ae3171e39496c84833f..bc6ceb987477903c621e1fb88e36a7f8b9d58e3a 100644
--- a/paddle/operators/hierarchical_sigmoid_op.cc
+++ b/paddle/operators/hierarchical_sigmoid_op.cc
@@ -61,10 +61,8 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Parameters"),
-                   "Input(Parameters)"
-                   "should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
     const int64_t batch_size = ctx->GetInputDim("X")[0];
     std::vector<int64_t> output_shape({batch_size, 1});
@@ -84,15 +82,17 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Parameters"),
-                   "Input(Parameters)"
-                   "should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"),
-                   "Input(Label)"
-                   "should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Parameters")),
-                   "Input(Parameters@Grad should not be null.)");
+    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("W")),
+                   "Input(W@Grad) should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")));
+    if (ctx->HasOutput(framework::GradVarName("Bias"))) {
+      ctx->SetOutputDim(framework::GradVarName("Bias"),
+                        ctx->GetInputDim("Bias"));
+    }
+    ctx->SetOutputDim(framework::GradVarName("W"), ctx->GetInputDim("W"));
+    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
   }
 
  protected:
@@ -112,11 +112,11 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor, required) The input Tensor, which the shape is"
              "[N * D], which N is the size of mini-batch,"
              "D is the embded size");
-    AddInput("Parameters",
+    AddInput("W",
             "(Tensor, required), The parameters of hierarchical "
             "sigmoid operator, each of them is s a 3-D tensor, the shape is"
             "[N, num_classes - 1, D]");
-    AddInput("Label",
+    AddInput("Ids",
It's a" "1-D tensor, which the shape is [1, N]"); AddInput("Bias", diff --git a/paddle/operators/hierarchical_sigmoid_op.h b/paddle/operators/hierarchical_sigmoid_op.h index 531fd9f7fc09e90e301b0ab7fd7efeafe5077018..1b8d21c095eac3e64f459732b359c72e4986db81 100644 --- a/paddle/operators/hierarchical_sigmoid_op.h +++ b/paddle/operators/hierarchical_sigmoid_op.h @@ -32,15 +32,14 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* params = ctx.Input("Parameters"); - auto* label = ctx.Input("Label"); + auto* w = ctx.Input("W"); + auto* ids = ctx.Input("Ids"); auto* bias = ctx.Input("Bias"); auto* out = ctx.Output("Out"); size_t num_classes = static_cast(ctx.Attr("num_classes")); int64_t code_length = math::FindLastSet(num_classes - 1); int64_t batch_size = in->dims()[0]; - auto* ids = label->data(); framework::Tensor pre_out; framework::Tensor sum; auto pre_out_data = pre_out.mutable_data( @@ -59,18 +58,19 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { auto out_mat = framework::EigenVector::Flatten(*out); if (bias) { - bit_code.Add(num_classes, ids, pre_out, *bias); + bit_code.Add(num_classes, ids->data(), pre_out, *bias); } for (int i = 0; i < in->dims()[0]; ++i) { - bit_code.Mul(num_classes, ids, pre_out, params->Slice(i, i + 1), - in->Slice(i, i + 1)); + bit_code.Mul(num_classes, ids->data(), pre_out, + w->Slice(i, i + 1), in->Slice(i, i + 1)); } // clip the matrix with (-40, 40) Transform trans; trans(ctx.template device_context(), pre_out_data, pre_out_data + pre_out.numel(), pre_out_data, ClipFunctor(static_cast(-40.0), static_cast(40.0))); - bit_code.Sum(num_classes, ids, pre_out, *out, static_cast(-1)); + bit_code.Sum(num_classes, ids->data(), pre_out, *out, + static_cast(-1)); // softrelu with threshold is 40.0 trans(ctx.template device_context(), pre_out_data, pre_out_data + pre_out.numel(), pre_out_data, @@ -88,10 +88,9 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto* in_grad = ctx.Output(framework::GradVarName("X")); - auto* params = - ctx.Output(framework::GradVarName("Parameters")); + auto* w = ctx.Output(framework::GradVarName("W")); auto* bias = ctx.Output(framework::GradVarName("Bias")); - auto* label = ctx.Input("Label"); + auto* ids = ctx.Input("Ids"); size_t num_classes = static_cast(ctx.Attr("num_classes")); int64_t code_length = math::FindLastSet(num_classes - 1); int64_t batch_size = in->dims()[0]; @@ -102,8 +101,6 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { auto& place = *ctx.template device_context().eigen_device(); auto& device_ctx = ctx.template device_context(); auto pre_out_mat = EigenMatrix::From(pre_out); - auto* ids = label->data(); - // init pre_out matrix with {1.0} math::SetConstant one; math::MatrixBitCodeFunctor bit_code; @@ -112,19 +109,22 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { pre_out_mat.device(place) = pre_out_mat * (static_cast(1.0) - static_cast(1.0) / pre_out_mat); - bit_code.Sub(num_classes, ids, pre_out); + bit_code.Sub(num_classes, ids->data(), pre_out); if (bias) { - bit_code.AddGrad(num_classes, ids, pre_out, *bias); + bias->mutable_data(ctx.GetPlace()); + bit_code.AddGrad(num_classes, ids->data(), pre_out, *bias); } - + in_grad->mutable_data(ctx.GetPlace()); + w->mutable_data(ctx.GetPlace()); for (int i = 0; i < 
     for (int i = 0; i < in_grad->dims()[0]; ++i) {
-      auto p_sliced = params->Slice(i, i + 1);
+      auto p_sliced = w->Slice(i, i + 1);
       auto in_sliced = in->Slice(i, i + 1);
       auto in_grad_sliced = in_grad->Slice(i, i + 1);
-      bit_code.MulGradWeight(num_classes, ids, pre_out, p_sliced, in_sliced);
-      bit_code.MulGradError(num_classes, ids, pre_out, p_sliced,
-                            in_grad_sliced);
+      bit_code.MulGradWeight(num_classes, ids->data<int64_t>(), pre_out,
+                             p_sliced, in_sliced);
+      bit_code.MulGradError(num_classes, ids->data<int64_t>(), pre_out,
+                            p_sliced, in_grad_sliced);
     }
   }
 };
diff --git a/paddle/operators/math/matrix_bit_code.cc b/paddle/operators/math/matrix_bit_code.cc
index 4ad0a00008342369c8b829d39814f0da073f5d78..b192183b101c9b354a3355a9325b2f96fe17ea29 100644
--- a/paddle/operators/math/matrix_bit_code.cc
+++ b/paddle/operators/math/matrix_bit_code.cc
@@ -56,7 +56,6 @@ static void AddByBitCodeT(Op op, CodeTable code_table, const int64_t* codes,
                           const framework::Tensor& vec) {
   size_t num_sample = tmat.dims()[0];
   size_t width = vec.dims()[1];
-
   for (size_t i = 0; i < num_sample; ++i) {
     auto code = code_table(static_cast<size_t>(codes[i]));
     int code_length = code.get_length();
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 921b316a69553012ccf960136d9f7cbf1d4651a4..de6b24f70d84a28add0c0a09cac79b8c5b1044de 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -109,8 +109,6 @@ PYBIND11_PLUGIN(core) {
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
       .def("set_float_element", TensorSetElement<float>)
       .def("get_float_element", TensorGetElement<float>)
-      .def("set_int64_element", TensorSetElement<int64_t>)
-      .def("get_int64_element", TensorGetElement<int64_t>)
       .def("set_double_element", TensorSetElement<double>)
       .def("get_double_element", TensorGetElement<double>)
       .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });
diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py
index cdd576294f4f53bd3760b2c95a41b2129004a51a..a054d5eafb2b69d3a680ced373c53c69a609b4ac 100644
--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -148,7 +148,6 @@ class Executor(object):
                 inputs={'X': [var]},
                 outputs={'Out': [fetch_var]},
                 attrs={'col': i})
-
         self.executor.run(program.desc, scope, 0, True, True)
         outs = [
             core.get_fetch_variable(scope, fetch_var_name, i)
diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py
index 287dc29804896bea37bbabf70cbe3b974e513387..0493a0c20611ff8a12d64c853578817e29748fa2 100644
--- a/python/paddle/v2/fluid/tests/op_test.py
+++ b/python/paddle/v2/fluid/tests/op_test.py
@@ -123,8 +123,6 @@ def get_numeric_gradient(scope,
     def __set_elem__(tensor, i, e):
         if tensor_to_check_dtype == np.float32:
             tensor.set_float_element(i, e)
-        elif tensor_to_check_dtype == np.int64:
-            tensor.set_int64_element(i, e)
         else:
             tensor.set_double_element(i, e)
diff --git a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
index 194d5e315fcb0144cab9aa2935637a42d2de0373..b6d961b6318c3b5ddc08902f00317ee81dcd2dba 100644
--- a/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
+++ b/python/paddle/v2/fluid/tests/test_hsigmoid_op.py
@@ -10,16 +10,11 @@ class TestHSigmoidOp(OpTest):
         embded_size = 10
         batch_size = 5
         x = np.random.random((batch_size, embded_size)).astype("float32")
-        parameter = np.random.random(
+        w = np.random.random(
             (batch_size, num_classes - 1, embded_size)).astype("float32")
-        label = np.random.randint(0, num_classes, batch_size)
+        ids = np.random.randint(0, num_classes, batch_size)
         bias = np.random.random((1, num_classes - 1)).astype("float32")
-        self.inputs = {
-            'X': x,
-            'Parameters': parameter,
-            'Label': label,
-            'Bias': bias
-        }
+        self.inputs = {'X': x, 'W': w, 'Ids': ids, 'Bias': bias}
         self.attrs = {'num_classes': num_classes}
         self.outputs = {
             'Out': np.random.random((batch_size, 1)).astype("float32")
         }
@@ -29,10 +24,7 @@
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            ['X', 'Parameters', 'Label', 'Bias'],
-            'Out',
-            no_grad_set=set(['Label']))
+        self.check_grad(['X', 'W', 'Bias'], 'Out', no_grad_set=set(['Ids']))
 
 
 if __name__ == '__main__':