Commit da66891b authored by Cao Ying, committed by GitHub

Merge pull request #3928 from lcy-seso/refine_softmax_op

Refine names and doc of some operators.
@@ -18,17 +18,20 @@
 namespace paddle {
 namespace operators {
 
-// identity is a alias of scale op. This is also a example for creating a alias
-// operator.
+// The identity operator is an alias of the scale operator. This is also an
+// example for creating an alias for an existing operator.
 template <typename AttrType>
 class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
  IdentityOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "input tensor of identity op");
-    AddOutput("Out", "output tensor of identity op");
-    AddComment("identity operator. Just a alias of scale op which scale = 1.0");
+    AddInput("X", "The input tensor of identity operator.");
+    AddOutput("Out", "The output tensor of identity operator.");
+    AddComment(R"DOC(
+The identity operator is an alias of the scale operator
+with the attribute scale fixed to 1.0.
+)DOC");
   }
 };
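
A minimal numpy sketch of what the alias amounts to (illustrative only, not code from the patch):

    import numpy as np

    def scale_forward(x, scale=1.0):
        # The scale operator multiplies every element of X by the attribute `scale`.
        return scale * x

    def identity_forward(x):
        # The identity alias is scale with the attribute fixed to 1.0.
        return scale_forward(x, scale=1.0)

    x = np.random.random((2, 3)).astype("float32")
    assert np.allclose(identity_forward(x), x)
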
@@ -44,11 +44,13 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 The equation is: Out = scale*X
 )DOC");
-    AddAttr<AttrType>("scale", "scale of scale operator.").SetDefault(1.0);
+    AddAttr<AttrType>("scale", "The scaling factor of the scale operator.")
+        .SetDefault(1.0);
   }
 };
 
-// Scale Op's gradient is scale op, too.
+// The operator to calculate gradients of a scale operator is just the scale
+// operator itself.
 // Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
 template <typename AttrType>
 class ScaleGradOp : public NetOp {
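
The comment states the backward rule directly: since Out = scale * X, the gradient of X is the incoming gradient scaled by the same attribute. A small numpy sketch of that rule (illustrative, not the NetOp wiring):

    import numpy as np

    scale = 0.5
    x = np.random.random((4,)).astype("float32")
    dout = np.random.random((4,)).astype("float32")   # gradient w.r.t. Out

    out = scale * x        # forward:  Out = scale * X
    dx = scale * dout      # backward: Grad(X) = scale * Grad(Out)
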
@@ -51,7 +51,7 @@ the other dimensions in the K-dimensional vector input. Then the ratio of the
 exponential of the given dimension and the sum of exponential values of all
 the other dimensions is the output of the softmax operator.
 
-For each row `i` and each column `j` in X, we have:
+For each row `i` and each column `j` in input X, we have:
 
     Y[i, j] = exp(X[i, j]) / sum_j(exp(X[i, j]))
 
 )DOC");
@@ -64,14 +64,15 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
  protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should be not null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
-                            "Input(Y@GRAD) should not be null");
-    PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
-                       ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
-                   "the shape of Input(0) and Input(1) should be the same");
+                            "Input(Y@GRAD) should be not null.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Y")->dims(),
+                      ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
+                      "Input(Y) and its gradients should have a same shape.");
     ctx.Output<Tensor>(framework::GradVarName("X"))
-        ->Resize(ctx.Input<Tensor>("Y")->dims());
+        ->Resize(ctx.Input<Tensor>("X")->dims());
  }
 };
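
The checks above rely on Y, Y@GRAD and X@GRAD sharing one shape. The gradient kernel itself is not part of this diff; the usual softmax backward rule, given here only as a hedged numpy sketch, makes that invariant explicit:

    import numpy as np

    def softmax_grad(Y, dY):
        # dX[i, j] = (dY[i, j] - sum_j(dY[i, j] * Y[i, j])) * Y[i, j].
        # Every intermediate has exactly the shape of Y, so Grad(X) can be
        # resized to the same dims as the forward tensors.
        return (dY - np.sum(dY * Y, axis=1, keepdims=True)) * Y
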
@@ -28,12 +28,12 @@ template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto input = context.Input<Tensor>("X");
-    auto output = context.Output<Tensor>("Y");
-    output->mutable_data<T>(context.GetPlace());
+    auto X = context.Input<Tensor>("X");
+    auto Y = context.Output<Tensor>("Y");
+    Y->mutable_data<T>(context.GetPlace());
 
-    auto logits = EigenMatrix<T>::From(*input);
-    auto softmax = EigenMatrix<T>::From(*output);
+    auto logits = EigenMatrix<T>::From(*X);
+    auto softmax = EigenMatrix<T>::From(*Y);
 
     const int kBatchDim = 0;
     const int kClassDim = 1;
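
The body of the row-wise reduction is collapsed in this view. As a reference point only (an assumption about the usual numerically stable formulation, not the Eigen code), with axis 0 as kBatchDim and axis 1 as kClassDim:

    import numpy as np

    def stable_softmax_batch(logits):
        # Subtract the per-row max before exponentiating so exp() cannot overflow.
        shifted = logits - logits.max(axis=1, keepdims=True)
        e = np.exp(shifted)
        return e / e.sum(axis=1, keepdims=True)
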
@@ -4,8 +4,8 @@ import paddle.v2.framework.proto.framework_pb2 as framework_pb2
 def get_all_op_protos():
     """
-    Get all registered op proto from Paddle C++
-    :return: list of OpProto
+    Get all registered op proto from PaddlePaddle C++ end.
+    :return: A list of registered OpProto.
     """
     protostrs = core.get_all_op_protos()
     ret_values = []
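
A hedged usage sketch (assuming each returned OpProto exposes a `type` field, which this hunk does not show):

    # List every operator the C++ end has registered.
    for op_proto in get_all_op_protos():
        print(op_proto.type)
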
@@ -21,8 +21,8 @@ def is_str(s):
 class OpDescCreationMethod(object):
     """
-    A Functor object to convert user input(use key word args) to OpDesc based on
-    OpProto.
+    Convert the user's input(only keyword arguments are supported) to OpDesc
+    based on the OpProto.
 
     :param op_proto: The OpProto object.
     :type op_proto: op_proto_pb2.OpProto
@@ -30,17 +30,18 @@ class OpDescCreationMethod(object):
     def __init__(self, op_proto):
         if not isinstance(op_proto, framework_pb2.OpProto):
-            raise TypeError("Argument should be OpProto")
+            raise TypeError(
+                "Type of op_proto should be OpProto in PaddlePaddle.")
         self.__op_proto__ = op_proto
 
     def __call__(self, *args, **kwargs):
         """
-        Convert user input to OpDesc. Only key-word args are supported.
-        :return: OpDesc based on user input
+        Convert user's input to OpDesc. Only keyword arguments are supported.
+        :return: The OpDesc based on user input.
         :rtype: op_desc_pb2.OpDesc
         """
         if len(args) != 0:
-            raise ValueError("Only keyword arguments is supported by Paddle")
+            raise ValueError("Only keyword arguments are supported.")
         op_desc = framework_pb2.OpDesc()
 
         for input_parameter in self.__op_proto__.inputs:
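
A usage sketch of the functor; the dictionary comprehension assumes the `type` field on OpProto, and the argument values are illustrative:

    # Build the creation functor for the scale operator defined earlier in
    # this diff and call it with keyword arguments only.
    protos = {p.type: p for p in get_all_op_protos()}
    make_scale_desc = OpDescCreationMethod(protos["scale"])
    op_desc = make_scale_desc(X="x", Out="out", scale=0.5)
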
@@ -49,8 +50,9 @@ class OpDescCreationMethod(object):
                 input_arguments = [input_arguments]
 
             if not input_parameter.duplicable and len(input_arguments) > 1:
-                raise ValueError("Input %s only accepts one input, but give %d"
-                                 % (input_parameter.name, len(input_arguments)))
+                raise ValueError(
+                    "Input %s expects only one input, but %d are given." %
+                    (input_parameter.name, len(input_arguments)))
 
             ipt = op_desc.inputs.add()
             ipt.parameter = input_parameter.name
@@ -63,7 +65,7 @@ class OpDescCreationMethod(object):
             if not output_parameter.duplicable and len(output_arguments) > 1:
                 raise ValueError(
-                    "Output %s only accepts one output, but give %d" %
+                    "Output %s expects only one output, but %d are given." %
                     (output_parameter.name, len(output_arguments)))
 
             out = op_desc.outputs.add()
@@ -100,15 +102,17 @@ class OpDescCreationMethod(object):
                     pair.first = p[0]
                     pair.second = p[1]
             else:
-                raise NotImplementedError("Not support attribute type " +
-                                          str(attr.type))
+                raise NotImplementedError(
+                    "A not supported attribute type: %s." % (
+                        str(attr.type)))
 
         return op_desc
 
     @staticmethod
     def any_is_true(generator):
         """
-        Reduce a bool array to one. If any of them is True, then return True.
+        Reduce a boolean array to a single boolean parameter. If any element in
+        the array is True, this function will return True, otherwise False.
         """
         for flag in generator:
             if flag:
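
Behaviourally the helper matches Python's built-in any() over the generator, e.g.:

    flags = (x > 1 for x in [0, 1, 2])
    assert OpDescCreationMethod.any_is_true(flags) == any(x > 1 for x in [0, 1, 2])
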
@@ -127,7 +131,7 @@ class OpInfo(object):
 def create_op_creation_method(op_proto):
     """
-    Generate op creation method for an OpProto
+    Generate op creation method for an OpProto.
     """
     method = OpDescCreationMethod(op_proto)
@@ -146,20 +150,23 @@ def create_op_creation_method(op_proto):
 class OperatorFactory(object):
     def __init__(self):
         self.op_methods = dict()
 
         for op_proto in get_all_op_protos():
             method = create_op_creation_method(op_proto)
             self.op_methods[method.name] = method
 
     def __call__(self, *args, **kwargs):
-        if 'type' in kwargs:
+        if "type" in kwargs:
             if len(args) != 0:
-                raise ValueError("All Paddle argument should be key-word "
-                                 "argument except type")
-            t = kwargs.pop('type')
+                raise ValueError(
+                    "Except the argument \"type\","
+                    "all of the other arguments should be keyword arguments.")
+            t = kwargs.pop("type")
         else:
             if len(args) != 1:
-                raise ValueError("All Paddle argument should be key-word "
-                                 "argument except type")
+                raise ValueError(
+                    "Except the argument \"type\","
+                    "all of the other arguments should be keyword arguments.")
             t = args[0]
 
         return self.get_op_info(t).method(**kwargs)
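
This factory backs the global Operator object used by the tests below; for example the scale operator from this change could be created as (argument values are illustrative):

    # "type" may be given positionally or as the keyword "type";
    # every other argument must be a keyword argument.
    scale_op = Operator("scale", X="x", Out="out", scale=0.1)
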
@@ -169,7 +176,7 @@ class OperatorFactory(object):
     def get_op_info(self, t):
         if t not in self.op_methods:
-            raise ValueError("operator %s is not registered", t)
+            raise ValueError("The operator: %s is not registered." % t)
         return self.op_methods.get(t)
 
     def get_op_input_names(self, type):
@@ -184,7 +191,7 @@ class OperatorFactory(object):
 class __RecurrentOp__(object):
     __proto__ = None
-    type = 'recurrent'
+    type = "recurrent"
 
     def __init__(self):
         # cache recurrent_op's proto
@@ -194,8 +201,8 @@ class __RecurrentOp__(object):
             self.__proto__ = op_proto
 
     def __call__(self, *args, **kwargs):
-        if self.type not in args and 'type' not in kwargs:
-            kwargs['type'] = self.type
+        if self.type not in args and "type" not in kwargs:
+            kwargs["type"] = self.type
         # create proto
         create_method = OpDescCreationMethod(self.__proto__)
         proto = create_method(*args, **kwargs)
@@ -203,5 +210,5 @@ class __RecurrentOp__(object):
         return core.RecurrentOp.create(proto.SerializeToString())
 
-Operator = OperatorFactory()  # Default global factory
+Operator = OperatorFactory()  # The default global factory
 RecurrentOp = __RecurrentOp__()
@@ -38,9 +38,9 @@ def feed_data(name, data):
     assert isinstance(data, numpy.ndarray)
     tensor = scope.find_var(name).get_tensor()
     tensor.set_dims(data.shape)
-    if data.dtype == numpy.dtype('int32'):
+    if data.dtype == numpy.dtype("int32"):
         tensor.alloc_int(place)
-    elif data.dtype == numpy.dtype('float32'):
+    elif data.dtype == numpy.dtype("float32"):
         tensor.alloc_float(place)
     else:
         raise ValueError("data type not supported")
@@ -74,22 +74,25 @@ def init_param(net, param_name, dims):
 # fc_layer
 def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
     """
-    Add a fc layer to net
+    The fully connected layer.
 
-    :param input: input variable name.
+    :param input: The name of input variable.
     :type input: str
-    :param size: fully connected layer size.
-    :param act: activation name
-    :param param: parameter attribute, used for initialize parameters.
-    :param bias: bias attribute. False will not have a bias.
-    :param name: the name of fc layer. If not set, model will generate a
-        readable name
-    :return: output variable name.
+    :param size: The size of fully connected layer.
+    :param act: The name of activation.
+    :param param: The attribute of learnable parameter which can be used to
+        modify initialization mean and std of the parameter.
+    :param bias: The attribute of bias. If set False, this layer does not have
+        a bias.
+    :param name: The name of this layer. If it is not set explictly, a name
+        will be generated automatically.
+    :return: The name of the output variable.
     """
     if name is None:
-        name = 'fc_%d' % uniq_id()
+        name = "fc_%d" % uniq_id()
     if not isinstance(name, str):
-        raise ValueError("name should be string")
+        raise ValueError("The name of a layer should be a string.")
 
     input_dims = scope.find_var(input).get_tensor().get_dims()
@@ -123,7 +126,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
 def cross_entropy_layer(net, input, label):
-    cost_name = 'cross_entropy_%d' % uniq_id()
+    cost_name = "cross_entropy_%d" % uniq_id()
     cross_entropy_op = Operator(
         "onehot_cross_entropy", X=input, label=label, Y=cost_name)
     net.append_op(cross_entropy_op)
@@ -177,8 +180,8 @@ def error_rate(predict, label):
     return error_num / float(len(label))
 
-images = data_layer(name='pixel', dims=[BATCH_SIZE, 784])
-labels = data_layer(name='label', dims=[BATCH_SIZE])
+images = data_layer(name="pixel", dims=[BATCH_SIZE, 784])
+labels = data_layer(name="label", dims=[BATCH_SIZE])
 fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid")
 fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid")
 predict = fc_layer(net=forward_net, input=fc2, size=10, act="softmax")
@@ -7,11 +7,11 @@ from gradient_checker import get_numeric_gradient
 class GetNumericGradientTest(unittest.TestCase):
     def test_add_op(self):
-        add_op = Operator('add', X="X", Y="Y", Out="Z")
+        add_op = Operator("add", X="X", Y="Y", Out="Z")
         x = numpy.random.random((10, 1)).astype("float32")
         y = numpy.random.random((10, 1)).astype("float32")
 
-        arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
+        arr = get_numeric_gradient(add_op, {"X": x, "Y": y}, "Z", "X")
         self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4)
 
     def test_softmax_op(self):
@@ -35,9 +35,9 @@ class GetNumericGradientTest(unittest.TestCase):
         dY = numpy.ones(Y.shape)
         dX = label_softmax_grad(Y, dY)
 
-        arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
+        arr = get_numeric_gradient(softmax_op, {"X": X}, "Y", "X")
         numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
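
get_numeric_gradient itself is not part of this diff; a typical central-difference estimator it could be compared against (a sketch under that assumption):

    import numpy as np

    def numeric_gradient(f, x, eps=1e-4):
        # Estimate d f(x) / d x element-wise with central differences,
        # where f maps an ndarray to a scalar.
        grad = np.zeros_like(x)
        flat_x = x.reshape(-1)
        flat_g = grad.reshape(-1)
        for i in range(flat_x.size):
            orig = flat_x[i]
            flat_x[i] = orig + eps
            f_pos = f(x)
            flat_x[i] = orig - eps
            f_neg = f(x)
            flat_x[i] = orig
            flat_g[i] = (f_pos - f_neg) / (2 * eps)
        return grad

    # For f(x) = sum(x + y), the gradient w.r.t. x is all ones, which matches
    # the delta-based check in test_add_op above.
    x = np.random.random((10, 1)).astype("float64")
    y = np.random.random((10, 1)).astype("float64")
    g = numeric_gradient(lambda x_: np.sum(x_ + y), x)
    assert np.allclose(g, 1.0, atol=1e-4)
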
@@ -18,18 +18,22 @@ class TestSoftmaxOp(unittest.TestCase):
     def setUp(self):
         self.type = "softmax"
-        self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
+        self.inputs = {"X": np.random.random((10, 10)).astype("float32")}
         self.outputs = {
-            'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
+            "Y": np.apply_along_axis(stable_softmax, 1, self.inputs["X"])
         }
 
-class SoftmaxGradOpTest(GradientChecker):
-    def test_softmax(self):
-        op = create_op("softmax")
-        inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
-        self.check_grad(op, inputs, set("X"), "Y")
+class TestSoftmaxGradOp(GradientChecker):
+    def setUp(self):
+        self.op = create_op("softmax")
+        self.inputs = {
+            "X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
+        }
+
+    def test_softmax_grad(self):
+        self.check_grad(self.op, self.inputs, ["X"], "Y")
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()