Commit 7d16fe87 authored by caoying03

Refine the softmax operator.

Parent b3afe30d
......@@ -44,11 +44,12 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
The equation is: Out = scale*X
)DOC");
AddAttr<AttrType>("scale", "scale of scale operator.").SetDefault(1.0);
AddAttr<AttrType>("scale", "The scaling factor of the scale operator.")
.SetDefault(1.0);
}
};
// Identity Op's gradient is identity op, too.
// IdentityOp's gradient is IdentityOp, too.
// Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
template <typename AttrType>
class ScaleGradOp : public NetOp {
......@@ -65,17 +66,20 @@ class ScaleGradOp : public NetOp {
}
};
// identity is a alias of scale op. This is also a example for creating a alias
// operator.
// IdentityOp is an alias of the ScaleOp. This is also an example for creating
// an alias of an existing operator.
template <typename AttrType>
class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
public:
IdentityOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input tensor of identity op");
AddOutput("Out", "output tensor of identity op");
AddComment("identity operator. Just a alias of scale op which scale = 1.0");
AddInput("X", "The input tensor of identity op.");
AddOutput("Out", "The output tensor of identity op.");
AddComment(R"DOC(
The identity operator is just an alias of the scale operator with the
attribute scale fixed to 1.0.
)DOC");
}
};
......
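The comment in ScaleGradOp above notes that the gradient of the scale operator is again a scale operator. This follows directly from the chain rule (a standard derivation, not part of the diff): if Out = scale * X, then

∂L/∂X = scale · ∂L/∂Out,

i.e. Grad(X) = scale(Grad(Out)) with the same scale attribute, which is why ScaleGradOp can be implemented as a NetOp that simply applies the scale operator to the upstream gradient.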
......@@ -23,9 +23,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
PADDLE_ENFORCE(ctx.Input<Tensor>("logits")->dims().size() == 2UL,
"The input of softmax op must be a matrix.");
ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
ctx.Output<Tensor>("softmax")->Resize(ctx.Input<Tensor>("logits")->dims());
}
};
......@@ -34,10 +34,10 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
SoftmaxOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
AddInput("logits",
"The input tensor of softmax. "
"2-D with shape [batch_size, input_feature_dimensions].");
AddOutput("Y", "The normalized values with the same shape as X.");
AddOutput("softmax", "The normalized values with the same shape as X.");
AddComment(R"DOC(
The input of softmax operator is a 2-D tensor with shape N x K (N is the
batch_size, K is the dimension of input feature). The output tensor has the
......@@ -64,14 +64,17 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
"Input(Y@GRAD) should not be null");
PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
"the shape of Input(0) and Input(1) should be the same");
ctx.Output<Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("Y")->dims());
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("softmax"),
"Input(softmax) should be not null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("softmax")),
"Input(softmax@GRAD) should be not null.");
PADDLE_ENFORCE_EQ(
ctx.Input<Tensor>("softmax")->dims(),
ctx.Input<Tensor>(framework::GradVarName("softmax"))->dims(),
"Input(softmax) and its gradients should have a same shape.");
ctx.Output<Tensor>(framework::GradVarName("logits"))
->Resize(ctx.Input<Tensor>("logits")->dims());
}
};
......
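For reference, the normalization described in the SoftmaxOpMaker DOC string above is the standard row-wise softmax over an N x K input (general definition, not quoted from the diff):

softmax(X)[i, j] = exp(X[i, j]) / Σ_k exp(X[i, k]) for each row i and class j,

so every row of the output sums to one and the output keeps the shape of the input.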
......@@ -28,12 +28,12 @@ template <typename Place, typename T>
class SoftmaxKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto input = context.Input<Tensor>("X");
auto output = context.Output<Tensor>("Y");
output->mutable_data<T>(context.GetPlace());
auto X = context.Input<Tensor>("logits");
auto Y = context.Output<Tensor>("softmax");
Y->mutable_data<T>(context.GetPlace());
auto logits = EigenMatrix<T>::From(*input);
auto softmax = EigenMatrix<T>::From(*output);
auto logits = EigenMatrix<T>::From(*X);
auto softmax = EigenMatrix<T>::From(*Y);
const int kBatchDim = 0;
const int kClassDim = 1;
......@@ -69,9 +69,9 @@ class SoftmaxGradKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
auto Y = context.Input<Tensor>("Y");
auto dY = context.Input<Tensor>(framework::GradVarName("Y"));
auto dX = context.Output<Tensor>(framework::GradVarName("X"));
auto Y = context.Input<Tensor>("softmax");
auto dY = context.Input<Tensor>(framework::GradVarName("softmax"));
auto dX = context.Output<Tensor>(framework::GradVarName("logits"));
dX->mutable_data<T>(context.GetPlace());
const int batch_size = Y->dims()[0];
......
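The truncated SoftmaxGradKernel only needs Y (the forward softmax output) and dY (its upstream gradient), because under the standard softmax backward derivation (not shown in this hunk) the gradient with respect to the logits is

dX[i, j] = Y[i, j] * (dY[i, j] - Σ_k dY[i, k] * Y[i, k]),

which matches the inputs the kernel reads above.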
......@@ -18,18 +18,23 @@ class TestSoftmaxOp(unittest.TestCase):
def setUp(self):
self.type = "softmax"
self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
self.inputs = {"logits": np.random.random((10, 10)).astype("float32")}
self.outputs = {
'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
"softmax":
np.apply_along_axis(stable_softmax, 1, self.inputs["logits"])
}
class SoftmaxGradOpTest(GradientChecker):
def test_softmax(self):
op = create_op("softmax")
inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
self.check_grad(op, inputs, set("X"), "Y")
class TestSoftmaxGradOp(GradientChecker):
def setUp(self):
self.op = create_op("softmax")
self.inputs = {
"logits": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
}
def test_softmax_grad(self):
self.check_grad(self.op, self.inputs, ["logits"], "softmax")
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
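The test imports a stable_softmax helper that is not shown in this diff. A minimal sketch of what such a numerically stable reference implementation usually looks like (the exact helper in the repository may differ):

```python
import numpy as np

def stable_softmax(x):
    """Reference softmax for a 1-D array, shifted by the max to avoid overflow."""
    shifted = x - np.max(x)  # subtracting the max does not change the result
    exps = np.exp(shifted)
    return exps / np.sum(exps)
```

np.apply_along_axis(stable_softmax, 1, ...) then applies it to every row, so each row of the expected "softmax" output sums to one, mirroring what the C++ kernel computes.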