diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc
index 83d35a450d0e8ebf5311cdfd948b066642ccec8c..c66d575d24bb6b410602c34965ab1db6bc81b41d 100644
--- a/paddle/operators/activation_op.cc
+++ b/paddle/operators/activation_op.cc
@@ -98,7 +98,6 @@ $y = \max(x, 0)$
   }
 };
 
-template <typename AttrType>
 class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   LeakyReluOpMaker(framework::OpProto *proto,
@@ -106,8 +105,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Y", "Output of LeakyRelu operator");
-    AddAttr<AttrType>("alpha", "The small negative slope")
-        .SetDefault(static_cast<AttrType>(0.02f));
+    AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
     AddComment(R"DOC(
 LeakyRelu Activation Operator.
 
@@ -117,7 +115,6 @@ $y = \max(x, \alpha * x)$
   }
 };
 
-template <typename AttrType>
 class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SoftShrinkOpMaker(framework::OpProto *proto,
@@ -125,8 +122,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Softshrink operator");
     AddOutput("Y", "Output of Softshrink operator");
-    AddAttr<AttrType>("lambda", "non-negative offset")
-        .SetDefault(static_cast<AttrType>(0.5f));
+    AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
     AddComment(R"DOC(
 Softshrink Activation Operator.
 
@@ -173,7 +169,6 @@ $$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
   }
 };
 
-template <typename AttrType>
 class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   HardShrinkOpMaker(framework::OpProto *proto,
@@ -181,8 +176,8 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of HardShrink operator");
     AddOutput("Y", "Output of HardShrink operator");
-    AddAttr<AttrType>("threshold", "The value of threshold for HardShrink")
-        .SetDefault(static_cast<AttrType>(0.5));
+    AddAttr<float>("threshold", "The value of threshold for HardShrink")
+        .SetDefault(0.5f);
     AddComment(R"DOC(
 HardShrink Activation Operator.
 
@@ -308,17 +303,16 @@ $$y = \frac{x}{1 + |x|}$$
   }
 };
 
-template <typename AttrType>
 class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of BRelu operator");
     AddOutput("Y", "Output of BRelu operator");
-    AddAttr<AttrType>("t_min", "The min marginal value of BRelu")
-        .SetDefault(static_cast<AttrType>(0));
-    AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
-        .SetDefault(static_cast<AttrType>(24));
+    AddAttr<float>("t_min", "The min marginal value of BRelu")
+        .SetDefault(static_cast<float>(0));
+    AddAttr<float>("t_max", "The max marginal value of BRelu")
+        .SetDefault(static_cast<float>(24));
     AddComment(R"DOC(
 BRelu Activation Operator.
 
@@ -328,7 +322,6 @@ $y = \max(\min(x, t_{min}), t_{max})$
   }
 };
 
-template <typename AttrType>
 class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SoftReluOpMaker(framework::OpProto *proto,
@@ -336,8 +329,8 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of SoftRelu operator");
     AddOutput("Y", "Output of SoftRelu operator");
-    AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
-        .SetDefault(static_cast<AttrType>(40));
+    AddAttr<float>("threshold", "The threshold value of SoftRelu")
+        .SetDefault(40.0f);
     AddComment(R"DOC(
 SoftRelu Activation Operator.
 
@@ -347,15 +340,13 @@ $y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$
   }
 };
 
-template <typename AttrType>
 class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of ELU operator");
     AddOutput("Y", "Output of ELU operator");
-    AddAttr<AttrType>("alpha", "The alpha value of ELU")
-        .SetDefault(static_cast<AttrType>(1.0f));
+    AddAttr<float>("alpha", "The alpha value of ELU").SetDefault(1.0f);
     AddComment(R"DOC(
 ELU Activation Operator.
 
@@ -368,15 +359,14 @@ $y = \max(0, x) + \min(0, \alpha * (e^x - 1))$
   }
 };
 
-template <typename AttrType>
 class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Relu6 operator");
     AddOutput("Y", "Output of Relu6 operator");
-    AddAttr<AttrType>("threshold", "The threshold value of Relu6")
-        .SetDefault(static_cast<AttrType>(6));
+    AddAttr<float>("threshold", "The threshold value of Relu6")
+        .SetDefault(6.0f);
     AddComment(R"DOC(
 Relu6 Activation Operator.
 
@@ -386,15 +376,13 @@ $y = \min(\max(0, x), 6)$
   }
 };
 
-template <typename AttrType>
 class PowOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Pow operator");
     AddOutput("Y", "Output of Pow operator");
-    AddAttr<AttrType>("factor", "The exponential factor of Pow")
-        .SetDefault(static_cast<AttrType>(1));
+    AddAttr<float>("factor", "The exponential factor of Pow").SetDefault(1.0f);
     AddComment(R"DOC(
 Pow Activation Operator.
 
@@ -404,17 +392,16 @@ $y = x^{factor}$
   }
 };
 
-template <typename AttrType>
 class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of STanh operator");
     AddOutput("Y", "Output of STanh operator");
-    AddAttr<AttrType>("scale_a", "The scale parameter of a for the input")
-        .SetDefault(static_cast<AttrType>(2 / 3));
-    AddAttr<AttrType>("scale_b", "The scale parameter of b for the input")
-        .SetDefault(static_cast<AttrType>(1.7159));
+    AddAttr<float>("scale_a", "The scale parameter of a for the input")
+        .SetDefault(2.0f / 3.0f);
+    AddAttr<float>("scale_b", "The scale parameter of b for the input")
+        .SetDefault(1.7159f);
     AddComment(R"DOC(
 STanh Activation Operator.
 
@@ -424,7 +411,6 @@ $$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
   }
 };
 
-template <typename AttrType>
 class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ThresholdedReluOpMaker(framework::OpProto *proto,
@@ -432,8 +418,8 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of ThresholdedRelu operator");
     AddOutput("Y", "Output of ThresholdedRelu operator");
-    AddAttr<AttrType>("threshold", "The threshold location of activation")
-        .SetDefault(static_cast<AttrType>(1.0));
+    AddAttr<float>("threshold", "The threshold location of activation")
+        .SetDefault(1.0f);
     AddComment(R"DOC(
 ThresholdedRelu Activation Operator.
 
@@ -448,7 +434,6 @@ $$
   }
 };
 
-template <typename AttrType>
 class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   HardSigmoidOpMaker(framework::OpProto *proto,
@@ -456,10 +441,10 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of HardSigmoid operator");
     AddOutput("Y", "Output of HardSigmoid operator");
-    AddAttr<AttrType>("slope", "Slope for linear approximation of sigmoid")
-        .SetDefault(static_cast<AttrType>(0.2));
-    AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
-        .SetDefault(static_cast<AttrType>(0.5));
+    AddAttr<float>("slope", "Slope for linear approximation of sigmoid")
+        .SetDefault(0.2f);
+    AddAttr<float>("offset", "Offset for linear approximation of sigmoid")
+        .SetDefault(0.5f);
     AddComment(R"DOC(
 HardSigmoid Activation Operator.
 
@@ -499,7 +484,7 @@ REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
             ops::ActivationOpGrad);
 
 REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker,
             tanh_shrink_grad, ops::ActivationOpGrad);
 
-REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker<float>,
+REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker,
             softshrink_grad, ops::ActivationOpGrad);
 
 REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
@@ -523,35 +508,34 @@ REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad,
             ops::ActivationOpGrad);
 
 REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad,
+REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker<float>,
+REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
             leaky_relu_grad, ops::ActivationOpGrad);
 
-REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
-            soft_relu_grad, ops::ActivationOpGrad);
+REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, soft_relu_grad,
+            ops::ActivationOpGrad);
 
-REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker<float>, elu_grad,
+REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker<float>, relu6_grad,
+REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
+REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
+REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad,
             ops::ActivationOpGrad);
 
-REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker<float>,
+REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker,
             hard_shrink_grad, ops::ActivationOpGrad);
 
-REGISTER_OP(thresholded_relu, ops::ActivationOp,
-            ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad,
-            ops::ActivationOpGrad);
+REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker,
+            thresholded_relu_grad, ops::ActivationOpGrad);
 
-REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker<float>,
+REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker,
             hard_sigmoid_grad, ops::ActivationOpGrad);
 
 #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
diff --git a/paddle/operators/adadelta_op.cc b/paddle/operators/adadelta_op.cc
index b717e1647e4b89285b841420650dc69e8a1e0c58..16a7794d5b7bf1d56cd9f5874454c41cab43b41f 100644
--- a/paddle/operators/adadelta_op.cc
+++ b/paddle/operators/adadelta_op.cc
@@ -109,4 +109,5 @@ paramOut = param + paramUpdate$$
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(adadelta, ops::AdadeltaOp, ops::AdadeltaOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    adadelta, ops::AdadeltaOpKernel<paddle::platform::CPUPlace, float>);
+    adadelta, ops::AdadeltaOpKernel<paddle::platform::CPUPlace, float>,
+    ops::AdadeltaOpKernel<paddle::platform::CPUPlace, double>);
diff --git a/paddle/operators/adadelta_op.cu b/paddle/operators/adadelta_op.cu
index 3af1c8c8e9861138a33b3156818f704c3b20363f..9fb61852071f11670b8bc51321bb0881de196777 100644
--- a/paddle/operators/adadelta_op.cu
+++ b/paddle/operators/adadelta_op.cu
@@ -17,4 +17,5 @@
 
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
-    adadelta, ops::AdadeltaOpKernel<paddle::platform::GPUPlace, float>);
+    adadelta, ops::AdadeltaOpKernel<paddle::platform::GPUPlace, float>,
+    ops::AdadeltaOpKernel<paddle::platform::GPUPlace, double>);
diff --git a/paddle/operators/adadelta_op.h b/paddle/operators/adadelta_op.h
index d29e15c43583bd447fbacb548a326f303f7d1463..a8c5f0c8aa20ce506f5279fa696079ba64034bd5 100644
--- a/paddle/operators/adadelta_op.h
+++ b/paddle/operators/adadelta_op.h
@@ -33,8 +33,8 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
     avg_squared_grad_out_tensor->mutable_data<T>(ctx.GetPlace());
     avg_squared_update_out_tensor->mutable_data<T>(ctx.GetPlace());
 
-    float rho = ctx.Attr<float>("rho");
-    float epsilon = ctx.Attr<float>("epsilon");
+    T rho = static_cast<T>(ctx.Attr<float>("rho"));
+    T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
 
     auto param = framework::EigenVector<T>::Flatten(
         *ctx.Input<framework::Tensor>("Param"));
diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu
index 5b869e6bc5f4604ba6055ffd62fa21e4a1f41b93..1c870214b29dbfcabb7414317b1214d6bef369cb 100644
--- a/paddle/operators/adagrad_op.cu
+++ b/paddle/operators/adagrad_op.cu
@@ -14,8 +14,8 @@
 
 #define EIGEN_USE_GPU
 #include "paddle/operators/adagrad_op.h"
-#include "paddle/operators/math/selected_rows_functor.h"
 #include "paddle/operators/math/math_function.h"
+#include "paddle/operators/math/selected_rows_functor.h"
 #include "paddle/platform/cuda_helper.h"
 
 namespace paddle {
@@ -134,8 +134,8 @@ struct SparseAdagradFunctor {
         T, 256><<(context)
                      .stream()>>>(grad_merge_data, grad_merge->rows().data(),
-                                   lr, param_data,
-                                   moment_data, grad_width, epsilon);
+                                   lr, param_data, moment_data, grad_width,
+                                   epsilon);
   }
 };
diff --git a/paddle/operators/adam_op.cc b/paddle/operators/adam_op.cc
index 97a091ae766abfba5412bbd32c34a6f80701fbf7..03faa2a7c5a486cb0d2b6f2f10d140eeb4c6c04e 100644
--- a/paddle/operators/adam_op.cc
+++ b/paddle/operators/adam_op.cc
@@ -127,4 +127,5 @@ paramOut = param - learningRate * moment_1/ ($\sqrt{(moment_2)} + \epsilon)$$
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker);
 REGISTER_OP_CPU_KERNEL(adam,
-                       ops::AdamOpKernel<paddle::platform::CPUPlace, float>);
+                       ops::AdamOpKernel<paddle::platform::CPUPlace, float>,
+                       ops::AdamOpKernel<paddle::platform::CPUPlace, double>);
diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu
index a3def912e540454275350209435eb01ae2151331..6e34f7818ce20c75692fe21776721ce200b7a147 100644
--- a/paddle/operators/adam_op.cu
+++ b/paddle/operators/adam_op.cu
@@ -17,4 +17,5 @@
 
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(adam,
-                       ops::AdamOpKernel<paddle::platform::GPUPlace, float>);
+                       ops::AdamOpKernel<paddle::platform::GPUPlace, float>,
+                       ops::AdamOpKernel<paddle::platform::GPUPlace, double>);
diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h
index 45938006db1231a7a134964d729df6ca114d4dbe..7f7fa1da1c0d8d81d1bcb18a1bf542838eddccf7 100644
--- a/paddle/operators/adam_op.h
+++ b/paddle/operators/adam_op.h
@@ -31,9 +31,9 @@ class AdamOpKernel : public framework::OpKernel<T> {
     moment1_out_tensor->mutable_data<T>(ctx.GetPlace());
     moment2_out_tensor->mutable_data<T>(ctx.GetPlace());
 
-    float beta1 = ctx.Attr<float>("beta1");
-    float beta2 = ctx.Attr<float>("beta2");
-    float epsilon = ctx.Attr<float>("epsilon");
+    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
+    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
+    T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
 
     auto param = framework::EigenVector<T>::Flatten(
         *ctx.Input<framework::Tensor>("Param"));
diff --git a/paddle/operators/adamax_op.cc b/paddle/operators/adamax_op.cc
index 14cf3841b33a8153549e4c99ed2b75286e9c64db..d5bbc672e18f392d6a91383b919fefc4b2d8ff0e 100644
--- a/paddle/operators/adamax_op.cc
+++ b/paddle/operators/adamax_op.cc
@@ -126,4 +126,5 @@ division by 0 error.
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(adamax, ops::AdamaxOp, ops::AdamaxOpMaker);
 REGISTER_OP_CPU_KERNEL(adamax,
-                       ops::AdamaxOpKernel<paddle::platform::CPUPlace, float>);
+                       ops::AdamaxOpKernel<paddle::platform::CPUPlace, float>,
+                       ops::AdamaxOpKernel<paddle::platform::CPUPlace, double>);
diff --git a/paddle/operators/adamax_op.cu b/paddle/operators/adamax_op.cu
index fee3b6fc6b656917d79b84f48da8e63be7683890..057ef39025aa23704457ef7bbe54934d06cdc87f 100644
--- a/paddle/operators/adamax_op.cu
+++ b/paddle/operators/adamax_op.cu
@@ -17,4 +17,5 @@
 
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(adamax,
-                       ops::AdamaxOpKernel<paddle::platform::GPUPlace, float>);
+                       ops::AdamaxOpKernel<paddle::platform::GPUPlace, float>,
+                       ops::AdamaxOpKernel<paddle::platform::GPUPlace, double>);
diff --git a/paddle/operators/adamax_op.h b/paddle/operators/adamax_op.h
index 2c99832ec08e9c1d9b5458c467d5238f9b1b3c37..bf36ed78604dd88c537db51fbeb38f43d0c46173 100644
--- a/paddle/operators/adamax_op.h
+++ b/paddle/operators/adamax_op.h
@@ -31,9 +31,9 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
     moment_out_tensor->mutable_data<T>(ctx.GetPlace());
     inf_norm_out_tensor->mutable_data<T>(ctx.GetPlace());
 
-    float beta1 = ctx.Attr<float>("beta1");
-    float beta2 = ctx.Attr<float>("beta2");
-    float epsilon = ctx.Attr<float>("epsilon");
+    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
+    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
+    T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
 
     auto param = framework::EigenVector<T>::Flatten(
         *ctx.Input<framework::Tensor>("Param"));
diff --git a/paddle/operators/sequence_conv_op.cc b/paddle/operators/sequence_conv_op.cc
index 41cadce4c603a9c14db79e2f6b30f8664cf72a38..c5533732d44737bb8cc71fd8ac46f3c36c72ada1 100644
--- a/paddle/operators/sequence_conv_op.cc
+++ b/paddle/operators/sequence_conv_op.cc
@@ -179,7 +179,9 @@ REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker,
             sequence_conv_grad, ops::SequenceConvGradOp);
 
 REGISTER_OP_CPU_KERNEL(
-    sequence_conv, ops::SequenceConvKernel<paddle::platform::CPUPlace, float>);
+    sequence_conv, ops::SequenceConvKernel<paddle::platform::CPUPlace, float>,
+    ops::SequenceConvKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(
     sequence_conv_grad,
-    ops::SequenceConvGradKernel<paddle::platform::CPUPlace, float>);
+    ops::SequenceConvGradKernel<paddle::platform::CPUPlace, float>,
+    ops::SequenceConvGradKernel<paddle::platform::CPUPlace, double>);
diff --git a/paddle/operators/sequence_conv_op.cu.cc b/paddle/operators/sequence_conv_op.cu.cc
index 6106b0e46c0ab96e01dfc344055f23dbf4a1a2c3..c8136dbcb35be4f1236dddc3d24546f9d91670c8 100644
--- a/paddle/operators/sequence_conv_op.cu.cc
+++ b/paddle/operators/sequence_conv_op.cu.cc
@@ -16,7 +16,9 @@
 
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
-    sequence_conv, ops::SequenceConvKernel<paddle::platform::GPUPlace, float>);
+    sequence_conv, ops::SequenceConvKernel<paddle::platform::GPUPlace, float>,
+    ops::SequenceConvKernel<paddle::platform::GPUPlace, double>);
 REGISTER_OP_GPU_KERNEL(
     sequence_conv_grad,
-    ops::SequenceConvGradKernel<paddle::platform::GPUPlace, float>);
+    ops::SequenceConvGradKernel<paddle::platform::GPUPlace, float>,
+    ops::SequenceConvGradKernel<paddle::platform::GPUPlace, double>);
diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py
index bb9af926e36a97863a871d185c1716e1b38098e1..26a10ae766c9c37b68951613c494029a6f162084 100644
--- a/python/paddle/v2/fluid/layers.py
+++ b/python/paddle/v2/fluid/layers.py
@@ -248,7 +248,7 @@ def data(name,
         stop_gradient=stop_gradient)
 
 
-def create_tensor(dtype, name=None, main_program=None):
+def create_tensor(dtype, name=None, main_program=None, startup_program=None):
     helper = LayerHelper("create_tensor", **locals())
     return helper.create_variable(name=helper.name, dtype=dtype)
 
@@ -412,30 +412,12 @@ _create_op_func_('mul')
 _create_op_func_('elementwise_add')
 _create_op_func_('dropout')
 _create_op_func_('reshape')
-_create_op_func_('elementwise_add')
 _create_op_func_('sigmoid')
 _create_op_func_('scale')
 _create_op_func_('reshape')
 _create_op_func_('transpose')
 
 
-def fill_constant(data_type, shape, value=None, program=None):
-    """
-    This function creates a tensor , with shape as mentioned in the input and
-    specified data_type and fills this up with a constant value that
-    comes in the input.
-    """
-    helper = LayerHelper('fill_constant', **locals())
-    out = helper.create_tmp_variable(dtype=data_type)
-    helper.append_op(
-        type='fill_constant',
-        outputs={'Out': [out]},
-        attrs={'data_type': data_type,
-               'shape': shape,
-               'value': value})
-    return out
-
-
 def cast(x, data_type, main_program=None):
     """
     This function takes in the input with input_data_type
@@ -478,7 +460,7 @@ def sums(input, main_program=None, startup_program=None):
     return out
 
 
-def assign(input, output, main_program=None):
+def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
         type='scale',
@@ -490,7 +472,7 @@ def assign(input, output, main_program=None):
 
 def split_lod_tensor(input,
                      mask,
-                     level,
+                     level=0,
                      main_program=None,
                      startup_program=None):
     helper = LayerHelper('split_lod_tensor', **locals())
@@ -512,11 +494,11 @@ def merge_lod_tensor(in_true,
                      in_false,
                      x,
                      mask,
-                     level,
+                     level=0,
                      main_program=None,
                      startup_program=None):
     helper = LayerHelper('merge_lod_tensor', **locals())
-    out = helper.create_tmp_variable(dtype=x.data_type)
+    out = helper.create_tmp_variable(dtype=in_true.data_type)
     helper.append_op(
         type='merge_lod_tensor',
         inputs={'X': x,
@@ -1366,7 +1348,7 @@ def array_to_lod_tensor(x, table, main_program=None):
     return tmp
 
 
-def fill_constant(shape, dtype, value, main_program=None):
+def fill_constant(shape, dtype, value, main_program=None, startup_program=None):
     """
     This function creates a tensor , with shape as mentioned in the input and
     specified data_type and fills this up with a constant value that
@@ -1387,6 +1369,31 @@ def fill_constant(shape, dtype, value, main_program=None):
     return out
 
 
+def fill_constant_batch_size_like(input,
+                                  shape,
+                                  dtype,
+                                  value,
+                                  input_dim_idx=0,
+                                  output_dim_idx=0,
+                                  main_program=None,
+                                  startup_program=None):
+    helper = LayerHelper("fill_constant_batch_size_like", **locals())
+    out = helper.create_tmp_variable(dtype=dtype)
+    helper.append_op(
+        type='fill_constant_batch_size_like',
+        inputs={'Input': input},
+        outputs={'Out': [out]},
+        attrs={
+            'shape': shape,
+            'data_type': out.data_type,
+            'value': float(value),
+            'input_dim_idx': input_dim_idx,
+            'output_dim_idx': output_dim_idx
+        })
+    out.stop_gradient = True
+    return out
+
+
 def ones(shape, dtype, main_program=None):
     """
     This function performs the same function as fill_constant() declared above
@@ -1449,7 +1456,7 @@ def create_array(dtype, main_program=None):
         dtype=dtype)
 
 
-def less_than(x, y, cond=None, main_program=None):
+def less_than(x, y, cond=None, main_program=None, **ignored):
     helper = LayerHelper("less_than", **locals())
     if cond is None:
         cond = helper.create_tmp_variable(dtype='bool')
@@ -1527,13 +1534,20 @@ class ConditionalBlockGuard(BlockGuard):
 
 
 class ConditionalBlock(object):
-    def __init__(self, inputs, name=None, main_program=None):
+    def __init__(self,
+                 inputs,
+                 name=None,
+                 main_program=None,
+                 startup_program=None):
         for each_input in inputs:
             if not isinstance(each_input, Variable):
                 raise TypeError("Each input should be variable")
         self.inputs = inputs
         self.helper = LayerHelper(
-            'conditional_block', name=name, main_program=main_program)
+            'conditional_block',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
 
     def block(self):
         return ConditionalBlockGuard(self)
@@ -1578,3 +1592,148 @@ class ConditionalBlock(object):
             outputs={'Out': out_list,
                      'Scope': [step_scope]},
             attrs={'block': inside_block})
+
+
+class IfElseBlockGuard(object):
+    def __init__(self, is_true, ifelse):
+        if not isinstance(ifelse, IfElse):
+            raise TypeError("ifelse must be an instance of IfElse class")
+
+        if ifelse.status != IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("You cannot invoke IfElse.block() inside a block")
+
+        self.is_true = is_true
+        self.ie = ifelse
+        if is_true:
+            self.cond_block = ifelse.conditional_true_block
+        else:
+            self.cond_block = ifelse.conditional_false_block
+
+        if not isinstance(self.cond_block, ConditionalBlock):
+            raise TypeError("Unexpected situation")
+
+        self.cond_block = self.cond_block.block()
+
+    def __enter__(self):
+        self.ie.status = IfElse.IN_IF_ELSE_TRUE_BLOCKS if self.is_true else IfElse.IN_IF_ELSE_FALSE_BLOCKS
+        self.cond_block.__enter__()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if not self.cond_block.__exit__(exc_type, exc_val, exc_tb):
+            # re-raise inside exception
+            return False
+        if len(self.ie.output_table[1 if self.is_true else 0]) == 0:
+            raise ValueError("Must set output inside block")
+        self.ie.status = IfElse.OUT_IF_ELSE_BLOCKS
+
+
+class IfElse(object):
+    OUT_IF_ELSE_BLOCKS = 0
+    IN_IF_ELSE_TRUE_BLOCKS = 1
+    IN_IF_ELSE_FALSE_BLOCKS = 2
+
+    def __init__(self, cond, name=None, main_program=None,
+                 startup_program=None):
+        if not isinstance(cond, Variable):
+            raise TypeError("cond must be a Variable")
+        self.helper = LayerHelper(
+            'ifelse',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
+        self.cond = cond
+        self.input_table = {}
+        self.status = IfElse.OUT_IF_ELSE_BLOCKS
+        self.conditional_true_block = ConditionalBlock(inputs=[self.cond])
+        self.conditional_false_block = ConditionalBlock(inputs=[self.cond])
+        self.output_table = ([], [])  # (true_outs, false_outs)
+
+    def input(self, x):
+        if self.status == IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("input must in true/false blocks")
+        if id(x) not in self.input_table:
+            parent_block = self.parent_block()
+            out_true = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+
+            out_false = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+            parent_block.append_op(
+                type='split_lod_tensor',
+                inputs={
+                    'X': x,
+                    'Mask': self.cond,
+                },
+                outputs={'OutTrue': out_true,
+                         'OutFalse': out_false},
+                attrs={'level': 0})
+            self.input_table[id(x)] = (out_true, out_false)
+        else:
+            out_true, out_false = self.input_table[id(x)]
+
+        if self.status == IfElse.IN_IF_ELSE_TRUE_BLOCKS:
+            return out_true
+        else:
+            return out_false
+
+    def parent_block(self):
+        current_block = self.helper.main_program.current_block()
+        return self.helper.main_program.block(current_block.parent_idx)
+
+    def true_block(self):
+        return IfElseBlockGuard(True, self)
+
+    def false_block(self):
+        return IfElseBlockGuard(False, self)
+
+    def output(self, *outs):
+        if self.status == self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("output can only be invoked in the sub-block")
+
+        out_table = self.output_table[1 if self.status ==
+                                      self.IN_IF_ELSE_TRUE_BLOCKS else 0]
+        parent_block = self.parent_block()
+        for each_out in outs:
+            if not isinstance(each_out, Variable):
+                raise TypeError("Each output should be a variable")
+            # create outside tensor
+            outside_out = parent_block.create_var(
+                name=unique_name("_".join([self.helper.name, 'output'])),
+                dtype=each_out.data_type)
+            out_table.append(outside_out)
+
+            # assign local var to outside
+            assign(
+                input=each_out,
+                output=outside_out,
+                main_program=self.helper.main_program,
+                startup_program=self.helper.startup_program)
+
+    def __call__(self):
+        if self.status != self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("IfElse::__call__ must be out of sub-block")
+        false_len, true_len = map(len, self.output_table)
+        if false_len == 0 and true_len == 0:
+            raise ValueError("Must invoke true_block/false_block before "
+                             "__call__")
+        elif false_len != true_len and false_len != 0 and true_len != 0:
+            raise ValueError("The output side must be same")
+        elif false_len == 0 or true_len == 0:
+            return self.output_table[0 if false_len != 0 else 1]
+
+        # else none of false_len/true_len is zero
+        # merge together
+        rlist = []
+        for false_var, true_var in zip(*self.output_table):
+            rlist.append(
+                merge_lod_tensor(
+                    in_true=true_var,
+                    in_false=false_var,
+                    mask=self.cond,
+                    x=self.cond,
+                    level=0,
+                    main_program=self.helper.main_program,
+                    startup_program=self.helper.startup_program))
+        return rlist
diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..8af99005dc0b5d50de60ca89c2ddf870b1537edb
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
@@ -0,0 +1,154 @@
+import paddle.v2.fluid.layers as layers
+from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.executor import Executor
+from paddle.v2.fluid.optimizer import MomentumOptimizer
+import paddle.v2.fluid.core as core
+import paddle.v2 as paddle
+import unittest
+import numpy as np
+
+
+class TestMNISTIfElseOp(unittest.TestCase):
+    def test_raw_api(self):
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image = layers.data(
+            name='x', shape=[784], data_type='float32', **kwargs)
+
+        label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)
+
+        limit = layers.fill_constant_batch_size_like(
+            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
+
+        cond = layers.less_than(x=label, y=limit, **kwargs)
+        true_image, false_image = layers.split_lod_tensor(
+            input=image, mask=cond, **kwargs)
+
+        true_out = layers.create_tensor(dtype='float32', **kwargs)
+        true_cond = layers.ConditionalBlock([true_image], **kwargs)
+
+        with true_cond.block():
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=true_out, **kwargs)
+
+        false_out = layers.create_tensor(dtype='float32', **kwargs)
+        false_cond = layers.ConditionalBlock([false_image], **kwargs)
+
+        with false_cond.block():
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=false_out, **kwargs)
+
+        prob = layers.merge_lod_tensor(
+            in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs)
+        loss = layers.cross_entropy(input=prob, label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+        optimizer.minimize(avg_loss, kwargs['startup_program'])
+
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=8192),
+            batch_size=200)
+
+        place = core.CPUPlace()
+        exe = Executor(place)
+
+        exe.run(kwargs['startup_program'])
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                x_data = np.array(map(lambda x: x[0], data)).astype("float32")
+                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+                y_data = np.expand_dims(y_data, axis=1)
+
+                tensor_x = core.LoDTensor()
+                tensor_x.set(x_data, place)
+
+                tensor_y = core.LoDTensor()
+                tensor_y.set(y_data, place)
+
+                outs = map(np.array,
+                           exe.run(kwargs['main_program'],
+                                   feed={'x': tensor_x,
+                                         'y': tensor_y},
+                                   fetch_list=[avg_loss]))
+                print outs[0]
+                if outs[0] < 1.0:
+                    return
+        self.assertFalse(True)
+
+    def test_ifelse(self):
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image = layers.data(
+            name='x', shape=[784], data_type='float32', **kwargs)
+
+        label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)
+
+        limit = layers.fill_constant_batch_size_like(
+            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
+
+        cond = layers.less_than(x=label, y=limit, **kwargs)
+
+        ie = layers.IfElse(cond, **kwargs)
+
+        with ie.true_block():
+            true_image = ie.input(image)
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        with ie.false_block():
+            false_image = ie.input(image)
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        prob = ie()
+        loss = layers.cross_entropy(input=prob[0], label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+        optimizer.minimize(avg_loss, kwargs['startup_program'])
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=8192),
+            batch_size=200)
+
+        place = core.CPUPlace()
+        exe = Executor(place)
+
+        exe.run(kwargs['startup_program'])
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                x_data = np.array(map(lambda x: x[0], data)).astype("float32")
+                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+                y_data = np.expand_dims(y_data, axis=1)
+
+                tensor_x = core.LoDTensor()
+                tensor_x.set(x_data, place)
+
+                tensor_y = core.LoDTensor()
+                tensor_y.set(y_data, place)
+
+                outs = map(np.array,
+                           exe.run(kwargs['main_program'],
+                                   feed={'x': tensor_x,
+                                         'y': tensor_y},
+                                   fetch_list=[avg_loss]))
+                print outs[0]
+                if outs[0] < 1.0:
+                    return
+        self.assertFalse(True)
+
+
+if __name__ == '__main__':
+    unittest.main()
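Usage sketch (not part of the patch): the new `layers.IfElse` helper added to layers.py wraps the split_lod_tensor / ConditionalBlock / merge_lod_tensor sequence that test_raw_api builds by hand. The condensed example below uses only names introduced in this diff (`fill_constant_batch_size_like`, `less_than`, `IfElse`, `ie.input`, `ie.output`); the 784-wide input and the `label < 5` condition are taken from the test, and the single fc layer per branch is an illustrative simplification, not the test's exact network.

# Minimal sketch of the IfElse API introduced in this diff (Python 2 era fluid API).
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program

kwargs = {'startup_program': Program(), 'main_program': Program()}
image = layers.data(name='x', shape=[784], data_type='float32', **kwargs)
label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)

# Per-row condition: label < 5 (the constant is broadcast to the batch size).
limit = layers.fill_constant_batch_size_like(
    input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
cond = layers.less_than(x=label, y=limit, **kwargs)

ie = layers.IfElse(cond, **kwargs)
with ie.true_block():
    true_image = ie.input(image)    # rows of `image` where cond is True
    ie.output(layers.fc(input=true_image, size=10, act='softmax', **kwargs))
with ie.false_block():
    false_image = ie.input(image)   # rows of `image` where cond is False
    ie.output(layers.fc(input=false_image, size=10, act='softmax', **kwargs))

# Calling the IfElse object merges each branch's outputs back according to the mask.
prob = ie()[0]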