diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f8c97b05bc08a26c1fa0bdcc2cd1abd932af158a..b1a380995065d7d06d1056afc1c552541194e54c 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -277,6 +277,7 @@ paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, de
 paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
 paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb'))
 paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '495e21e9a848c2d075a102802fc67756'))
+paddle.fluid.layers.zeros_like (ArgSpec(args=['x', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c7e4cfffc93ae89c8f6f53b6d650f923'))
 paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -286,7 +287,11 @@ paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=
 paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
 paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
 paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
+paddle.fluid.layers.less_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd6b173ae1a149e0bdfe7b8bf69285957'))
+paddle.fluid.layers.greater_than (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '2c9bd414caa6c615539018d27001b44c'))
+paddle.fluid.layers.greater_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '62c667d24e7b07e166b47a53b61b2ff4'))
 paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
+paddle.fluid.layers.not_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '56148fb1024687a08e96af79bdc5c929'))
 paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', 'dd68bead34dfbaf6b0a163fc1cc3c385'))
 paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
 paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -319,6 +324,7 @@ paddle.fluid.layers.atan (ArgSpec(args=['x', 'name'], varargs=None, keywords=Non
 paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
 paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
 paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9e27491c39ac74d0b1ffe506aec0ebb'))
+paddle.fluid.layers.rsqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c445467ebe58b3c0d7f0bba7795b6f56'))
 paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
 paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
 paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
@@ -331,13 +337,13 @@ paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywor
 paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee'))
 paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a'))
 paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148'))
-paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641'))
+paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', 'a8c4e972b7d6742c838a37abf407ed9a'))
 paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e'))
 paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926'))
 paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a'))
 paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8'))
 paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9'))
-paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd'))
+paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'fe9afaee481dd09f28866df22756466f'))
 paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1'))
 paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c'))
 paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1'))
@@ -352,7 +358,7 @@ paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes',
 paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
 paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
 paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
-paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gtscore', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '57fa96922e42db8f064c3fb77f2255e8'))
+paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '059021025283ad1ee6f4d32228cf3e4e'))
 paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5566169a5ab993d177792c023c7fb340'))
 paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
 paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
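Note on the API.spec additions above: the new entries expose several Python layers (zeros_like, less_equal, greater_than, greater_equal, not_equal, rsqrt). A minimal usage sketch, assuming a build that includes this PR; variable names and shapes here are illustrative only:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[4], dtype='float32')

    # New element-wise comparisons; each returns a bool tensor and, per the
    # ArgSpecs above, accepts an optional pre-created `cond` output.
    le = fluid.layers.less_equal(x=x, y=y)
    gt = fluid.layers.greater_than(x=x, y=y)
    ge = fluid.layers.greater_equal(x=x, y=y)
    ne = fluid.layers.not_equal(x=x, y=y)

    r = fluid.layers.rsqrt(x)         # element-wise 1/sqrt(x)
    z = fluid.layers.zeros_like(x)    # zeros with x's shape and dtype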
diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc
index 98ce225a0476b38c021b0b81489f69d7953ae456..ec10e36c3b3707a88eebe116aaf3de454fc199b5 100644
--- a/paddle/fluid/inference/tests/api/trt_models_tester.cc
+++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc
@@ -116,7 +116,7 @@ void compare_continuous_input(std::string model_dir, bool use_tensorrt) {
       reinterpret_cast(&analysis_config);
   auto native_pred = CreateTestPredictor(config, false);
   auto analysis_pred = CreateTestPredictor(config, true);
-  for (int i = 0; i < 100; i++) {
+  for (int i = 0; i < 20; i++) {
     std::vector<std::vector<PaddleTensor>> inputs_all;
     if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
       SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
@@ -133,11 +133,13 @@ void compare_continuous_input(std::string model_dir, bool use_tensorrt) {
 TEST(TensorRT_mobilenet, compare) {
   std::string model_dir = FLAGS_infer_model + "/mobilenet";
   compare(model_dir, /* use_tensorrt */ true);
+  // Enable it when needed.
+  // profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
 }

-TEST(TensorRT_resnet50, compare) {
+TEST(resnet50, compare_continuous_input) {
   std::string model_dir = FLAGS_infer_model + "/resnet50";
-  compare(model_dir, /* use_tensorrt */ true);
+  compare_continuous_input(model_dir, true);
 }

 TEST(TensorRT_resnext50, compare) {
@@ -145,24 +147,6 @@
   compare(model_dir, /* use_tensorrt */ true);
 }

-TEST(TensorRT_resnext50, profile) {
-  std::string model_dir = FLAGS_infer_model + "/resnext50";
-  // Set FLAGS_record_benchmark to true to record benchmark to file.
-  // FLAGS_record_benchmark=true;
-  FLAGS_model_name = "resnext50";
-  profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
-}
-
-TEST(resnext50, compare_analysis_native) {
-  std::string model_dir = FLAGS_infer_model + "/resnext50";
-  compare(model_dir, false /*use tensorrt*/);
-}
-
-TEST(TensorRT_mobilenet, analysis) {
-  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  compare(model_dir, false /* use_tensorrt */);
-}
-
 TEST(AnalysisPredictor, use_gpu) {
   std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
   AnalysisConfig config;
@@ -180,20 +164,5 @@
   }
 }

-TEST(TensorRT_mobilenet, profile) {
-  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  profile(model_dir, true, false);
-}
-
-TEST(resnet50, compare_continuous_input) {
-  std::string model_dir = FLAGS_infer_model + "/resnet50";
-  compare_continuous_input(model_dir, true);
-}
-
-TEST(resnet50, compare_continuous_input_native) {
-  std::string model_dir = FLAGS_infer_model + "/resnet50";
-  compare_continuous_input(model_dir, false);
-}
-
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/op_use_default_grad_op_maker.spec b/paddle/fluid/op_use_default_grad_op_maker.spec
index 21a25ce7d5e2bad172cf50cee6138ef4b44b07c1..403be1fc2c97a189a541c0c887eaadfe4266a124 100644
--- a/paddle/fluid/op_use_default_grad_op_maker.spec
+++ b/paddle/fluid/op_use_default_grad_op_maker.spec
@@ -18,7 +18,6 @@ gru
 hierarchical_sigmoid
 lrn
 lstm_unit
-lstmp
 max_pool2d_with_index
 max_pool3d_with_index
 maxout
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 1e5d63fc11d1d81350525e2b3390a3ae44f00f8d..348902c656cec1ea1eeaccc90feefd56d307111d 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -227,6 +227,15 @@
 $out = \sqrt{x}$

 )DOC";

+UNUSED constexpr char RsqrtDoc[] = R"DOC(
+Rsqrt Activation Operator.
+
+Please make sure the input is positive, to avoid numeric errors.
+
+$out = \frac{1}{\sqrt{x}}$
+
+)DOC";
+
 UNUSED constexpr char AbsDoc[] = R"DOC(
 Abs Activation Operator.

@@ -575,6 +584,7 @@ REGISTER_ACTIVATION_OP_MAKER(Gelu, GeluDoc);
 REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
 REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
 REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
+REGISTER_ACTIVATION_OP_MAKER(Rsqrt, RsqrtDoc);
 REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc);
 REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc);
 REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc);
@@ -586,6 +596,7 @@ REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
 REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
 REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
 REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 915632a328feb99c021ec062a9b22a04623eff4a..1732f61582f79365d6872e15b9df1ee8f053903c 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -511,6 +511,26 @@ struct SqrtGradFunctor : public BaseActivationFunctor {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };

+// rsqrt(x) = x^(-1/2)
+template <typename T>
+struct RsqrtFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {
+    out.device(d) = x.rsqrt();
+  }
+};
+
+template <typename T>
+struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Out, typename dOut,
+            typename dX>
+  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
+    dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
+};
+
 // ceil(x) = ceiling(x)
 template <typename T>
 struct CeilFunctor : public BaseActivationFunctor<T> {
@@ -1191,6 +1211,7 @@ struct SwishGradFunctor : public BaseActivationFunctor {
   __macro(atan, Atan, AtanFunctor, AtanGradFunctor);                         \
   __macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
   __macro(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor);                         \
+  __macro(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor);                     \
   __macro(abs, Abs, AbsFunctor, AbsGradFunctor);                             \
   __macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor);                         \
   __macro(floor, Floor, FloorFunctor, ZeroGradFunctor);                      \
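Note on RsqrtGradFunctor above: with out = x^(-1/2), d(out)/dx = -0.5 * x^(-3/2) = -0.5 * out^3, so the backward pass needs only `out` and never re-reads `x` — which is why FwdDeps() returns kDepOut. A quick numpy sanity check of that identity (illustrative only, not part of the patch):

    import numpy as np

    x = np.random.uniform(0.5, 2.0, size=1000)
    out = 1.0 / np.sqrt(x)               # forward: rsqrt(x) = x^(-1/2)
    analytic = -0.5 * out ** 3           # the RsqrtGradFunctor formula (dout = 1)
    eps = 1e-6
    numeric = (1.0 / np.sqrt(x + eps) - out) / eps  # finite-difference estimate
    assert np.allclose(numeric, analytic, atol=1e-4)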
diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc
index 268a5b894a95df8e27730879473b457a31e18cd6..27370a3c29a073f3ce6f01fd5aaf28b5ef1ca3a6 100644
--- a/paddle/fluid/operators/affine_channel_op.cc
+++ b/paddle/fluid/operators/affine_channel_op.cc
@@ -79,9 +79,13 @@ class AffineChannelOp : public framework::OperatorWithKernel {
                      : x_dims[x_dims.size() - 1]);

     PADDLE_ENFORCE_EQ(scale_dims.size(), 1UL);
-    PADDLE_ENFORCE_EQ(scale_dims[0], C);
     PADDLE_ENFORCE_EQ(b_dims.size(), 1UL);
-    PADDLE_ENFORCE_EQ(b_dims[0], C);
+    if (ctx->IsRuntime() || scale_dims[0] > 0) {
+      PADDLE_ENFORCE_EQ(scale_dims[0], C);
+    }
+    if (ctx->IsRuntime() || b_dims[0] > 0) {
+      PADDLE_ENFORCE_EQ(b_dims[0], C);
+    }

     ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
     ctx->ShareLoD("X", "Out");
diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc
index 494d26f58f23ad1e445bbe8d7f8ce1037e5aa598..0cc3e1f2b8350acb693ad7ba4f4dab270068723b 100644
--- a/paddle/fluid/operators/batch_norm_op.cc
+++ b/paddle/fluid/operators/batch_norm_op.cc
@@ -65,11 +65,22 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const {
       (data_layout == DataLayout::kNCHW ? x_dims[1]
                                         : x_dims[x_dims.size() - 1]);
-  PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
-  PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
-  PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
-  PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], C);
+  auto scale_dim = ctx->GetInputDim("Scale");
+  auto bias_dim = ctx->GetInputDim("Bias");
+
+  PADDLE_ENFORCE_EQ(scale_dim.size(), 1UL);
+  PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL);
+
+  bool check = true;
+  if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 ||
+                              framework::product(bias_dim) <= 0)) {
+    check = false;
+  }
+
+  if (check) {
+    PADDLE_ENFORCE_EQ(scale_dim[0], C);
+    PADDLE_ENFORCE_EQ(bias_dim[0], C);
+  }

   ctx->SetOutputDim("Y", x_dims);
   ctx->SetOutputDim("MeanOut", {C});
   ctx->SetOutputDim("VarianceOut", {C});
diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index 1f71555180361a1522b7a1c8383fe128bc4edcd0..b1a6d66b80efdae3e78d7c3321a6107d2dd607aa 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -49,7 +49,15 @@ class ConcatOp : public framework::OperatorWithKernel {
     for (size_t i = 1; i < n; i++) {
       for (size_t j = 0; j < in_zero_dims_size; j++) {
         if (j == axis) {
-          out_dims[axis] += ins[i][j];
+          if (ctx->IsRuntime()) {
+            out_dims[axis] += ins[i][j];
+          } else {
+            if (ins[i][j] == -1) {
+              out_dims[axis] = -1;
+            } else {
+              out_dims[axis] += ins[i][j];
+            }
+          }
         } else {
           if (ctx->IsRuntime()) {
             // check all shape in run time
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 619e12e6ba7c73e46beafadd50770aedfb52c964..e1281602bf0d1bf25a2c4dfa32f495ed724d24eb 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -68,9 +68,14 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
   std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
   for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                          dilations[i], paddings[i],
-                                          strides[i]));
+    if ((!ctx->IsRuntime()) &&
+        (in_dims[i + 2] <= 0 || filter_dims[i + 2] <= 0)) {
+      output_shape.push_back(-1);
+    } else {
+      output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                            dilations[i], paddings[i],
+                                            strides[i]));
+    }
   }
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
   ctx->ShareLoD("Input", "Output");
diff --git a/paddle/fluid/operators/conv_shift_op.cc b/paddle/fluid/operators/conv_shift_op.cc
index 08506ddd18ed35831702814e70962cb36ec958b1..fa4edb70b48e529102f11a1b0b9cac2110a33966 100644
--- a/paddle/fluid/operators/conv_shift_op.cc
+++ b/paddle/fluid/operators/conv_shift_op.cc
@@ -36,14 +36,17 @@ class ConvShiftOp : public framework::OperatorWithKernel {
     auto y_dims = ctx->GetInputDim("Y");
     PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
     PADDLE_ENFORCE_EQ(y_dims.size(), 2, "Input(Y)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
-                      "The 1st dimension of Input(X) and Input(Y) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(y_dims[1] % 2, 1,
-                      "The 2nd dimension of Input(Y) should be odd.");
-    PADDLE_ENFORCE_LE(y_dims[1], x_dims[1],
-                      "The 2nd dimension of Input(Y) should be less than or "
-                      "equal to the 2nd dimension of Input(X).");
+    if (ctx->IsRuntime() || (x_dims[0] > 0 && y_dims[0] > 0))
+      PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
+                        "The 1st dimension of Input(X) and Input(Y) should "
+                        "be equal.");
+    if (ctx->IsRuntime() || y_dims[1] > 0)
+      PADDLE_ENFORCE_EQ(y_dims[1] % 2,
1, + "The 2nd dimension of Input(Y) should be odd."); + if (ctx->IsRuntime() || (x_dims[1] > 0 && y_dims[1] > 0)) + PADDLE_ENFORCE_LE(y_dims[1], x_dims[1], + "The 2nd dimension of Input(Y) should be less than or " + "equal to the 2nd dimension of Input(X)."); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index 30ec74d8442d2f42510220b825988b340f79d0a2..93304ec6700b795c923f24a5d0663884b818b9b3 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -40,17 +40,27 @@ class CosSimOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto y_dims = ctx->GetInputDim("Y"); - PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(), - "Ranks of Input(X) and Input(Y) must be equal."); - PADDLE_ENFORCE_GE(x_dims.size(), 2, - "Rank of Input(X) must not be less than 2."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 1, x_dims.size()), - framework::slice_ddim(y_dims, 1, y_dims.size()), - "All dimensions except the 1st of Input(X) and Input(Y) " - "must be equal."); - PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1, - "The 1st dimension of Input(Y) must be equal to Input(X) or" - " just 1 (which will be broadcasted to match Input(X))."); + bool check = true; + if ((!ctx->IsRuntime()) && + (framework::product(x_dims) <= 0 || framework::product(y_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(), + "Ranks of Input(X) and Input(Y) must be equal."); + PADDLE_ENFORCE_GE(x_dims.size(), 2, + "Rank of Input(X) must not be less than 2."); + PADDLE_ENFORCE_EQ( + framework::slice_ddim(x_dims, 1, x_dims.size()), + framework::slice_ddim(y_dims, 1, y_dims.size()), + "All dimensions except the 1st of Input(X) and Input(Y) " + "must be equal."); + PADDLE_ENFORCE( + x_dims[0] == y_dims[0] || y_dims[0] == 1, + "The 1st dimension of Input(Y) must be equal to Input(X) or" + " just 1 (which will be broadcasted to match Input(X))."); + } // resize tensor ctx->SetOutputDim("Out", {x_dims[0], 1}); diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index e1d113f8542da8827b9e36e44fc1bac6c07c9257..554e50725ffa5fc30849dc62fe525d72c6561a8b 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -51,8 +51,10 @@ class DetectionMAPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(label_dims.size(), 2, "The rank of Input(Label) must be 2, " "the shape is [N, 6]."); - PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5, - "The shape of Input(Label) is [N, 6] or [N, 5]."); + if (ctx->IsRuntime() || label_dims[1] > 0) { + PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5, + "The shape of Input(Label) is [N, 6] or [N, 5]."); + } if (ctx->HasInput("PosCount")) { PADDLE_ENFORCE(ctx->HasInput("TruePos"), diff --git a/paddle/fluid/operators/distributed_ops/split_byref_op.cc b/paddle/fluid/operators/distributed_ops/split_byref_op.cc index d65e7ffe5a492fe5df038bb6bd469e09de6f95ca..43980107c14176f1751a3db2858c80cb65c764de 100644 --- a/paddle/fluid/operators/distributed_ops/split_byref_op.cc +++ b/paddle/fluid/operators/distributed_ops/split_byref_op.cc @@ -31,14 +31,16 @@ class SplitByrefOp : public framework::OperatorWithKernel { auto in_dims = ctx->GetInputDim("X"); auto outs_names = ctx->Outputs("Out"); size_t num = static_cast(ctx->Attrs().Get("num")); - std::vector sections = static_cast>( - 
ctx->Attrs().Get>("sections")); + auto sections = ctx->Attrs().Get>("sections"); const size_t outs_number = outs_names.size(); std::vector outs_dims; outs_dims.reserve(outs_number); if (num > 0) { - int64_t in_axis_dim = in_dims[0]; + int64_t in_axis_dim = 0; + if (ctx->IsRuntime()) { + in_axis_dim = in_dims[0]; + } PADDLE_ENFORCE_EQ(in_axis_dim % num, 0, "tensor split does not result" " in an equal division"); diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 241184c6f4a19a1da0d6d75c5d4e2b372c14e9da..57a1fcd42da04a766ebd8713e3863f259b3784ac 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/grid_sampler_op.h" +#include #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cudnn_helper.h" @@ -40,10 +41,12 @@ class GridSampleOp : public framework::OperatorWithKernel { "Input(X) of GridSampleOp should be 4-D Tensor."); PADDLE_ENFORCE(grid_dims.size() == 4, "Input(Grid) of GridSampleOp should be 4-D Tensor."); - PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2."); - PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0], - "Input(X) and Input(Grid) dims[0] should be equal."); + if (ctx->IsRuntime() || grid_dims[3] > 0) { + PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2."); + } if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0], + "Input(X) and Input(Grid) dims[0] should be equal."); PADDLE_ENFORCE_EQ( grid_dims[1], x_dims[2], "Input(X) dims[2] and Input(Grid) dims[1] should be equal."); diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.h b/paddle/fluid/operators/hierarchical_sigmoid_op.h index 82c8171ca52ffb128df103f27bafbdba1e72e52f..7cfe0aabcb7f3ce86ccc3a9a1c54b3b60d384aa1 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.h +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h @@ -238,6 +238,8 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { zero(dev_ctx, w_grad, static_cast(0.0)); bit_code->MulGradWeight(pre_out_grad, w_grad, in); } else { + PADDLE_ENFORCE(path != nullptr, + "Sparse mode should not be used without custom tree!"); framework::Vector real_rows = PathToRows(*path); auto* w_grad = ctx.Output(framework::GradVarName("W")); diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 9f2e3ad4a5ac1786096c67154d5a9ef5ea62855c..900b0c636ddafc8c033560adf58d596eb696621f 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -45,9 +45,14 @@ class InterpolateOp : public framework::OperatorWithKernel { // round down out_h = static_cast(dim_x[2] * scale); out_w = static_cast(dim_x[3] * scale); + // protect when input shape is -1 + out_h = out_h > 0 ? out_h : -1; + out_w = out_w > 0 ? 
out_w : -1; } else { out_h = ctx->Attrs().Get("out_h"); out_w = ctx->Attrs().Get("out_w"); + PADDLE_ENFORCE_GT(out_h, 0, "out_h should be greater than 0."); + PADDLE_ENFORCE_GT(out_w, 0, "out_w should be greater than 0."); } if (ctx->HasInput("OutSize") && ctx->IsRuntime()) { @@ -58,6 +63,7 @@ class InterpolateOp : public framework::OperatorWithKernel { ctx->ShareLoD("X", "Out"); return; } + std::vector dim_out({dim_x[0], dim_x[1], out_h, out_w}); ctx->SetOutputDim("Out", framework::make_ddim(dim_out)); } diff --git a/paddle/fluid/operators/kldiv_loss_op.cc b/paddle/fluid/operators/kldiv_loss_op.cc index a43f22c0496f89943d2fd5110446f1aae6a99315..a7c5d6305b09afb93be0b3b8524a91bd53e719fe 100644 --- a/paddle/fluid/operators/kldiv_loss_op.cc +++ b/paddle/fluid/operators/kldiv_loss_op.cc @@ -35,8 +35,10 @@ class KLDivLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(dim_x.size(), dim_target.size(), "Input(X) rank and Input(Target) rank should be same."); for (int i = 0; i < dim_x.size(); i++) { - PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i], - "Input(X) and Input(Target) should in same shape."); + if (ctx->IsRuntime() || (dim_x[i] > 0 && dim_target[i] > 0)) { + PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i], + "Input(X) and Input(Target) should in same shape."); + } } auto reduction = ctx->Attrs().Get("reduction"); diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index 2728aa8a4ee21a9e1fe3deddcdba4c35a6aba7bc..f31c177c92d0a9e4cc731c478ea8339b450f318a 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lstmp_op.h" +#include #include namespace paddle { @@ -45,6 +46,7 @@ class LSTMPOp : public framework::OperatorWithKernel { "Output(BatchHidden) of LSTMP operator should not be null."); auto in_dims = ctx->GetInputDim("Input"); + PADDLE_ENFORCE_EQ(in_dims.size(), 2, "Input(X)'s rank of LSTMP operator must be 2."); @@ -269,13 +271,47 @@ Users can choose to use fully-connected operator before LSTMP operator. 
} }; +class LSTMPGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto* grad_op = new framework::OpDesc(); + grad_op->SetType("lstmp_grad"); + grad_op->SetInput("Weight", Input("Weight")); + grad_op->SetInput("ProjWeight", Input("ProjWeight")); + grad_op->SetInput("Bias", Input("Bias")); + + grad_op->SetInput("Projection", Output("Projection")); + grad_op->SetInput("Cell", Output("Cell")); + grad_op->SetInput("BatchGate", Output("BatchGate")); + grad_op->SetInput("BatchCellPreAct", Output("BatchCellPreAct")); + grad_op->SetInput("BatchHidden", Output("BatchHidden")); + grad_op->SetInput("H0", Input("H0")); + grad_op->SetInput("C0", Input("C0")); + + grad_op->SetInput(framework::GradVarName("Projection"), + OutputGrad("Projection")); + + grad_op->SetOutput(framework::GradVarName("Input"), InputGrad("Input")); + grad_op->SetOutput(framework::GradVarName("Weight"), InputGrad("Weight")); + grad_op->SetOutput(framework::GradVarName("ProjWeight"), + InputGrad("ProjWeight")); + grad_op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias")); + grad_op->SetOutput(framework::GradVarName("H0"), InputGrad("H0")); + grad_op->SetOutput(framework::GradVarName("C0"), InputGrad("C0")); + + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + class LSTMPGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of LSTMP operator should not be null."); PADDLE_ENFORCE(ctx->HasInput("Projection"), "Input(Projection) of LSTMP operator should not be null."); PADDLE_ENFORCE(ctx->HasInput("Cell"), @@ -298,7 +334,8 @@ class LSTMPGradOp : public framework::OperatorWithKernel { ctx->SetOutputDim(g_name, ctx->GetInputDim(name)); }; - SetOutGradDim("Input"); + ctx->SetOutputDim(framework::GradVarName("Input"), + ctx->GetInputDim("BatchGate")); SetOutGradDim("Weight"); SetOutGradDim("ProjWeight"); SetOutGradDim("Bias"); @@ -310,7 +347,8 @@ class LSTMPGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - ctx.Input("Input")->type(), ctx.device_context()); + ctx.Input("BatchGate")->type(), + ctx.device_context()); } }; @@ -318,8 +356,7 @@ class LSTMPGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, - paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, ops::LSTMPGradMaker); REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp); REGISTER_OP_CPU_KERNEL( lstmp, ops::LSTMPKernel, diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index c7d6e4205f8862526904e4fa767a2f4c4a2d8481..36da882639a235f27b4e5a9e77bf0813ea9c0ee3 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -267,7 +267,6 @@ class LSTMPGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); auto* weight = ctx.Input("Weight"); auto* proj_weight = ctx.Input("ProjWeight"); auto* bias = ctx.Input("Bias"); @@ -323,7 +322,8 @@ class LSTMPGradKernel : public framework::OpKernel { 
ordered_c0_g.mutable_data(c0_g->dims(), ctx.GetPlace()); } - auto in_dims = input->dims(); + // batch_gate dims equal to input dims + auto in_dims = batch_gate->dims(); auto out_dims = cell_out->dims(); framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]}); int frame_size = static_cast(in_dims[1] / 4); diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index da7fa1b81d601f4dd03d6716de601a4b1abc7fa0..5edc233f6f73262c3d1b803aae0089f5b15d403d 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -164,7 +164,9 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase { auto mask_dim = context->GetInputDim("Mask"); PADDLE_ENFORCE_EQ(mask_dim.size(), 2); - PADDLE_ENFORCE_EQ(mask_dim[1], 1); + if (context->IsRuntime() || mask_dim[1] > 0) { + PADDLE_ENFORCE_EQ(mask_dim[1], 1); + } context->SetOutputDim("Out", context->GetInputDim("InTrue")); } diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index bddca232e6c8a2a7fde998877006e37ee6d3d0dc..911c4d22ee5cd84c0b42646a1d3e62a0d765732e 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -39,13 +39,9 @@ struct bn_type_traits { class BatchNormMKLDNNHandler : public platform::MKLDNNHandler { public: - BatchNormMKLDNNHandler( - std::shared_ptr batch_norm_pd, - const platform::MKLDNNDeviceContext &dev_ctx, mkldnn::engine engine, - const std::string &base_key) - : platform::MKLDNNHandler(dev_ctx, engine, base_key) { - batch_norm_pd_ = batch_norm_pd; - } + BatchNormMKLDNNHandler(const platform::MKLDNNDeviceContext &dev_ctx, + mkldnn::engine engine, const std::string &base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key) {} std::shared_ptr AcquireScaleshiftMemoryFromPrimitive(void *ptr) { return this->AcquireMemoryFromPrimitive( @@ -62,6 +58,26 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandler { batch_norm_pd_->variance_primitive_desc(), ptr, "@variance_mem_p"); } + std::shared_ptr + AcquireBatchNormPrimitiveDescriptor(const batch_norm_fwd::desc &bn_fwd_desc, + const mkldnn::engine &engine) { + const std::string key_batch_norm_fwd_pd = key_ + "@bn_fwd_pd"; + auto batch_norm_pd = + std::static_pointer_cast( + dev_ctx_.GetBlob(key_batch_norm_fwd_pd)); + + if (batch_norm_pd == nullptr) { + batch_norm_pd_.reset( + new batch_norm_fwd::primitive_desc(bn_fwd_desc, engine)); + dev_ctx_.SetBlob(key_batch_norm_fwd_pd, batch_norm_pd_); + } else { + batch_norm_pd_ = batch_norm_pd; + is_reusing_ = true; + } + + return batch_norm_pd_; + } + std::shared_ptr AcquireTestTrainingBatchNormFwd( std::shared_ptr src_memory, std::shared_ptr scaleshift_memory, @@ -213,7 +229,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { const std::string key = BatchNormMKLDNNHandler::GetHash( src_tz, epsilon, flags, global_stats, input_format, ctx.op().Output("SavedMean")); - const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd"; + BatchNormMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); auto user_src_md = platform::MKLDNNMemDesc( {src_tz}, platform::MKLDNNGetDataType(), input_format); @@ -222,13 +238,9 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { using bn_fwd_types = bn_type_traits; auto batch_norm_fwd_desc = bn_fwd_types::op_desc{propagation, user_src_md, epsilon, flags}; - auto batch_norm_fwd_pd = std::make_shared( - 
batch_norm_fwd_desc, mkldnn_engine); - // Save conv_pd/src_memory/weights_memory for backward pass - dev_ctx.SetBlob(key_batch_norm_fwd_pd, batch_norm_fwd_pd); - BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine, - key); + auto batch_norm_fwd_pd = handler.AcquireBatchNormPrimitiveDescriptor( + batch_norm_fwd_desc, mkldnn_engine); auto src_memory = handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data)); diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 5e4d79f1c35af42f662711ae9d8bfc650bab2b4f..faf518005c8cb0958dd5b0bbfc5c6fc4b3c2b582 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -144,7 +144,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { const std::string key = platform::ConvMKLDNNHandler::GetHash( src_tz, weights_tz, strides, paddings, dilations, groups, ctx.op().Input("Input") + ctx.op().Input("Filter")); - const std::string key_conv_pd = key + "@conv_pd"; std::vector pipeline; @@ -183,6 +182,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto dst_md = platform::MKLDNNMemDesc( dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); + platform::ConvMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); + // create a conv primitive descriptor and save it for usage in backward std::shared_ptr conv_pd; auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference @@ -191,18 +192,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { bias_tz = paddle::framework::vectorize2int(bias->dims()); auto bias_md = platform::MKLDNNMemDesc( bias_tz, platform::MKLDNNGetDataType(), memory::format::x); - conv_pd = ConvFwdPrimitiveDesc( + conv_pd = handler.AcquireConvolutionPrimitiveDescriptor( src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine, fuse_relu, fuse_residual_conn, fwd_prop_kind); } else { - conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, - paddings, mkldnn_engine, fuse_relu, - fuse_residual_conn, fwd_prop_kind); + conv_pd = handler.AcquireConvolutionPrimitiveDescriptor( + src_md, weights_md, boost::none, dst_md, strides, paddings, + mkldnn_engine, fuse_relu, fuse_residual_conn, fwd_prop_kind); } - // Save conv_pd/src_memory/weights_memory for backward pass - if (!is_test) dev_ctx.SetBlob(key_conv_pd, conv_pd); - - platform::ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key); // create mkldnn memory from input tensors (data/weights) auto user_src_memory_p = @@ -633,31 +630,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { } private: - mkldnn::primitive_attr CreatePostOps(bool fuse_relu, - bool fuse_residual_conn) const { - mkldnn::primitive_attr conv_attr; - mkldnn::post_ops post_operations; - // Fusion with Elementwise layer relies on adding a sum post-operation with - // the scale parameter. It is assumed that when fuse_residual_connection is - // true, the output tensor contains the data coming from residual - // connection. The result of this post_op is: - // Output = scale * Output + Conv_Out. - if (fuse_residual_conn) { - post_operations.append_sum(1.0f); - } - // Fusion with ReLU layer is executed through the PostOps feature. Create a - // PostOps object and configure it to execute an eltwise relu operation. 
- if (fuse_relu) { - constexpr float scale = 1.0f; - constexpr float negative_slope = 0.0f; - constexpr float placeholder = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, - negative_slope, placeholder); - } - conv_attr.set_post_ops(post_operations); - return conv_attr; - } - mkldnn::primitive_attr CreatePostOps( bool fuse_relu, bool fuse_residual_conn, const std::vector output_shift_scale, float sum_scale) const { @@ -679,30 +651,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { return conv_attr; } - std::unique_ptr - ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights, - const memory::desc& dst, const std::vector& strides, - const std::vector& paddings, - const mkldnn::engine& engine, const bool fuse_relu, - const bool fuse_residual_conn, - mkldnn::prop_kind fwd_prop_kind) const { - memory::dims stride_dims = strides; - memory::dims padding_dims = paddings; - - auto conv_desc = mkldnn::convolution_forward::desc( - fwd_prop_kind, mkldnn::convolution_direct, src, weights, dst, - stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero); - - mkldnn::primitive_attr conv_attr = - CreatePostOps(fuse_relu, fuse_residual_conn); - - auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc( - conv_desc, conv_attr, engine); - - return std::unique_ptr( - p_conv_pd); - } - std::unique_ptr ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights, const memory::desc& dst, const std::vector& strides, @@ -731,31 +679,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { p_conv_pd); } - std::unique_ptr - ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights, - const memory::desc& bias, const memory::desc& dst, - const std::vector& strides, - const std::vector& paddings, - const mkldnn::engine& engine, const bool fuse_relu, - const bool fuse_residual_conn, - mkldnn::prop_kind fwd_prop_kind) const { - memory::dims stride_dims = strides; - memory::dims padding_dims = paddings; - - auto conv_desc = mkldnn::convolution_forward::desc( - fwd_prop_kind, mkldnn::convolution_direct, src, weights, bias, dst, - stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero); - - mkldnn::primitive_attr conv_attr = - CreatePostOps(fuse_relu, fuse_residual_conn); - - auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc( - conv_desc, conv_attr, engine); - - return std::unique_ptr( - p_conv_pd); - } - std::unique_ptr ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights, const memory::desc& bias, const memory::desc& dst, diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 317d4cebe26b81ff03c212e6328233d5152ed1b4..30d2469eeaf6938f1f93730b8b645ca2cfe97364 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "boost/optional.hpp" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" @@ -124,7 +125,6 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { const std::string key = platform::ConvTransposeMKLDNNHandler::GetHash( src_tz, weights_tz, strides, paddings, dilations, groups, ctx.op().Output("Output")); - const std::string key_conv_transpose_pd = key + "@conv_transpose_pd"; std::vector pipeline; @@ -153,6 +153,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { auto dst_md = platform::MKLDNNMemDesc( dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); + platform::ConvTransposeMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); // create a deconv(conv transpose) primitive descriptor and save it for // usage in backward std::shared_ptr @@ -163,19 +164,14 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { bias_tz = paddle::framework::vectorize2int(bias->dims()); auto bias_md = platform::MKLDNNMemDesc( bias_tz, platform::MKLDNNGetDataType(), mkldnn::memory::format::x); - conv_transpose_pd = ConvTransposeFwdPrimitiveDesc( + conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor( src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine, - fuse_relu, fwd_prop_kind); + fuse_relu, false, fwd_prop_kind); } else { - conv_transpose_pd = ConvTransposeFwdPrimitiveDesc( - src_md, weights_md, dst_md, strides, paddings, mkldnn_engine, - fuse_relu, fwd_prop_kind); + conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor( + src_md, weights_md, boost::none, dst_md, strides, paddings, + mkldnn_engine, fuse_relu, false, fwd_prop_kind); } - // Save conv_pd/src_memory/weights_memory for backward pass - if (!is_test) dev_ctx.SetBlob(key_conv_transpose_pd, conv_transpose_pd); - - platform::ConvTransposeMKLDNNHandler handler(conv_transpose_pd, dev_ctx, - mkldnn_engine, key); // create mkldnn memory from input tensors (data/weights) auto user_src_memory_p = handler.AcquireSrcMemory( @@ -224,70 +220,6 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { output->set_layout(DataLayout::kMKLDNN); output->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); } - - private: - mkldnn::primitive_attr CreatePostOps(bool fuse_relu) const { - mkldnn::primitive_attr conv_attr; - mkldnn::post_ops post_operations; - // Fusion with ReLU layer is executed through the PostOps feature. Create a - // PostOps object and configure it to execute an eltwise relu operation. 
- if (fuse_relu) { - constexpr float scale = 1.0f; - constexpr float negative_slope = 0.0f; - constexpr float placeholder = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, - negative_slope, placeholder); - } - conv_attr.set_post_ops(post_operations); - return conv_attr; - } - - std::unique_ptr - ConvTransposeFwdPrimitiveDesc( - const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights, - const mkldnn::memory::desc& dst, const std::vector& strides, - const std::vector& paddings, const mkldnn::engine& engine, - const bool fuse_relu, mkldnn::prop_kind fwd_prop_kind) const { - mkldnn::memory::dims stride_dims = {strides[0], strides[1]}; - mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]}; - - auto deconv_desc = mkldnn::deconvolution_forward::desc( - fwd_prop_kind, mkldnn::deconvolution_direct, src, weights, dst, - stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero); - - mkldnn::primitive_attr deconv_attr = CreatePostOps(fuse_relu); - - auto p_conv_transpose_pd = - new mkldnn::deconvolution_forward::primitive_desc(deconv_desc, - deconv_attr, engine); - - return std::unique_ptr( - p_conv_transpose_pd); - } - - std::unique_ptr - ConvTransposeFwdPrimitiveDesc( - const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights, - const mkldnn::memory::desc& bias, const mkldnn::memory::desc& dst, - const std::vector& strides, const std::vector& paddings, - const mkldnn::engine& engine, const bool fuse_relu, - mkldnn::prop_kind fwd_prop_kind) const { - mkldnn::memory::dims stride_dims = {strides[0], strides[1]}; - mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]}; - - auto deconv_desc = mkldnn::deconvolution_forward::desc( - fwd_prop_kind, mkldnn::deconvolution_direct, src, weights, bias, dst, - stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero); - - mkldnn::primitive_attr deconv_attr = CreatePostOps(fuse_relu); - - auto p_conv_transpose_pd = - new mkldnn::deconvolution_forward::primitive_desc(deconv_desc, - deconv_attr, engine); - - return std::unique_ptr( - p_conv_transpose_pd); - } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index dc1176f0848b93dd6872f676c3a71dab4f3455fd..1b3f33d345f4e0fafd7ad5da41eec052ac2dc504 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -34,12 +34,9 @@ using platform::to_void_cast; class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { public: - SoftmaxMKLDNNHandler( - std::shared_ptr softmax_pd, - const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, - const std::string& base_key) - : platform::MKLDNNHandler(dev_ctx, engine, base_key), - softmax_pd_(softmax_pd) {} + SoftmaxMKLDNNHandler(const platform::MKLDNNDeviceContext& dev_ctx, + mkldnn::engine engine, const std::string& base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key) {} SoftmaxMKLDNNHandler( std::shared_ptr softmax_pd, @@ -54,6 +51,26 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { key_ += "-BWD"; } + std::shared_ptr + AcquireSoftmaxPrimitiveDescriptor(const softmax_forward::desc& softmax_desc, + const mkldnn::engine& engine) { + const std::string key_softmax_pd = key_ + "@softmax_pd"; + + auto softmax_pd = std::static_pointer_cast( + dev_ctx_.GetBlob(key_softmax_pd)); + + if (softmax_pd == nullptr) { + softmax_pd_.reset( + new softmax_forward::primitive_desc(softmax_desc, engine)); + 
dev_ctx_.SetBlob(key_softmax_pd, softmax_pd_); + } else { + softmax_pd_ = softmax_pd; + is_reusing_ = true; + } + + return softmax_pd_; + } + std::shared_ptr AcquireSoftmax( std::shared_ptr dst_memory_p, std::shared_ptr src_memory_p) { @@ -138,19 +155,18 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { // Generate keys for storing/retriving primitives for this operator const std::string key = platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out")); - const std::string key_softmax_pd = key + "@softmax_pd"; + SoftmaxMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); // Currently only NC data format is supported auto softmax_md = MKLDNNMemDesc( {softmax_tz}, platform::MKLDNNGetDataType(), memory::format::nc); // Normalization is made after innermost dimension eg. C out of NC auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, softmax_md, 1 /*dim: C*/); - auto softmax_pd = std::make_shared( - softmax_desc, mkldnn_engine); - dev_ctx.SetBlob(key_softmax_pd, softmax_pd); - SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key); + auto softmax_pd = + handler.AcquireSoftmaxPrimitiveDescriptor(softmax_desc, mkldnn_engine); + auto softmax_src_memory_p = handler.AcquireSrcMemory(softmax_md, to_void_cast(input_data)); auto softmax_dst_memory_p = diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 9731aefa95c5243e29ace87ad8c35d5b01904e60..1e8ba5922aa96ac40798d103868c839242ac1e55 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -483,8 +483,10 @@ class Pad2dOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( paddings_dim.size(), 1, "Size of Input(Paddings)'s dimension should be equal to 1."); - PADDLE_ENFORCE_EQ(paddings_dim[0], 4, - "Shape of Input(Paddings) should be equal to [4]."); + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(paddings_dim[0], 4, + "Shape of Input(Paddings) should be equal to [4]."); + } out_dims[1] = x_dim[1]; out_dims[2] = x_dim[2]; out_dims[3] = x_dim[3]; @@ -504,11 +506,7 @@ class Pad2dOp : public framework::OperatorWithKernel { } ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); - if (out_dims[0] == x_dim[0]) { - // Only pass LoD when the first dimension is equal between - // output and input. 
- ctx->ShareLoD("X", /*->*/ "Out"); - } + ctx->ShareLoD("X", /*->*/ "Out"); } protected: diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 7bb10ce063109dbd8520430d2b32ac9370ef8d25..d0dd861af7be80ede75b9d14867087ec687fc1da 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -37,9 +37,11 @@ class ROIAlignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(rois_dims.size() == 2, "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" "given as [[x1, y1, x2, y2], ...]."); - PADDLE_ENFORCE(rois_dims[1] == 4, - "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" - "given as [[x1, y1, x2, y2], ...]."); + if (ctx->IsRuntime()) { + PADDLE_ENFORCE(rois_dims[1] == 4, + "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)" + "given as [[x1, y1, x2, y2], ...]."); + } int pooled_height = ctx->Attrs().Get("pooled_height"); int pooled_width = ctx->Attrs().Get("pooled_width"); float spatial_scale = ctx->Attrs().Get("spatial_scale"); diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 81aabdd0061b3940f23d4731d55fc5cbe5817004..7e9611679ba9a988f40973aaa37f04bcfa48f1ad 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -45,9 +45,12 @@ class RowConvOp : public framework::OperatorWithKernel { auto filter_dims = ctx->GetInputDim("Filter"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2."); PADDLE_ENFORCE_EQ(filter_dims.size(), 2, "Input(Y)'s rank should be 2."); - PADDLE_ENFORCE_EQ( - x_dims[1], filter_dims[1], - "The 2nd dimension of Input(X) and Input(Filter) should be same."); + if (ctx->IsRuntime() || (x_dims[1] > 0 && filter_dims[1] > 0)) { + PADDLE_ENFORCE_EQ( + x_dims[1], filter_dims[1], + "The 2nd dimension of Input(X) and Input(Filter) should be same."); + } + ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", "Out"); } diff --git a/paddle/fluid/operators/sample_logits_op.cc b/paddle/fluid/operators/sample_logits_op.cc index 9793118b7004477659b73a128b015f4fb4c0bea9..8ce2d52273d7cc3d523e5d77c2c79b9989b9227f 100644 --- a/paddle/fluid/operators/sample_logits_op.cc +++ b/paddle/fluid/operators/sample_logits_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sample_logits_op.h" +#include #include "paddle/fluid/operators/math/sample_prob.h" namespace paddle { @@ -59,6 +60,10 @@ class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor, default: Tensor), A 2-D tensor with shape [N, NT + S]." "The probabilites of sampled positive and negtive labels.") .AsIntermediate(); + AddOutput("LogitsDim", "Store dim information of Logits for gradient op") + .AsIntermediate(); + AddOutput("LabelsDim", "Store dim information of Logits for gradient op") + .AsIntermediate(); AddOutput("SampledLogits", "(Tensor, default: Tensor), A 2-D tensor with shape" "[N, NT + S]. 
The outputs value of sampled logits, which will be" @@ -120,6 +125,10 @@ class SampleLogitsOp : public framework::OperatorWithKernel { "Output(SampledLogits) should be not null."); PADDLE_ENFORCE(ctx->HasOutput("SampledLabels"), "Output(SampledLabels) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("LogitsDim"), + "Output(LogitsDim) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("LabelsDim"), + "Output(LabelsDim) should be not null."); auto logits_dims = ctx->GetInputDim("Logits"); auto labels_dims = ctx->GetInputDim("Labels"); @@ -139,6 +148,15 @@ class SampleLogitsOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Probabilities", {logits_dims[0], num_sampled_classes}); ctx->SetOutputDim("SampledLogits", {logits_dims[0], num_sampled_classes}); ctx->SetOutputDim("SampledLabels", {logits_dims[0], labels_dims[1]}); + + // append 0 to shape variable to avoid optimized by memory optimize pass + auto logits_dim_vec = framework::vectorize(logits_dims); + logits_dim_vec.push_back(0); + ctx->SetOutputDim("LogitsDim", framework::make_ddim(logits_dim_vec)); + + auto labels_dim_vec = framework::vectorize(labels_dims); + labels_dim_vec.push_back(0); + ctx->SetOutputDim("LabelsDim", framework::make_ddim(labels_dim_vec)); } protected: @@ -157,28 +175,27 @@ class SampleLogitsOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Logits"), - "Input(Logits) should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Labels"), - "Input(Labels) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("LogitsDim"), + "Input(LogitsDim) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LabelsDim"), + "Input(LabelsDim) should be not null."); PADDLE_ENFORCE(ctx->HasInput("Samples"), "Input(Samples) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("SampledLogits"), - "Input(SampledLogits) should be not null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("SampledLogits")), "Input(SampledLogits@Grad) should not be null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Logits")), "Output(Logits@Grad) should be not null."); - auto logit_dims = ctx->GetInputDim("Logits"); - auto label_dims = ctx->GetInputDim("Labels"); - PADDLE_ENFORCE_EQ(label_dims.size(), 2UL, + auto logits_dims = ctx->GetInputDim("LogitsDim"); + logits_dims = framework::DDim(logits_dims.Get(), logits_dims.size() - 1); + auto labels_dims = ctx->GetInputDim("LabelsDim"); + labels_dims = framework::DDim(labels_dims.Get(), labels_dims.size() - 1); + PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL, "The label should be a 2-D tensor."); - PADDLE_ENFORCE_EQ(logit_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(logits_dims.size(), 2UL, "The logits should be a 2-D tensor."); - ctx->SetOutputDim(framework::GradVarName("Logits"), - ctx->GetInputDim("Logits")); + ctx->SetOutputDim(framework::GradVarName("Logits"), logits_dims); } protected: @@ -201,10 +218,9 @@ class SampleLogitsGradMaker : public framework::SingleGradOpDescMaker { std::unique_ptr Apply() const override { auto* grad_op = new framework::OpDesc(); grad_op->SetType("sample_logits_grad"); - grad_op->SetInput("Logits", Input("Logits")); - grad_op->SetInput("Labels", Input("Labels")); + grad_op->SetInput("LogitsDim", Output("LogitsDim")); + grad_op->SetInput("LabelsDim", Output("LabelsDim")); grad_op->SetInput("Samples", Output("Samples")); - grad_op->SetInput("SampledLogits", Output("SampledLogits")); 
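Note on the LogitsDim/LabelsDim change above: the grad op no longer keeps the full Logits/Labels tensors alive just to read their shapes; the forward op instead emits tiny shape-carrying outputs, with a trailing 0 appended so the memory optimize pass cannot reclaim them. Recovering the real shape is just dropping that sentinel. A rough Python illustration of the packing convention (function names are ours, not Paddle's):

    def pack_dims(dims):
        # forward op: store the real dims plus a sentinel 0
        return list(dims) + [0]

    def unpack_dims(packed):
        # grad op: drop the sentinel to recover the original shape
        return packed[:-1]

    assert unpack_dims(pack_dims([64, 1000])) == [64, 1000]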
grad_op->SetInput(framework::GradVarName("SampledLogits"), OutputGrad("SampledLogits")); grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits")); diff --git a/paddle/fluid/operators/scatter_op.cc b/paddle/fluid/operators/scatter_op.cc index 8e0e3bd6054018852b242d1dba5c250394ed81ce..68ad223b3c311bec5968eb18b50f15e9da84e6d3 100644 --- a/paddle/fluid/operators/scatter_op.cc +++ b/paddle/fluid/operators/scatter_op.cc @@ -42,10 +42,6 @@ class ScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx->GetInputDim("Updates")[0], ctx->GetInputDim("Ids")[0], "Updates and Ids should have same batch-size."); - framework::DDim data_dim(updates_dims); - for (int i = 1; i < data_dim.size(); ++i) { - PADDLE_ENFORCE_EQ(data_dim[i], updates_dims[i]); - } ctx->SetOutputDim("Out", ref_dims); } diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index 5c92588cc1d073612d2f6a7b315edf16cc14bedd..1c2726454f3d1fb8545e5d3260e59fcafbcb2aee 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -34,15 +34,22 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto labels_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2."); - PADDLE_ENFORCE_EQ(labels_dims.size(), 2, - "Input(Label)'s rank should be 2."); - PADDLE_ENFORCE_EQ(x_dims[0], labels_dims[0], - "The 1st dimension of Input(X) and Input(Label) should " - "be equal."); - PADDLE_ENFORCE_EQ(x_dims[1], labels_dims[1], - "The 2nd dimension of Input(X) and Input(Label) should " - "be equal."); + + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, labels_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank), + framework::slice_ddim(labels_dims, 0, rank), + "Input(X) and Input(Label) shall have the same shape."); + } ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); @@ -65,23 +72,24 @@ class SigmoidCrossEntropyWithLogitsGradOp auto x_dims = ctx->GetInputDim("X"); auto labels_dims = ctx->GetInputDim("Label"); auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2."); - PADDLE_ENFORCE_EQ(labels_dims.size(), 2, - "Input(Label)'s rank should be 2."); - PADDLE_ENFORCE_EQ(dout_dims.size(), 2, - "Input(Out@Grad)'s rank should be 2."); - PADDLE_ENFORCE_EQ(x_dims[0], labels_dims[0], - "The 1st dimension of Input(X) and Input(Label) should " - "be equal."); - PADDLE_ENFORCE_EQ(x_dims[1], labels_dims[1], - "The 2nd dimension of Input(X) and Input(Label) should " - "be equal."); - PADDLE_ENFORCE_EQ(x_dims[0], dout_dims[0], - "The 1st dimension of Input(X) and Input(Out@Grad) " - "should be equal."); - PADDLE_ENFORCE_EQ(x_dims[1], dout_dims[1], - "The 2nd dimension of Input(X) and Input(Out@Grad) " - "should be equal."); + + int rank = x_dims.size(); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank), +
framework::slice_ddim(labels_dims, 0, rank), + "Input(X) and Input(Label) shall have the same shape."); + + PADDLE_ENFORCE_EQ( + framework::slice_ddim(x_dims, 0, rank), + framework::slice_ddim(dout_dims, 0, rank), + "Input(X) and Input(Out@Grad) shall have the same shape."); + } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); } diff --git a/paddle/fluid/operators/spectral_norm_op.cc b/paddle/fluid/operators/spectral_norm_op.cc index 04f659a465a345653d251cbe6703309c804fe614..ec5ee487729d0650983d553dbffe14b63c16b26a 100644 --- a/paddle/fluid/operators/spectral_norm_op.cc +++ b/paddle/fluid/operators/spectral_norm_op.cc @@ -56,13 +56,19 @@ class SpectralNormOp : public framework::OperatorWithKernel { } auto dim_u = ctx->GetInputDim("U"); auto dim_v = ctx->GetInputDim("V"); - PADDLE_ENFORCE_EQ(dim_u[0], h, - "Input(U) dims[0] should be equal to " - "Input(Weight) dims[Attr(dim)]"); - PADDLE_ENFORCE_EQ( - dim_v[0], w, - "Input(V) dims[0] should be equal to " - "the product of Input(Weight) dims except dims[Attr(dim)]"); + + if (ctx->IsRuntime() || (dim_u[0] > 0 && h > 0)) { + PADDLE_ENFORCE_EQ(dim_u[0], h, + "Input(U) dims[0] should be equal to " + "Input(Weight) dims[Attr(dim)]"); + } + + if (ctx->IsRuntime() || (dim_v[0] > 0 && w > 0)) { + PADDLE_ENFORCE_EQ( + dim_v[0], w, + "Input(V) dims[0] should be equal to " + "the product of Input(Weight) dims except dims[Attr(dim)]"); + } ctx->SetOutputDim("Out", dim_weight); ctx->ShareLoD("Weight", /*->*/ "Out"); diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 1391148ccf5d13082cb31ef2e143249e8ef95bfc..67f7510e874d4b3dcb857510e42cbfa7081becfe 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -65,7 +65,21 @@ class SumOp : public framework::OperatorWithKernel { if (framework::product(in_dim) == 0) { in_dim = x_dim; } else { - PADDLE_ENFORCE_EQ(in_dim, x_dim, "Input tensors must have same shape"); + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(in_dim, x_dim, + "Input tensors must have same shape"); + } else { + PADDLE_ENFORCE_EQ(in_dim.size(), x_dim.size(), + "Input tensors must have the same rank"); + // if in_dim or x_dim contains -1, skip the equality check + for (int i = 0; i < x_dim.size(); ++i) { + if (x_dim[i] == -1 || in_dim[i] == -1) { + continue; + } + PADDLE_ENFORCE_EQ(in_dim[i], x_dim[i], + "Input tensors must have the same shape unless a dimension is -1"); + } + } } } ctx->SetOutputDim("Out", in_dim); diff --git a/paddle/fluid/operators/unpool_op.cc b/paddle/fluid/operators/unpool_op.cc index 11e505d6df3beda7053c59b66a29ec2badde3b75..86b4c06a27cc63fca8ec077cb3044ffe9415e01d 100644 --- a/paddle/fluid/operators/unpool_op.cc +++ b/paddle/fluid/operators/unpool_op.cc @@ -99,10 +99,15 @@ class UnpoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(in_x_dims.size() == 4, "Unpooling input must be 4-dimensional."); PADDLE_ENFORCE_EQ(in_x_dims, in_y_dims); + std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]}); for (size_t i = 0; i < ksize.size(); ++i) { - output_shape.push_back(UnpoolOutputSize(in_x_dims[i + 2], ksize[i], - paddings[i], strides[i])); + if (!ctx->IsRuntime() && in_x_dims[i + 2] <= 0) { + output_shape.push_back(-1); + } else { + output_shape.push_back(UnpoolOutputSize(in_x_dims[i + 2], ksize[i], + paddings[i], strides[i])); + } } ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index ecaad4ec070fe60a522839e0718c424a441dec0b..ba3a82b4b07f4dcb3f0037e398c146ab167d7b57
100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include +#include "boost/optional.hpp" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/mkldnn_helper.h" @@ -395,9 +396,28 @@ class TransposeMKLDNNHandler : public MKLDNNHandler { std::vector<int> logical_axis_; }; +template <typename T> +struct convolutional_algorithm; + +template <> +struct convolutional_algorithm<mkldnn::convolution_forward> { + static constexpr mkldnn::algorithm T = mkldnn::algorithm::convolution_direct; +}; + +template <> +struct convolutional_algorithm<mkldnn::deconvolution_forward> { + static constexpr mkldnn::algorithm T = + mkldnn::algorithm::deconvolution_direct; +}; + template <class forward_t, class backward_data_t, class backward_weights_t> class ConvMKLDNNTemplateHandler : public MKLDNNHandler { public: + ConvMKLDNNTemplateHandler(const platform::MKLDNNDeviceContext& dev_ctx, + mkldnn::engine engine, const std::string& base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key) {} + + // TODO(jczaja): remove after conv int8 is adapted ConvMKLDNNTemplateHandler( std::shared_ptr<typename forward_t::primitive_desc> conv_pd, const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, @@ -542,6 +562,73 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler { scale_data, mask); } + mkldnn::primitive_attr CreatePostOps(bool fuse_relu, + bool fuse_residual_conn = false) const { + mkldnn::primitive_attr conv_attr; + mkldnn::post_ops post_operations; + // Fusion with Elementwise layer relies on adding a sum post-operation with + // the scale parameter. It is assumed that when fuse_residual_connection is + // true, the output tensor contains the data coming from residual + // connection. The result of this post_op is: + // Output = scale * Output + Conv_Out. + if (fuse_residual_conn) { + post_operations.append_sum(1.0f); + } + // Fusion with ReLU layer is executed through the PostOps feature. Create a + // PostOps object and configure it to execute an eltwise relu operation. + if (fuse_relu) { + constexpr float scale = 1.0f; + constexpr float negative_slope = 0.0f; + constexpr float placeholder = 0.0f; + post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, + negative_slope, placeholder); + } + conv_attr.set_post_ops(post_operations); + return conv_attr; + } + + std::shared_ptr<typename forward_t::primitive_desc> + AcquireConvolutionPrimitiveDescriptor( + const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights, + boost::optional<const mkldnn::memory::desc&> bias, + const mkldnn::memory::desc& dst, const std::vector<int>& strides, + const std::vector<int>& paddings, const mkldnn::engine& engine, + const bool fuse_relu, const bool fuse_residual_conn, + mkldnn::prop_kind fwd_prop_kind) { + const std::string key_conv_pd = key_ + "@conv_pd"; + + auto conv_pd = std::static_pointer_cast<typename forward_t::primitive_desc>( + dev_ctx_.GetBlob(key_conv_pd)); + + if (conv_pd == nullptr) { + mkldnn::memory::dims stride_dims = strides; + mkldnn::memory::dims padding_dims = paddings; + + auto conv_desc = + bias ? typename forward_t::desc( + fwd_prop_kind, convolutional_algorithm<forward_t>::T, src, + weights, *bias, dst, stride_dims, padding_dims, + padding_dims, mkldnn::padding_kind::zero) + : typename forward_t::desc( + fwd_prop_kind, convolutional_algorithm<forward_t>::T, src, + weights, dst, stride_dims, padding_dims, padding_dims, + mkldnn::padding_kind::zero); + + mkldnn::primitive_attr conv_attr = + CreatePostOps(fuse_relu, fuse_residual_conn); + + conv_pd_.reset( + new typename forward_t::primitive_desc(conv_desc, conv_attr, engine)); + // Save conv_pd/src_memory/weights_memory for backward pass + dev_ctx_.SetBlob(key_conv_pd, conv_pd_); + } else { + conv_pd_ = conv_pd; + is_reusing_ = true; + } + + return conv_pd_; + } + std::shared_ptr<forward_t> AcquireConvolution( std::shared_ptr<mkldnn::memory> src_memory_p, std::shared_ptr<mkldnn::memory> weights_memory_p, diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 7bb713493182239b2fd17f7b7fb496afdc9b8e6c..ccfcb13db6e7339b6770242d8beb60152d2a25ef 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -446,7 +446,8 @@ function assert_api_spec_approvals() { BRANCH="develop" fi - API_FILES=("paddle/fluid/API.spec" + API_FILES=("CMakeLists.txt" + "paddle/fluid/API.spec" "paddle/fluid/op_use_default_grad_op_maker.spec" "python/paddle/fluid/parallel_executor.py" "paddle/fluid/framework/operator.h" @@ -469,24 +470,29 @@ function assert_api_spec_approvals() { echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}" if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable. - # approval_user_list: velconia 1979255,panyx0718 2887803,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308. + # approval_user_list: velconia 1979255,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308.
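+ # Approval policy: paddle/fluid/API.spec needs one RD (chengduoZH or + # XiaoguangHu01) plus the PM (shanyi15); CMakeLists.txt needs one RD out + # of (luotao1, chengduoZH, XiaoguangHu01); any other monitored file needs + # one approval from the RD list above.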
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308 46782768 30176695` + python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 35982308 46782768 30176695` if [ "${APPROVALS}" == "TRUE" ];then APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308` fi + elif [ "$API_FILE" == "CMakeLists.txt" ];then + APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 6836917 46782768 30176695` else APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` fi echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" if [ "${APPROVALS}" == "FALSE" ]; then if [ "$API_FILE" == "paddle/fluid/API.spec" ];then - echo "You must have one RD (panyx0718 or chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}" + echo "You must have one RD (chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}" + elif [ "$API_FILE" == "CMakeLists.txt" ];then + echo "You must have one RD (luotao1 or chengduoZH or XiaoguangHu01) approval for the cmakelist change! ${API_FILE}" else - echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}" + echo "You must have one RD (velconia,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}" fi exit 1 fi @@ -496,10 +502,10 @@ function assert_api_spec_approvals() { HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true` if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641` echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" if [ "${APPROVALS}" == "FALSE" ]; then - echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}" + echo "You must have one RD (velconia,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! 
${API_FILE}" exit 1 fi fi diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 39e06e3486cd5479f69cbdb67811f03bd9646123..6b78e2abb32ab3f134fc7bae6fbd203a2730cf66 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -48,6 +48,12 @@ class Layer(core.Layer): self._helper = LayerObjectHelper(self._full_name) + def train(self): + framework._dygraph_tracer()._train_mode() + + def eval(self): + framework._dygraph_tracer()._eval_mode() + def full_name(self): """Full name for this layers. @@ -254,6 +260,12 @@ class PyLayer(core.PyLayer): def __init__(self): super(PyLayer, self).__init__() + def train(self): + framework._dygraph_tracer()._train_mode() + + def eval(self): + framework._dygraph_tracer()._eval_mode() + @classmethod def _do_forward(cls, inputs): return cls._to_tuple(cls.forward(inputs)) diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py index 94e212b139b2b375aa9f5252d396e90235ba33c1..ee37ffab2cb7521b83108a40febcfe88cab28633 100644 --- a/python/paddle/fluid/dygraph/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -24,7 +24,9 @@ __all__ = ['Tracer'] def release_op(op): - del framework._dygraph_tracer()._ops[op._trace_id] + del framework._dygraph_tracer()._ops[op._trace_id].inputs + del framework._dygraph_tracer()._ops[op._trace_id].outputs + del framework._dygraph_tracer()._ops[op._trace_id].backward_refs class Tracer(core.Tracer): @@ -38,6 +40,7 @@ class Tracer(core.Tracer): self._ops = defaultdict() self._vars = defaultdict() self._trace_id = 0 + self._train_mode = True def trace_var(self, name, var): self._vars[name] = var @@ -46,15 +49,57 @@ class Tracer(core.Tracer): return list((item for name, item in six.iteritems(self._vars) if isinstance(item, framework.Parameter))) - def trace_op(self, op, stop_gradient=False): + def trace_op(self, op, inputs, outputs, stop_gradient=False): + # TODO(minqiyang): remove this line after we take apart all + # backward grads and forward variables + if self._train_mode: + op.inputs = inputs + inps = defaultdict(list) + for k, vars in six.iteritems(inputs): + if isinstance(vars, framework.Variable): + inps[k].append(vars._ivar) + elif isinstance(vars, list) or isinstance(vars, tuple): + for var in vars: + inps[k].append(var._ivar) + + op.outputs = outputs + outs = defaultdict(list) + for k, vars in six.iteritems(outputs): + if isinstance(vars, framework.Variable): + outs[k].append(vars._ivar) + elif isinstance(vars, list) or isinstance(vars, tuple): + for var in vars: + outs[k].append(var._ivar) + else: + inps = defaultdict(list) + for k, vars in six.iteritems(inputs): + if isinstance(vars, framework.Variable): + op.previous_ops.append(vars.op) + inps[k].append(vars._ivar) + elif isinstance(vars, list) or isinstance(vars, tuple): + for var in vars: + op.previous_ops.append(var.op) + inps[k].append(var._ivar) + + op.outputs = outputs + outs = defaultdict(list) + for k, vars in six.iteritems(outputs): + if isinstance(vars, framework.Variable): + vars.op = op + outs[k].append(vars._ivar) + elif isinstance(vars, list) or isinstance(vars, tuple): + for var in vars: + var.op = op + outs[k].append(var._ivar) + # record op's trace id op.iop._trace_id = self._trace_id - backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.attrs, + backward_refs = self.trace(op.iop, inps, outs, op.attrs, framework._current_expected_place(), stop_gradient) - if not stop_gradient: + if not stop_gradient and self._train_mode: self._trace_id 
+= 1 self._ops[op.iop._trace_id] = op @@ -65,10 +110,16 @@ class Tracer(core.Tracer): # TODO(minqiyang): remove all inputs and outputs after separate # var and grad op.backward_refs = defaultdict(list) - for k, v in six.iteritems(op.inputs): + for k, v in six.iteritems(inputs): if k in backward_refs: - op.backward_refs[k] = op.inputs[k] + op.backward_refs[k] = inputs[k] - for k, v in six.iteritems(op.outputs): + for k, v in six.iteritems(outputs): if k in backward_refs: - op.backward_refs[k] = op.outputs[k] + op.backward_refs[k] = outputs[k] + + def train_mode(self): + self._train_mode = True + + def eval_mode(self): + self._train_mode = False diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index c05e5fb9e3a46e721c20fd9288b89009e32afcbe..535909d710eb352bfc90ba42cc3b1894491e9ede 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -411,6 +411,7 @@ class Variable(object): if persistable else False) if persistable: _dygraph_tracer().trace_var(name, self) + self.op = None else: self.error_clip = error_clip @@ -939,24 +940,7 @@ class Operator(object): raise ValueError( "`type` to initialize an Operator can not be None.") self.iop = core.OpBase(type) - - # TODO(minqiyang): remove these lines after we take apart all - # backward grads and forward variables - self.inputs = defaultdict(list) - if inputs is not None: - for k, v in six.iteritems(inputs): - if isinstance(v, Variable): - self.inputs[k].append(v._ivar) - elif isinstance(v, list) or isinstance(v, tuple): - self.inputs[k].extend([var._ivar for var in v]) - - self.outputs = defaultdict(list) - if outputs is not None: - for k, v in six.iteritems(outputs): - if isinstance(v, Variable): - self.outputs[k].append(v._ivar) - elif isinstance(v, list) or isinstance(v, tuple): - self.outputs[k].extend([var._ivar for var in v]) + self.previous_ops = [] self.attrs = attrs if attrs else {} else: @@ -1647,15 +1631,18 @@ class Block(object): block=self, desc=None, type=kwargs.get("type", None), - inputs=kwargs.get("inputs", None), - outputs=kwargs.get("outputs", None), - attrs=kwargs.get("attrs", None)) + inputs=None, + outputs=None, + attrs=kwargs.get("attrs", {})) # record ops in tracer rather than blocks # # TODO(minqiyang): add op stop_gradient support in static mode too. # currently, we only support stop_gradient in dygraph mode.
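+ # NOTE: inputs and outputs are now handed to the tracer at trace time + # instead of being converted eagerly in Operator.__init__; in eval mode + # the tracer does not retain the traced op, so forward variables can be + # released as soon as the op has run.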
- _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False)) + _dygraph_tracer().trace_op(op, + kwargs.get("inputs", {}), + kwargs.get("outputs", {}), + kwargs.get("stop_gradient", False)) else: op_desc = self.desc.append_op() op = Operator( @@ -1719,10 +1706,14 @@ class Block(object): self, None, type=kwargs.get("type", None), - inputs=kwargs.get("inputs", None), - outputs=kwargs.get("outputs", None), - attrs=kwargs.get("attrs", None)) - _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False)) + inputs=None, + outputs=None, + attrs=kwargs.get("attrs", {})) + + _dygraph_tracer().trace_op(op, + kwargs.get("inputs", {}), + kwargs.get("outputs", {}), + kwargs.get("stop_gradient", False)) else: op_desc = self.desc._prepend_op() op = Operator( diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index f8f461853f34a09eb2317f6ac93ad385cca3609f..2df63d723e6ce91d3819c5e4301b9d5682158d79 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -29,7 +29,8 @@ from functools import reduce __all__ = [ 'While', 'Switch', 'increment', 'array_write', 'create_array', 'less_than', - 'equal', 'array_read', 'array_length', 'IfElse', 'DynamicRNN', 'StaticRNN', + 'less_equal', 'greater_than', 'greater_equal', 'equal', 'not_equal', + 'array_read', 'array_length', 'IfElse', 'DynamicRNN', 'StaticRNN', 'reorder_lod_tensor_by_rank', 'Print', 'is_empty' ] @@ -189,6 +190,7 @@ def Print(input, 'print_tensor_lod': print_tensor_lod, 'print_phase': print_phase.upper() }) + return input class BlockGuard(object): @@ -971,6 +973,114 @@ def less_than(x, y, force_cpu=None, cond=None): return cond +@templatedoc() +def less_equal(x, y, cond=None): + """ + This layer returns the truth value of :math:`x <= y` elementwise, which is equivalent to the overloaded operator `<=`. + + Args: + x(Variable): First operand of *less_equal* + y(Variable): Second operand of *less_equal* + cond(Variable|None): Optional output variable to store the result of *less_equal* + + Returns: + Variable: The tensor variable storing the output of *less_equal*. + + Examples: + .. code-block:: python + + out = fluid.layers.less_equal(x=label, y=limit) + """ + helper = LayerHelper("less_equal", **locals()) + if cond is None: + cond = helper.create_variable_for_type_inference(dtype='bool') + cond.stop_gradient = True + + attrs = dict() + if force_init_on_cpu(): + attrs['force_cpu'] = force_init_on_cpu() + + helper.append_op( + type='less_equal', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [cond]}, + attrs=attrs) + return cond + + +@templatedoc() +def greater_than(x, y, cond=None): + """ + This layer returns the truth value of :math:`x > y` elementwise, which is equivalent to the overloaded operator `>`. + + Args: + x(Variable): First operand of *greater_than* + y(Variable): Second operand of *greater_than* + cond(Variable|None): Optional output variable to store the result of *greater_than* + + Returns: + Variable: The tensor variable storing the output of *greater_than*. + + Examples: + .. 
code-block:: python + + out = fluid.layers.greater_than(x=label, y=limit) + """ + helper = LayerHelper("greater_than", **locals()) + if cond is None: + cond = helper.create_variable_for_type_inference(dtype='bool') + cond.stop_gradient = True + + attrs = dict() + if force_init_on_cpu(): + attrs['force_cpu'] = force_init_on_cpu() + + helper.append_op( + type='greater_than', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [cond]}, + attrs=attrs) + return cond + + +@templatedoc() +def greater_equal(x, y, cond=None): + """ + This layer returns the truth value of :math:`x >= y` elementwise, which is equivalent to the overloaded operator `>=`. + + Args: + x(Variable): First operand of *greater_equal* + y(Variable): Second operand of *greater_equal* + cond(Variable|None): Optional output variable to store the result of *greater_equal* + + Returns: + Variable: The tensor variable storing the output of *greater_equal*. + + Examples: + .. code-block:: python + + out = fluid.layers.greater_equal(x=label, y=limit) + """ + helper = LayerHelper("greater_equal", **locals()) + if cond is None: + cond = helper.create_variable_for_type_inference(dtype='bool') + cond.stop_gradient = True + + attrs = dict() + if force_init_on_cpu(): + attrs['force_cpu'] = force_init_on_cpu() + + helper.append_op( + type='greater_equal', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [cond]}, + attrs=attrs) + return cond + + def equal(x, y, cond=None): """ This layer returns the truth value of :math:`x == y` elementwise. @@ -999,6 +1109,34 @@ def equal(x, y, cond=None): return cond +def not_equal(x, y, cond=None): + """ + This layer returns the truth value of :math:`x != y` elementwise, which is equivalent to the overloaded operator `!=`. + + Args: + x(Variable): First operand of *not_equal* + y(Variable): Second operand of *not_equal* + cond(Variable|None): Optional output variable to store the result of *not_equal* + + Returns: + Variable: The tensor variable storing the output of *not_equal*. + + Examples: + .. code-block:: python + + out = fluid.layers.not_equal(x=label, y=limit) + """ + helper = LayerHelper("not_equal", **locals()) + if cond is None: + cond = helper.create_variable_for_type_inference(dtype='bool') + cond.stop_gradient = True + + helper.append_op( + type='not_equal', inputs={'X': [x], + 'Y': [y]}, outputs={'Out': [cond]}) + return cond + + def array_read(array, i): """ This function performs the operation to read the data in as an diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 0a1ddbc1dba51692e75fa76856dd689b77ab9f35..09a573e8381e7402f6f617e6045110a93ba4f589 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -509,14 +509,14 @@ def polygon_box_transform(input, name=None): @templatedoc(op_type="yolov3_loss") def yolov3_loss(x, - gtbox, - gtlabel, + gt_box, + gt_label, anchors, anchor_mask, class_num, ignore_thresh, downsample_ratio, - gtscore=None, + gt_score=None, use_label_smooth=True, name=None): """ @@ -524,12 +524,12 @@ def yolov3_loss(x, Args: x (Variable): ${x_comment} - gtbox (Variable): groud truth boxes, should be in shape of [N, B, 4], + gt_box (Variable): ground truth boxes, should be in shape of [N, B, 4], in the third dimension, x, y, w, h should be stored and x, y, w, h should be relative values of the input image. N is the batch number and B is the max box number in an image.
- gtlabel (Variable): class id of ground truth boxes, shoud be in shape + gt_label (Variable): class id of ground truth boxes, should be in shape of [N, B]. anchors (list|tuple): ${anchors_comment} anchor_mask (list|tuple): ${anchor_mask_comment} @@ -537,7 +537,7 @@ ignore_thresh (float): ${ignore_thresh_comment} downsample_ratio (int): ${downsample_ratio_comment} name (string): the name of yolov3 loss. Default None. - gtscore (Variable): mixup score of ground truth boxes, shoud be in shape + gt_score (Variable): mixup score of ground truth boxes, should be in shape of [N, B]. Default None. use_label_smooth (bool): ${use_label_smooth_comment} @@ -558,13 +558,13 @@ .. code-block:: python x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') - gtbox = fluid.layers.data(name='gtbox', shape=[6, 4], dtype='float32') - gtlabel = fluid.layers.data(name='gtlabel', shape=[6], dtype='int32') - gtscore = fluid.layers.data(name='gtscore', shape=[6], dtype='float32') + gt_box = fluid.layers.data(name='gt_box', shape=[6, 4], dtype='float32') + gt_label = fluid.layers.data(name='gt_label', shape=[6], dtype='int32') + gt_score = fluid.layers.data(name='gt_score', shape=[6], dtype='float32') anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] anchor_mask = [0, 1, 2] - loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, - gtscore=gtscore, anchors=anchors, + loss = fluid.layers.yolov3_loss(x=x, gt_box=gt_box, gt_label=gt_label, + gt_score=gt_score, anchors=anchors, anchor_mask=anchor_mask, class_num=80, ignore_thresh=0.7, downsample_ratio=32) """ @@ -572,11 +572,11 @@ if not isinstance(x, Variable): raise TypeError("Input x of yolov3_loss must be Variable") - if not isinstance(gtbox, Variable): + if not isinstance(gt_box, Variable): raise TypeError("Input gt_box of yolov3_loss must be Variable") - if not isinstance(gtlabel, Variable): + if not isinstance(gt_label, Variable): raise TypeError("Input gt_label of yolov3_loss must be Variable") - if gtscore is not None and not isinstance(gtscore, Variable): + if gt_score is not None and not isinstance(gt_score, Variable): raise TypeError("Input gt_score of yolov3_loss must be Variable") if not isinstance(anchors, list) and not isinstance(anchors, tuple): raise TypeError("Attr anchors of yolov3_loss must be list or tuple") @@ -602,11 +602,11 @@ inputs = { "X": x, - "GTBox": gtbox, - "GTLabel": gtlabel, + "GTBox": gt_box, + "GTLabel": gt_label, } - if gtscore: - inputs["GTScore"] = gtscore + if gt_score: + inputs["GTScore"] = gt_score attrs = { "anchors": anchors, @@ -1542,7 +1542,7 @@ def multi_box_head(inputs, ..
code-block:: python mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head( - inputs=[conv1, conv2, conv3, conv4, conv5, conv5], + inputs=[conv1, conv2, conv3, conv4, conv5, conv6], image=images, num_classes=21, min_ratio=20, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e0e32bb6737c1bdde8bee2708afdebb186dc18f6..2bac9dd9a46b1b291d7ee39876f7b60d2d5e298b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5721,12 +5721,21 @@ def hsigmoid(input, raise ValueError( "num_classes must not be less than 2 with default tree") + if (not is_custom) and (is_sparse): + print("Sparse mode should not be used without custom tree") + is_sparse = False + + if (not is_custom) and ((path_table is not None) or + (path_code is not None)): + raise ValueError( + "only num_classes should be passed without custom tree") + if (is_custom) and (path_code is None): - raise ValueError("path_code should not be None with costum tree") + raise ValueError("path_code should not be None with custom tree") elif (is_custom) and (path_table is None): - raise ValueError("path_table should not be None with costum tree") + raise ValueError("path_table should not be None with custom tree") elif (is_custom) and (num_classes is None): - raise ValueError("num_classes should not be None with costum tree") + raise ValueError("num_classes should not be None with custom tree") else: pass @@ -6269,6 +6278,8 @@ def sampled_softmax_with_cross_entropy(logits, sampled_label = helper.create_variable_for_type_inference(dtype='int64') sampled_softlabel = helper.create_variable_for_type_inference( dtype=logits.dtype) + logits_dim = helper.create_variable_for_type_inference(dtype=logits.dtype) + labels_dim = helper.create_variable_for_type_inference(dtype=label.dtype) helper.append_op( type='sample_logits', @@ -6282,7 +6293,9 @@ 'Samples': samples, 'Probabilities': probabilities, 'SampledLabels': sampled_label, - 'SampledLogits': sampled_logits + 'SampledLogits': sampled_logits, + 'LogitsDim': logits_dim, + 'LabelsDim': labels_dim }, attrs={ 'use_customized_samples': use_customized_samples, diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index f018bb8af8cc9f7ed965c86d5aff40352014c393..f06c0abaf901950d072cf857696bc7479fb7b52f 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -27,6 +27,7 @@ __activations_noattr__ = [ 'tanh_shrink', 'softshrink', 'sqrt', + 'rsqrt', 'abs', 'ceil', 'floor', @@ -81,8 +82,8 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0): Examples: ..
code-block:: python - - result = fluid.layers.uniform_random(shape=[32, 784]) + + result = fluid.layers.uniform_random(shape=[32, 784]) """ locals_var = locals().keys() diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 03ebd41fa00c69bfce66d325e32fc9aeb25a2486..d1681580bebc454d26be518180b649bfb3c76e4e 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -28,7 +28,7 @@ __all__ = [ 'tensor_array_to_tensor', 'concat', 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite', - 'range', 'linspace' + 'range', 'linspace', 'zeros_like' ] @@ -853,3 +853,34 @@ def linspace(start, stop, num, dtype): 'Num': num}, outputs={'Out': [out]}) return out + + +def zeros_like(x, out=None): + """ + **zeros_like** + + This function creates a tensor of zeros with the same shape and dtype + as `x`. + + Args: + x(Variable): The input tensor which specifies shape and dtype. + out(Variable|None): Optional output variable to store the result; if None, a new variable is created. + + Returns: + Variable: The tensor variable storing the output. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', dtype='float32', shape=[3], append_batch_size=False) + data = fluid.layers.zeros_like(x) # [0.0, 0.0, 0.0] + + """ + + helper = LayerHelper("zeros_like", **locals()) + if out is None: + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='fill_zeros_like', inputs={'X': [x]}, outputs={'Out': [out]}) + out.stop_gradient = True + return out diff --git a/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py index 48c0f3d3611547308b5d4460748d3aab765f5805..6f24ec45aa6f27814e489b8dce49fe69f62d4f10 100644 --- a/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py +++ b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py @@ -88,3 +88,19 @@ def train10(batch_size=None): paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch', batch_size=batch_size) + + +def test10(batch_size=None): + """ + CIFAR-10 test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Test reader creator.
+ :rtype: callable + """ + return reader_creator( + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'test_batch', + batch_size=batch_size) diff --git a/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py index 82294d4b26fe64e6cddc81f9ba3480caf5b51620..0a27aa0fcfece36f1a8ae5ad0477d75a15fd88da 100644 --- a/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py @@ -89,9 +89,11 @@ def train(use_cuda, train_program, parallel, params_dirname): cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10), batch_size=BATCH_SIZE, drop_last=False) - + # Use only part of the test set for the validation program test_reader = paddle.batch( - paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) + cifar10_small_test_set.test10(BATCH_SIZE), + batch_size=BATCH_SIZE, + drop_last=False) def event_handler(event): if isinstance(event, EndStepEvent): diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 7d1b869cf5991dc5ef960ff4d72289979aae158a..e1c4c2eca08d4652ecda8e2579d342818c803f4a 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -474,17 +474,17 @@ class TestYoloDetection(unittest.TestCase): program = Program() with program_guard(program): x = layers.data(name='x', shape=[30, 7, 7], dtype='float32') - gtbox = layers.data(name='gtbox', shape=[10, 4], dtype='float32') - gtlabel = layers.data(name='gtlabel', shape=[10], dtype='int32') - gtscore = layers.data(name='gtscore', shape=[10], dtype='float32') + gt_box = layers.data(name='gt_box', shape=[10, 4], dtype='float32') + gt_label = layers.data(name='gt_label', shape=[10], dtype='int32') + gt_score = layers.data(name='gt_score', shape=[10], dtype='float32') loss = layers.yolov3_loss( x, - gtbox, - gtlabel, [10, 13, 30, 13], [0, 1], + gt_box, + gt_label, [10, 13, 30, 13], [0, 1], 10, 0.7, 32, - gtscore=gtscore, + gt_score=gt_score, use_label_smooth=False) self.assertIsNotNone(loss) diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index d587715d607c6da16da5c009db16322e8cd7d176..4d66b7a989732e37c48c73b9617943874ad07bba 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -192,6 +192,23 @@ class TestSqrt(TestActivation): self.check_grad(['X'], 'Out', max_relative_error=0.007) +class TestRsqrt(TestActivation): + def setUp(self): + self.op_type = "rsqrt" + self.init_dtype() + + x = np.random.uniform(0.1, 1, [2, 3]).astype(self.dtype) + out = 1.0 / np.sqrt(x) + + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + def test_check_grad(self): + if self.dtype == np.float16: + return + self.check_grad(['X'], 'Out', max_relative_error=0.0005) + + class TestAbs(TestActivation): def setUp(self): self.op_type = "abs" diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py new file mode 100644 index 0000000000000000000000000000000000000000..8b8fdcc887beb4879b2ce1101184dabe6f819acf --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -0,0 +1,201 @@ +#
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib +import unittest +import numpy as np +import six + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.optimizer import SGDOptimizer +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.base import to_variable +from test_imperative_base import new_program_scope + + +class SimpleImgConvPool(fluid.dygraph.Layer): + def __init__(self, + name_scope, + num_channels, + num_filters, + filter_size, + pool_size, + pool_stride, + pool_padding=0, + pool_type='max', + global_pooling=False, + conv_stride=1, + conv_padding=0, + conv_dilation=1, + conv_groups=1, + act=None, + use_cudnn=False, + param_attr=None, + bias_attr=None): + super(SimpleImgConvPool, self).__init__(name_scope) + + self._conv2d = Conv2D( + self.full_name(), + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D( + self.full_name(), + pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) + + def forward(self, inputs): + x = self._conv2d(inputs) + x = self._pool2d(x) + return x + + +class MNIST(fluid.dygraph.Layer): + def __init__(self, name_scope): + super(MNIST, self).__init__(name_scope) + + self._simple_img_conv_pool_1 = SimpleImgConvPool( + self.full_name(), 1, 20, 5, 2, 2, act="relu") + + self._simple_img_conv_pool_2 = SimpleImgConvPool( + self.full_name(), 20, 50, 5, 2, 2, act="relu") + + pool_2_shape = 50 * 4 * 4 + SIZE = 10 + scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 + self._fc = FC(self.full_name(), + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax") + + def forward(self, inputs): + x = self._simple_img_conv_pool_1(inputs) + x = self._simple_img_conv_pool_2(x) + x = self._fc(x) + return x + + +class TestDygraphMultiForward(unittest.TestCase): + def test_mnist_forward_float32(self): + seed = 90 + epoch_num = 1 + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + mnist = MNIST("mnist") + sgd = SGDOptimizer(learning_rate=1e-3) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + dy_param_init_value = {} + mnist.eval() + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + dy_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(128, 1) + + img = to_variable(dy_x_data) + label = to_variable(y_data) + label.stop_gradient = True + + cost = 
mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + dy_out = avg_loss.numpy() + + if epoch == 0 and batch_id == 0: + for param in mnist.parameters(): + dy_param_init_value[param.name] = param.numpy() + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + exe = fluid.Executor(fluid.CPUPlace( + ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + + mnist = MNIST("mnist") + sgd = SGDOptimizer(learning_rate=1e-3) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + img = fluid.layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + cost = mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + # initialize params and fetch them + static_param_init_value = {} + static_param_name_list = [] + for param in mnist.parameters(): + static_param_name_list.append(param.name) + + out = exe.run(fluid.default_startup_program(), + fetch_list=static_param_name_list) + + for i in range(len(static_param_name_list)): + static_param_init_value[static_param_name_list[i]] = out[i] + + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + static_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape([128, 1]) + + fetch_list = [avg_loss.name] + out = exe.run( + fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_out = out[0] + + self.assertTrue(np.allclose(dy_x_data, static_x_data)) + + for key, value in six.iteritems(static_param_init_value): + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + + self.assertTrue(np.allclose(static_out, dy_out)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py index 8960cbcdd2f574a647229894c44c2b6ea188b7d4..b1851f4c78ddf984b06cf67f628099d5b60c771e 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -65,7 +65,9 @@ class ModelHyperParams(object): # number of head used in multi-head attention. n_head = 8 # number of sub-layers to be stacked in the encoder and decoder. - n_layer = 6 + # NOTE(zcd): the original number of layers is 6; to make this unit test + # faster, we reduce the layer number to 4. + n_layer = 4 # dropout rate used by all dropout layers.
dropout = 0.1 diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py index ae1883f1f7e44e06e378ff6d16dbc3c5060027e4..ec10b634091fc521062457b780b0c4cafcbacec0 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py @@ -149,5 +149,98 @@ class TestSigmoidCrossEntropyWithNorm(OpTest): self.check_grad(['X'], 'Out') +class TestSigmoidCrossEntropyWithLogitsOp5(OpTest): + """Test sigmoid_cross_entropy_with_logits_op with probabilistic label + """ + + def setUp(self): + self.op_type = "sigmoid_cross_entropy_with_logits" + batch_size = [10, 10] + num_classes = 20 + self.inputs = { + 'X': logit( + np.random.uniform(0, 1, tuple(batch_size + [num_classes])) + .astype("float32")), + 'Label': np.random.uniform(0, 1, tuple(batch_size + [num_classes])) + .astype("float32") + } + + # Fw Pass is implemented as elementwise sigmoid followed by + # elementwise logistic loss + # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X)) + sigmoid_X = expit(self.inputs['X']) + term1 = self.inputs['Label'] * np.log(sigmoid_X) + term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X) + self.outputs = {'Out': -term1 - term2} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestSigmoidCrossEntropyWithNorm2(OpTest): + def setUp(self): + self.op_type = "sigmoid_cross_entropy_with_logits" + batch_size = [10, 10] + num_classes = 20 + ignore_index = -1 + self.inputs = { + 'X': logit( + np.random.uniform(0, 1, tuple(batch_size + [num_classes])) + .astype("float32")), + 'Label': np.random.randint(-1, 2, tuple(batch_size + [num_classes])) + .astype("float32") + } + self.attrs = {'ignore_index': ignore_index, 'normalize': True} + sigmoid_X = expit(self.inputs['X']) + term1 = self.inputs['Label'] * np.log(sigmoid_X) + term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X) + out = -term1 - term2 + out[np.where(self.inputs['Label'] == ignore_index)] = 0 + if self.attrs['normalize']: + out = out / float( + np.where(self.inputs['Label'] != ignore_index)[0].size) + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestSigmoidCrossEntropyWithLogitsOp6(OpTest): + """Test sigmoid_cross_entropy_with_logits_op with binary label + """ + + def setUp(self): + self.op_type = "sigmoid_cross_entropy_with_logits" + batch_size = [10, 10] + num_classes = 20 + self.inputs = { + 'X': logit( + np.random.uniform(0, 1, tuple(batch_size + [num_classes])) + .astype("float32")), + 'Label': np.random.randint(0, 2, tuple(batch_size + [num_classes])) + .astype("float32") + } + + # Fw Pass is implemented as elementwise sigmoid followed by + # elementwise logistic loss + # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X)) + sigmoid_X = expit(self.inputs['X']) + term1 = self.inputs['Label'] * np.log(sigmoid_X) + term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X) + self.outputs = {'Out': -term1 - term2} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + if __name__ == '__main__': unittest.main()
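For reference, the user-facing layers this patch introduces (the extra comparison ops and zeros_like) compose naturally in a small fluid program. A minimal sketch against the fluid 1.x API shown above; the tensor names and feed values are illustrative only:

import numpy as np
import paddle.fluid as fluid

# Two 1-D tensors compared element-wise by the new layers.
x = fluid.layers.data(name='x', shape=[3], dtype='float32', append_batch_size=False)
y = fluid.layers.data(name='y', shape=[3], dtype='float32', append_batch_size=False)

ge = fluid.layers.greater_equal(x, y)  # element-wise x >= y, bool output
ne = fluid.layers.not_equal(x, y)      # element-wise x != y, bool output
z = fluid.layers.zeros_like(x)         # zeros with x's shape and dtype

exe = fluid.Executor(fluid.CPUPlace())
ge_v, ne_v, z_v = exe.run(
    fluid.default_main_program(),
    feed={'x': np.array([1., 2., 3.], dtype='float32'),
          'y': np.array([1., 1., 4.], dtype='float32')},
    fetch_list=[ge, ne, z])
# Expected: ge_v == [True, True, False], ne_v == [False, True, True],
# z_v == [0., 0., 0.]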