Commit 742d7587 authored by: xuezhong

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_infershape_bug2

...@@ -277,6 +277,7 @@ paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, de
paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb'))
paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '495e21e9a848c2d075a102802fc67756'))
paddle.fluid.layers.zeros_like (ArgSpec(args=['x', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c7e4cfffc93ae89c8f6f53b6d650f923'))
paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -286,7 +287,11 @@ paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=
paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
paddle.fluid.layers.less_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd6b173ae1a149e0bdfe7b8bf69285957'))
paddle.fluid.layers.greater_than (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '2c9bd414caa6c615539018d27001b44c'))
paddle.fluid.layers.greater_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '62c667d24e7b07e166b47a53b61b2ff4'))
paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
paddle.fluid.layers.not_equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '56148fb1024687a08e96af79bdc5c929'))
paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', 'dd68bead34dfbaf6b0a163fc1cc3c385'))
paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -319,6 +324,7 @@ paddle.fluid.layers.atan (ArgSpec(args=['x', 'name'], varargs=None, keywords=Non
paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9e27491c39ac74d0b1ffe506aec0ebb'))
paddle.fluid.layers.rsqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c445467ebe58b3c0d7f0bba7795b6f56'))
paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
...@@ -331,13 +337,13 @@ paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywor
paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee'))
paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a'))
paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148'))
paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641'))
paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', 'a8c4e972b7d6742c838a37abf407ed9a'))
paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e'))
paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926'))
paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a'))
paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8'))
paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'fe9afaee481dd09f28866df22756466f'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1'))
...@@ -352,7 +358,7 @@ paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes',
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gtscore', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '57fa96922e42db8f064c3fb77f2255e8'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '059021025283ad1ee6f4d32228cf3e4e'))
paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5566169a5ab993d177792c023c7fb340'))
paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
...
...@@ -116,7 +116,7 @@ void compare_continuous_input(std::string model_dir, bool use_tensorrt) {
reinterpret_cast<const PaddlePredictor::Config*>(&analysis_config);
auto native_pred = CreateTestPredictor(config, false);
auto analysis_pred = CreateTestPredictor(config, true);
for (int i = 0; i < 100; i++) {
for (int i = 0; i < 20; i++) {
std::vector<std::vector<PaddleTensor>> inputs_all;
if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
...@@ -133,11 +133,13 @@ void compare_continuous_input(std::string model_dir, bool use_tensorrt) {
TEST(TensorRT_mobilenet, compare) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
compare(model_dir, /* use_tensorrt */ true);
// Enable it when needed.
// profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
}
TEST(TensorRT_resnet50, compare) {
TEST(resnet50, compare_continuous_input) {
std::string model_dir = FLAGS_infer_model + "/resnet50"; std::string model_dir = FLAGS_infer_model + "/resnet50";
compare(model_dir, /* use_tensorrt */ true);
compare_continuous_input(model_dir, true);
}
TEST(TensorRT_resnext50, compare) {
...@@ -145,24 +147,6 @@ TEST(TensorRT_resnext50, compare) {
compare(model_dir, /* use_tensorrt */ true);
}
TEST(TensorRT_resnext50, profile) {
std::string model_dir = FLAGS_infer_model + "/resnext50";
// Set FLAGS_record_benchmark to true to record benchmark to file.
// FLAGS_record_benchmark=true;
FLAGS_model_name = "resnext50";
profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
}
TEST(resnext50, compare_analysis_native) {
std::string model_dir = FLAGS_infer_model + "/resnext50";
compare(model_dir, false /*use tensorrt*/);
}
TEST(TensorRT_mobilenet, analysis) {
std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
compare(model_dir, false /* use_tensorrt */);
}
TEST(AnalysisPredictor, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
AnalysisConfig config;
...@@ -180,20 +164,5 @@ TEST(AnalysisPredictor, use_gpu) {
}
}
TEST(TensorRT_mobilenet, profile) {
std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
profile(model_dir, true, false);
}
TEST(resnet50, compare_continuous_input) {
std::string model_dir = FLAGS_infer_model + "/resnet50";
compare_continuous_input(model_dir, true);
}
TEST(resnet50, compare_continuous_input_native) {
std::string model_dir = FLAGS_infer_model + "/resnet50";
compare_continuous_input(model_dir, false);
}
}  // namespace inference
}  // namespace paddle
...@@ -18,7 +18,6 @@ gru
hierarchical_sigmoid
lrn
lstm_unit
lstmp
max_pool2d_with_index
max_pool3d_with_index
maxout
...
...@@ -227,6 +227,15 @@ $out = \sqrt{x}$
)DOC";
UNUSED constexpr char RsqrtDoc[] = R"DOC(
Rsqrt Activation Operator.
Please make sure the input is legal, to avoid numeric errors.
$out = \frac{1}{\sqrt{x}}$
)DOC";
UNUSED constexpr char AbsDoc[] = R"DOC(
Abs Activation Operator.
...@@ -575,6 +584,7 @@ REGISTER_ACTIVATION_OP_MAKER(Gelu, GeluDoc);
REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Rsqrt, RsqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc);
REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc);
REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc);
...@@ -586,6 +596,7 @@ REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
}  // namespace operators
}  // namespace paddle
...
...@@ -511,6 +511,26 @@ struct SqrtGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
// rsqrt(x) = x^(-1/2)
template <typename T>
struct RsqrtFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.rsqrt();
}
};
template <typename T>
struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
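As a sanity check on the gradient above (a derivation, not part of the patch): with out = x^{-1/2}, the chain rule yields a derivative expressible purely in terms of the output, which is why FwdDeps() only declares kDepOut:

\frac{\partial\, out}{\partial x} = -\frac{1}{2}\, x^{-3/2} = -\frac{1}{2}\, out^{3},
\qquad
dx = dout \cdot \left(-\frac{1}{2}\right) out^{3}

This matches the kernel line dx = static_cast<T>(-0.5) * dout * out * out * out.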
// ceil(x) = ceiling(x)
template <typename T>
struct CeilFunctor : public BaseActivationFunctor<T> {
...@@ -1191,6 +1211,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
  __macro(atan, Atan, AtanFunctor, AtanGradFunctor);                       \
  __macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
  __macro(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor);                       \
__macro(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor); \
  __macro(abs, Abs, AbsFunctor, AbsGradFunctor);                           \
  __macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor);                       \
  __macro(floor, Floor, FloorFunctor, ZeroGradFunctor);                    \
...
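For readers unfamiliar with this registration style: the trailing-backslash list above is an X-macro, a single list of operators that gets re-expanded with different per-op macros to stamp out functors, kernels, and registrations, which is why adding rsqrt is a one-line change. A minimal self-contained sketch of the idiom (FOR_EACH_DEMO_OP and PRINT_OP are hypothetical names, not Paddle APIs):

#include <iostream>

// One list of ops, re-expanded with whatever per-op macro the caller supplies.
#define FOR_EACH_DEMO_OP(__macro) \
  __macro(sqrt);                  \
  __macro(rsqrt);                 \
  __macro(abs)

#define PRINT_OP(name) std::cout << "registered: " << #name << "\n"

int main() {
  FOR_EACH_DEMO_OP(PRINT_OP);  // expands to one PRINT_OP(...) per entry
  return 0;
}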
...@@ -79,9 +79,13 @@ class AffineChannelOp : public framework::OperatorWithKernel {
: x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE_EQ(scale_dims.size(), 1UL);
PADDLE_ENFORCE_EQ(scale_dims[0], C);
PADDLE_ENFORCE_EQ(b_dims.size(), 1UL);
PADDLE_ENFORCE_EQ(b_dims[0], C);
if (ctx->IsRuntime() || scale_dims[0] > 0) {
PADDLE_ENFORCE_EQ(scale_dims[0], C);
}
if (ctx->IsRuntime() || b_dims[0] > 0) {
PADDLE_ENFORCE_EQ(b_dims[0], C);
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
...
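The guard added above is the recurring fix of this commit: a concrete dimension check is only meaningful when the dimension is actually known, which is always the case at runtime and, at compile time, only when the value is positive (a -1 extent means "not yet inferred"). A standalone sketch of the rule, with hypothetical names:

#include <cassert>
#include <cstdint>

// Hypothetical helper (not Paddle API): run a dim equality check only when
// the value is known -- always at runtime, and at compile time only when
// the dim is positive, since -1 encodes "unknown".
bool ShouldCheckDim(bool is_runtime, int64_t dim) {
  return is_runtime || dim > 0;
}

int main() {
  const int64_t C = 64;       // channel count inferred from Input(X)
  int64_t scale_dim0 = -1;    // Scale's first dim, unknown at compile time
  if (ShouldCheckDim(/*is_runtime=*/false, scale_dim0)) {
    assert(scale_dim0 == C);  // skipped here: the dim is still unknown
  }
  scale_dim0 = 64;            // at runtime the real shape is available
  if (ShouldCheckDim(/*is_runtime=*/true, scale_dim0)) {
    assert(scale_dim0 == C);  // now the check actually runs
  }
  return 0;
}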
...@@ -65,11 +65,22 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const {
(data_layout == DataLayout::kNCHW ? x_dims[1]
                                  : x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); auto scale_dim = ctx->GetInputDim("Scale");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); auto bias_dim = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], C);
PADDLE_ENFORCE_EQ(scale_dim.size(), 1UL);
PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL);
bool check = true;
if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 ||
framework::product(bias_dim) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[0], C);
PADDLE_ENFORCE_EQ(bias_dim[0], C);
}
ctx->SetOutputDim("Y", x_dims); ctx->SetOutputDim("Y", x_dims);
ctx->SetOutputDim("MeanOut", {C}); ctx->SetOutputDim("MeanOut", {C});
ctx->SetOutputDim("VarianceOut", {C}); ctx->SetOutputDim("VarianceOut", {C});
......
...@@ -49,7 +49,15 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -49,7 +49,15 @@ class ConcatOp : public framework::OperatorWithKernel {
for (size_t i = 1; i < n; i++) { for (size_t i = 1; i < n; i++) {
for (size_t j = 0; j < in_zero_dims_size; j++) { for (size_t j = 0; j < in_zero_dims_size; j++) {
if (j == axis) { if (j == axis) {
out_dims[axis] += ins[i][j];
if (ctx->IsRuntime()) {
out_dims[axis] += ins[i][j];
} else {
if (ins[i][j] == -1) {
out_dims[axis] = -1;
} else {
out_dims[axis] += ins[i][j];
}
}
} else {
if (ctx->IsRuntime()) {
// check all shape in run time
...
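The concat change applies the same idea along the concatenation axis: the output extent is the sum of the input extents, except that at compile time a single unknown (-1) input makes the sum unknowable. A sketch of roughly that folding rule (ConcatAxisDim is a hypothetical helper, not Paddle API):

#include <cassert>
#include <cstdint>
#include <vector>

// Fold the inputs' sizes along the concat axis; one unknown input (-1)
// makes the whole output axis unknown at compile time.
int64_t ConcatAxisDim(const std::vector<int64_t>& axis_dims) {
  int64_t out = 0;
  for (int64_t d : axis_dims) {
    if (d == -1) return -1;  // unknown in -> unknown out
    out += d;
  }
  return out;
}

int main() {
  assert(ConcatAxisDim({2, 3, 4}) == 9);    // all known: plain sum
  assert(ConcatAxisDim({2, -1, 4}) == -1);  // one unknown poisons the sum
  return 0;
}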
...@@ -68,9 +68,14 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
                                      dilations[i], paddings[i],
                                      strides[i]));
if ((!ctx->IsRuntime()) &&
    (in_dims[i + 2] <= 0 || filter_dims[i + 2] <= 0)) {
  output_shape.push_back(-1);
} else {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
}
}
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Output");
...
...@@ -36,14 +36,17 @@ class ConvShiftOp : public framework::OperatorWithKernel {
auto y_dims = ctx->GetInputDim("Y");
PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
PADDLE_ENFORCE_EQ(y_dims.size(), 2, "Input(Y)'s rank should be 2.");
PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
                  "The 1st dimension of Input(X) and Input(Y) should "
                  "be equal.");
PADDLE_ENFORCE_EQ(y_dims[1] % 2, 1,
                  "The 2nd dimension of Input(Y) should be odd.");
PADDLE_ENFORCE_LE(y_dims[1], x_dims[1],
                  "The 2nd dimension of Input(Y) should be less than or "
                  "equal to the 2nd dimension of Input(X).");
if (ctx->IsRuntime() || (x_dims[0] > 0 && y_dims[0] > 0))
  PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
                    "The 1st dimension of Input(X) and Input(Y) should "
                    "be equal.");
if (ctx->IsRuntime() || y_dims[1] > 0)
  PADDLE_ENFORCE_EQ(y_dims[1] % 2, 1,
                    "The 2nd dimension of Input(Y) should be odd.");
if (ctx->IsRuntime() || (x_dims[1] > 0 && y_dims[1] > 0))
  PADDLE_ENFORCE_LE(y_dims[1], x_dims[1],
                    "The 2nd dimension of Input(Y) should be less than or "
                    "equal to the 2nd dimension of Input(X).");
ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
......
...@@ -40,17 +40,27 @@ class CosSimOp : public framework::OperatorWithKernel {
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(),
                  "Ranks of Input(X) and Input(Y) must be equal.");
PADDLE_ENFORCE_GE(x_dims.size(), 2,
                  "Rank of Input(X) must not be less than 2.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 1, x_dims.size()),
                  framework::slice_ddim(y_dims, 1, y_dims.size()),
                  "All dimensions except the 1st of Input(X) and Input(Y) "
                  "must be equal.");
PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1,
               "The 1st dimension of Input(Y) must be equal to Input(X) or"
               " just 1 (which will be broadcasted to match Input(X)).");
bool check = true;
if ((!ctx->IsRuntime()) &&
    (framework::product(x_dims) <= 0 || framework::product(y_dims) <= 0)) {
  check = false;
}

if (check) {
  PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(),
                    "Ranks of Input(X) and Input(Y) must be equal.");
  PADDLE_ENFORCE_GE(x_dims.size(), 2,
                    "Rank of Input(X) must not be less than 2.");
  PADDLE_ENFORCE_EQ(
      framework::slice_ddim(x_dims, 1, x_dims.size()),
      framework::slice_ddim(y_dims, 1, y_dims.size()),
      "All dimensions except the 1st of Input(X) and Input(Y) "
      "must be equal.");
  PADDLE_ENFORCE(
      x_dims[0] == y_dims[0] || y_dims[0] == 1,
      "The 1st dimension of Input(Y) must be equal to Input(X) or"
      " just 1 (which will be broadcasted to match Input(X)).");
}
// resize tensor
ctx->SetOutputDim("Out", {x_dims[0], 1});
...
...@@ -51,8 +51,10 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(label_dims.size(), 2,
                  "The rank of Input(Label) must be 2, "
                  "the shape is [N, 6].");
PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5,
               "The shape of Input(Label) is [N, 6] or [N, 5].");
if (ctx->IsRuntime() || label_dims[1] > 0) {
  PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5,
"The shape of Input(Label) is [N, 6] or [N, 5].");
}
if (ctx->HasInput("PosCount")) { if (ctx->HasInput("PosCount")) {
PADDLE_ENFORCE(ctx->HasInput("TruePos"), PADDLE_ENFORCE(ctx->HasInput("TruePos"),
......
...@@ -31,14 +31,16 @@ class SplitByrefOp : public framework::OperatorWithKernel { ...@@ -31,14 +31,16 @@ class SplitByrefOp : public framework::OperatorWithKernel {
auto in_dims = ctx->GetInputDim("X"); auto in_dims = ctx->GetInputDim("X");
auto outs_names = ctx->Outputs("Out"); auto outs_names = ctx->Outputs("Out");
size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num")); size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
std::vector<int> sections = static_cast<std::vector<int>>(
    ctx->Attrs().Get<std::vector<int>>("sections"));
auto sections = ctx->Attrs().Get<std::vector<int>>("sections");
const size_t outs_number = outs_names.size();
std::vector<framework::DDim> outs_dims;
outs_dims.reserve(outs_number);
if (num > 0) {
int64_t in_axis_dim = in_dims[0];
int64_t in_axis_dim = 0;
if (ctx->IsRuntime()) {
in_axis_dim = in_dims[0];
}
PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
                  "tensor split does not result"
                  " in an equal division");
...
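Note the consequence of initializing in_axis_dim to 0 at compile time: since 0 % num == 0 for any positive num, the divisibility enforce passes vacuously until the real dimension is available at runtime. A tiny illustration with assumed values:

#include <cassert>
#include <cstdint>

int main() {
  const int num = 3;                 // requested number of splits
  int64_t compile_time_dim = 0;      // dim not yet known before runtime
  int64_t runtime_dim = 9;           // actual first dimension of X
  assert(compile_time_dim % num == 0);  // vacuously true: check is deferred
  assert(runtime_dim % num == 0);       // the real divisibility check
  return 0;
}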
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/grid_sampler_op.h"
#include <memory>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
...@@ -40,10 +41,12 @@ class GridSampleOp : public framework::OperatorWithKernel { ...@@ -40,10 +41,12 @@ class GridSampleOp : public framework::OperatorWithKernel {
"Input(X) of GridSampleOp should be 4-D Tensor."); "Input(X) of GridSampleOp should be 4-D Tensor.");
PADDLE_ENFORCE(grid_dims.size() == 4, PADDLE_ENFORCE(grid_dims.size() == 4,
"Input(Grid) of GridSampleOp should be 4-D Tensor."); "Input(Grid) of GridSampleOp should be 4-D Tensor.");
PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2.");
PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0],
                  "Input(X) and Input(Grid) dims[0] should be equal.");
if (ctx->IsRuntime() || grid_dims[3] > 0) {
  PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2.");
}
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0],
"Input(X) and Input(Grid) dims[0] should be equal.");
PADDLE_ENFORCE_EQ(
    grid_dims[1], x_dims[2],
    "Input(X) dims[2] and Input(Grid) dims[1] should be equal.");
...
...@@ -238,6 +238,8 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
zero(dev_ctx, w_grad, static_cast<T>(0.0));
bit_code->MulGradWeight(pre_out_grad, w_grad, in);
} else {
PADDLE_ENFORCE(path != nullptr,
"Sparse mode should not be used without custom tree!");
framework::Vector<int64_t> real_rows = PathToRows(*path);
auto* w_grad =
    ctx.Output<framework::SelectedRows>(framework::GradVarName("W"));
...
...@@ -45,9 +45,14 @@ class InterpolateOp : public framework::OperatorWithKernel {
// round down
out_h = static_cast<int>(dim_x[2] * scale);
out_w = static_cast<int>(dim_x[3] * scale);
// protect when input shape is -1
out_h = out_h > 0 ? out_h : -1;
out_w = out_w > 0 ? out_w : -1;
} else {
out_h = ctx->Attrs().Get<int>("out_h");
out_w = ctx->Attrs().Get<int>("out_w");
PADDLE_ENFORCE_GT(out_h, 0, "out_h should be greater than 0.");
PADDLE_ENFORCE_GT(out_w, 0, "out_w should be greater than 0.");
}
if (ctx->HasInput("OutSize") && ctx->IsRuntime()) {
...@@ -58,6 +63,7 @@ class InterpolateOp : public framework::OperatorWithKernel {
ctx->ShareLoD("X", "Out");
return;
}
std::vector<int64_t> dim_out({dim_x[0], dim_x[1], out_h, out_w});
ctx->SetOutputDim("Out", framework::make_ddim(dim_out));
}
...
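The interpolate "protect" lines handle the case where dim_x carries a -1 placeholder: multiplying it by a positive scale and truncating yields some negative integer, which is then normalized back to the -1 sentinel rather than leaking, say, -2 into the output shape. Illustrated with assumed values:

#include <cassert>
#include <cstdint>

int main() {
  const float scale = 2.0f;
  int64_t dim_h = -1;                           // unknown at compile time
  int out_h = static_cast<int>(dim_h * scale);  // -2 after truncation
  out_h = out_h > 0 ? out_h : -1;               // clamp back to the sentinel
  assert(out_h == -1);
  return 0;
}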
...@@ -35,8 +35,10 @@ class KLDivLossOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(dim_x.size(), dim_target.size(),
                  "Input(X) rank and Input(Target) rank should be same.");
for (int i = 0; i < dim_x.size(); i++) {
PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i],
                  "Input(X) and Input(Target) should in same shape.");
if (ctx->IsRuntime() || (dim_x[i] > 0 && dim_target[i] > 0)) {
  PADDLE_ENFORCE_EQ(dim_x[i], dim_target[i],
"Input(X) and Input(Target) should in same shape.");
}
}
auto reduction = ctx->Attrs().Get<std::string>("reduction");
...
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lstmp_op.h"
#include <memory>
#include <string>
namespace paddle {
...@@ -45,6 +46,7 @@ class LSTMPOp : public framework::OperatorWithKernel {
"Output(BatchHidden) of LSTMP operator should not be null.");
auto in_dims = ctx->GetInputDim("Input");
PADDLE_ENFORCE_EQ(in_dims.size(), 2, PADDLE_ENFORCE_EQ(in_dims.size(), 2,
"Input(X)'s rank of LSTMP operator must be 2."); "Input(X)'s rank of LSTMP operator must be 2.");
...@@ -269,13 +271,47 @@ Users can choose to use fully-connected operator before LSTMP operator.
}
};
class LSTMPGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDesc();
grad_op->SetType("lstmp_grad");
grad_op->SetInput("Weight", Input("Weight"));
grad_op->SetInput("ProjWeight", Input("ProjWeight"));
grad_op->SetInput("Bias", Input("Bias"));
grad_op->SetInput("Projection", Output("Projection"));
grad_op->SetInput("Cell", Output("Cell"));
grad_op->SetInput("BatchGate", Output("BatchGate"));
grad_op->SetInput("BatchCellPreAct", Output("BatchCellPreAct"));
grad_op->SetInput("BatchHidden", Output("BatchHidden"));
grad_op->SetInput("H0", Input("H0"));
grad_op->SetInput("C0", Input("C0"));
grad_op->SetInput(framework::GradVarName("Projection"),
OutputGrad("Projection"));
grad_op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
grad_op->SetOutput(framework::GradVarName("Weight"), InputGrad("Weight"));
grad_op->SetOutput(framework::GradVarName("ProjWeight"),
InputGrad("ProjWeight"));
grad_op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias"));
grad_op->SetOutput(framework::GradVarName("H0"), InputGrad("H0"));
grad_op->SetOutput(framework::GradVarName("C0"), InputGrad("C0"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
class LSTMPGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Projection"), PADDLE_ENFORCE(ctx->HasInput("Projection"),
"Input(Projection) of LSTMP operator should not be null."); "Input(Projection) of LSTMP operator should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Cell"), PADDLE_ENFORCE(ctx->HasInput("Cell"),
...@@ -298,7 +334,8 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
ctx->SetOutputDim(g_name, ctx->GetInputDim(name));
};
SetOutGradDim("Input"); ctx->SetOutputDim(framework::GradVarName("Input"),
ctx->GetInputDim("BatchGate"));
SetOutGradDim("Weight"); SetOutGradDim("Weight");
SetOutGradDim("ProjWeight"); SetOutGradDim("ProjWeight");
SetOutGradDim("Bias"); SetOutGradDim("Bias");
...@@ -310,7 +347,8 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  return framework::OpKernelType(
ctx.Input<framework::LoDTensor>("Input")->type(), ctx.device_context()); ctx.Input<framework::LoDTensor>("BatchGate")->type(),
ctx.device_context());
}
};
...@@ -318,8 +356,7 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
}  // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, ops::LSTMPGradMaker);
REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp);
REGISTER_OP_CPU_KERNEL(
    lstmp, ops::LSTMPKernel<paddle::platform::CPUDeviceContext, float>,
...
...@@ -267,7 +267,6 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
}
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<LoDTensor>("Input");
auto* weight = ctx.Input<Tensor>("Weight"); auto* weight = ctx.Input<Tensor>("Weight");
auto* proj_weight = ctx.Input<Tensor>("ProjWeight"); auto* proj_weight = ctx.Input<Tensor>("ProjWeight");
auto* bias = ctx.Input<Tensor>("Bias"); auto* bias = ctx.Input<Tensor>("Bias");
...@@ -323,7 +322,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> { ...@@ -323,7 +322,8 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
ordered_c0_g.mutable_data<T>(c0_g->dims(), ctx.GetPlace()); ordered_c0_g.mutable_data<T>(c0_g->dims(), ctx.GetPlace());
} }
auto in_dims = input->dims();
// batch_gate dims equal to input dims
auto in_dims = batch_gate->dims();
auto out_dims = cell_out->dims();
framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]});
int frame_size = static_cast<int>(in_dims[1] / 4);
...
...@@ -164,7 +164,9 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase {
auto mask_dim = context->GetInputDim("Mask");
PADDLE_ENFORCE_EQ(mask_dim.size(), 2);
PADDLE_ENFORCE_EQ(mask_dim[1], 1);
if (context->IsRuntime() || mask_dim[1] > 0) {
PADDLE_ENFORCE_EQ(mask_dim[1], 1);
}
context->SetOutputDim("Out", context->GetInputDim("InTrue")); context->SetOutputDim("Out", context->GetInputDim("InTrue"));
} }
......
...@@ -39,13 +39,9 @@ struct bn_type_traits { ...@@ -39,13 +39,9 @@ struct bn_type_traits {
class BatchNormMKLDNNHandler : public platform::MKLDNNHandler { class BatchNormMKLDNNHandler : public platform::MKLDNNHandler {
public: public:
BatchNormMKLDNNHandler(
    std::shared_ptr<batch_norm_fwd::primitive_desc> batch_norm_pd,
    const platform::MKLDNNDeviceContext &dev_ctx, mkldnn::engine engine,
    const std::string &base_key)
    : platform::MKLDNNHandler(dev_ctx, engine, base_key) {
  batch_norm_pd_ = batch_norm_pd;
}
BatchNormMKLDNNHandler(const platform::MKLDNNDeviceContext &dev_ctx,
                       mkldnn::engine engine, const std::string &base_key)
    : platform::MKLDNNHandler(dev_ctx, engine, base_key) {}
std::shared_ptr<memory> AcquireScaleshiftMemoryFromPrimitive(void *ptr) {
  return this->AcquireMemoryFromPrimitive(
...@@ -62,6 +58,26 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandler {
      batch_norm_pd_->variance_primitive_desc(), ptr, "@variance_mem_p");
}
std::shared_ptr<batch_norm_fwd::primitive_desc>
AcquireBatchNormPrimitiveDescriptor(const batch_norm_fwd::desc &bn_fwd_desc,
const mkldnn::engine &engine) {
const std::string key_batch_norm_fwd_pd = key_ + "@bn_fwd_pd";
auto batch_norm_pd =
std::static_pointer_cast<batch_norm_fwd::primitive_desc>(
dev_ctx_.GetBlob(key_batch_norm_fwd_pd));
if (batch_norm_pd == nullptr) {
batch_norm_pd_.reset(
new batch_norm_fwd::primitive_desc(bn_fwd_desc, engine));
dev_ctx_.SetBlob(key_batch_norm_fwd_pd, batch_norm_pd_);
} else {
batch_norm_pd_ = batch_norm_pd;
is_reusing_ = true;
}
return batch_norm_pd_;
}
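AcquireBatchNormPrimitiveDescriptor applies the handler family's acquire-or-create idiom: look the primitive descriptor up in the device context's blob store by key, create and cache it on a miss, and flag reuse on a hit, so the same descriptor survives across kernel invocations. A simplified standalone sketch of that idiom (hypothetical names; a plain std::map stands in for the blob store):

#include <iostream>
#include <map>
#include <memory>
#include <string>

using BlobMap = std::map<std::string, std::shared_ptr<void>>;

template <typename T>
std::shared_ptr<T> AcquireOrCreate(BlobMap* blobs, const std::string& key,
                                   const T& value, bool* reused) {
  auto it = blobs->find(key);
  if (it != blobs->end()) {
    *reused = true;  // hit: hand back the cached object
    return std::static_pointer_cast<T>(it->second);
  }
  auto obj = std::make_shared<T>(value);  // miss: create and cache
  (*blobs)[key] = obj;
  *reused = false;
  return obj;
}

int main() {
  BlobMap blobs;
  bool reused = false;
  auto a = AcquireOrCreate<int>(&blobs, "key@bn_fwd_pd", 42, &reused);
  auto b = AcquireOrCreate<int>(&blobs, "key@bn_fwd_pd", 42, &reused);
  std::cout << (a == b) << " " << reused << "\n";  // prints "1 1"
  return 0;
}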
std::shared_ptr<batch_norm_fwd> AcquireTestTrainingBatchNormFwd(
    std::shared_ptr<memory> src_memory,
    std::shared_ptr<memory> scaleshift_memory,
...@@ -213,7 +229,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const std::string key = BatchNormMKLDNNHandler::GetHash(
    src_tz, epsilon, flags, global_stats, input_format,
    ctx.op().Output("SavedMean"));
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
BatchNormMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
auto user_src_md = platform::MKLDNNMemDesc(
    {src_tz}, platform::MKLDNNGetDataType<T>(), input_format);
...@@ -222,13 +238,9 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
auto batch_norm_fwd_desc =
    bn_fwd_types::op_desc{propagation, user_src_md, epsilon, flags};
auto batch_norm_fwd_pd = std::make_shared<batch_norm_fwd::primitive_desc>(
batch_norm_fwd_desc, mkldnn_engine);
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx.SetBlob(key_batch_norm_fwd_pd, batch_norm_fwd_pd);
BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine,
                               key);
auto batch_norm_fwd_pd = handler.AcquireBatchNormPrimitiveDescriptor(
    batch_norm_fwd_desc, mkldnn_engine);
auto src_memory =
    handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data));
...
...@@ -144,7 +144,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const std::string key = platform::ConvMKLDNNHandler::GetHash(
    src_tz, weights_tz, strides, paddings, dilations, groups,
    ctx.op().Input("Input") + ctx.op().Input("Filter"));
const std::string key_conv_pd = key + "@conv_pd";
std::vector<primitive> pipeline;
...@@ -183,6 +182,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto dst_md = platform::MKLDNNMemDesc(
    dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
platform::ConvMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
// create a conv primitive descriptor and save it for usage in backward
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd;
auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference
...@@ -191,18 +192,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bias_tz = paddle::framework::vectorize2int(bias->dims());
auto bias_md = platform::MKLDNNMemDesc(
    bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
conv_pd = ConvFwdPrimitiveDesc(
    src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
    fuse_relu, fuse_residual_conn, fwd_prop_kind);
conv_pd = handler.AcquireConvolutionPrimitiveDescriptor(
    src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
    fuse_relu, fuse_residual_conn, fwd_prop_kind);
} else {
conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
                               paddings, mkldnn_engine, fuse_relu,
                               fuse_residual_conn, fwd_prop_kind);
conv_pd = handler.AcquireConvolutionPrimitiveDescriptor(
    src_md, weights_md, boost::none, dst_md, strides, paddings,
    mkldnn_engine, fuse_relu, fuse_residual_conn, fwd_prop_kind);
}
// Save conv_pd/src_memory/weights_memory for backward pass
if (!is_test) dev_ctx.SetBlob(key_conv_pd, conv_pd);
platform::ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key);
// create mkldnn memory from input tensors (data/weights)
auto user_src_memory_p =
...@@ -633,31 +630,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
 private:
mkldnn::primitive_attr CreatePostOps(bool fuse_relu,
bool fuse_residual_conn) const {
mkldnn::primitive_attr conv_attr;
mkldnn::post_ops post_operations;
// Fusion with Elementwise layer relies on adding a sum post-operation with
// the scale parameter. It is assumed that when fuse_residual_connection is
// true, the output tensor contains the data coming from residual
// connection. The result of this post_op is:
// Output = scale * Output + Conv_Out.
if (fuse_residual_conn) {
post_operations.append_sum(1.0f);
}
// Fusion with ReLU layer is executed through the PostOps feature. Create a
// PostOps object and configure it to execute an eltwise relu operation.
if (fuse_relu) {
constexpr float scale = 1.0f;
constexpr float negative_slope = 0.0f;
constexpr float placeholder = 0.0f;
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
negative_slope, placeholder);
}
conv_attr.set_post_ops(post_operations);
return conv_attr;
}
mkldnn::primitive_attr CreatePostOps(
    bool fuse_relu, bool fuse_residual_conn,
    const std::vector<float> output_shift_scale, float sum_scale) const {
...@@ -679,30 +651,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
  return conv_attr;
}
std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
const memory::desc& dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::engine& engine, const bool fuse_relu,
const bool fuse_residual_conn,
mkldnn::prop_kind fwd_prop_kind) const {
memory::dims stride_dims = strides;
memory::dims padding_dims = paddings;
auto conv_desc = mkldnn::convolution_forward::desc(
fwd_prop_kind, mkldnn::convolution_direct, src, weights, dst,
stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero);
mkldnn::primitive_attr conv_attr =
CreatePostOps(fuse_relu, fuse_residual_conn);
auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
conv_desc, conv_attr, engine);
return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
p_conv_pd);
}
std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                     const memory::desc& dst, const std::vector<int>& strides,
...@@ -731,31 +679,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
      p_conv_pd);
}
-  std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
-  ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
-                       const memory::desc& bias, const memory::desc& dst,
-                       const std::vector<int>& strides,
-                       const std::vector<int>& paddings,
-                       const mkldnn::engine& engine, const bool fuse_relu,
-                       const bool fuse_residual_conn,
-                       mkldnn::prop_kind fwd_prop_kind) const {
-    memory::dims stride_dims = strides;
-    memory::dims padding_dims = paddings;
-    auto conv_desc = mkldnn::convolution_forward::desc(
-        fwd_prop_kind, mkldnn::convolution_direct, src, weights, bias, dst,
-        stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero);
-    mkldnn::primitive_attr conv_attr =
-        CreatePostOps(fuse_relu, fuse_residual_conn);
-    auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
-        conv_desc, conv_attr, engine);
-    return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
-        p_conv_pd);
-  }
   std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
   ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                        const memory::desc& bias, const memory::desc& dst,
...
@@ -12,6 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "boost/optional.hpp"
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/malloc.h"
@@ -124,7 +125,6 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     const std::string key = platform::ConvTransposeMKLDNNHandler::GetHash(
         src_tz, weights_tz, strides, paddings, dilations, groups,
         ctx.op().Output("Output"));
-    const std::string key_conv_transpose_pd = key + "@conv_transpose_pd";
     std::vector<mkldnn::primitive> pipeline;
@@ -153,6 +153,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto dst_md = platform::MKLDNNMemDesc(
         dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+    platform::ConvTransposeMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
     // create a deconv(conv transpose) primitive descriptor and save it for
     // usage in backward
     std::shared_ptr<mkldnn::deconvolution_forward::primitive_desc>
@@ -163,19 +164,14 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       bias_tz = paddle::framework::vectorize2int(bias->dims());
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), mkldnn::memory::format::x);
-      conv_transpose_pd = ConvTransposeFwdPrimitiveDesc(
-          src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
-          fuse_relu, fwd_prop_kind);
+      conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
+          src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine,
+          fuse_relu, false, fwd_prop_kind);
     } else {
-      conv_transpose_pd = ConvTransposeFwdPrimitiveDesc(
-          src_md, weights_md, dst_md, strides, paddings, mkldnn_engine,
-          fuse_relu, fwd_prop_kind);
+      conv_transpose_pd = handler.AcquireConvolutionPrimitiveDescriptor(
+          src_md, weights_md, boost::none, dst_md, strides, paddings,
+          mkldnn_engine, fuse_relu, false, fwd_prop_kind);
     }
-    // Save conv_pd/src_memory/weights_memory for backward pass
-    if (!is_test) dev_ctx.SetBlob(key_conv_transpose_pd, conv_transpose_pd);
-    platform::ConvTransposeMKLDNNHandler handler(conv_transpose_pd, dev_ctx,
-                                                 mkldnn_engine, key);
     // create mkldnn memory from input tensors (data/weights)
     auto user_src_memory_p = handler.AcquireSrcMemory(
@@ -224,70 +220,6 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     output->set_layout(DataLayout::kMKLDNN);
     output->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
   }
- private:
-  mkldnn::primitive_attr CreatePostOps(bool fuse_relu) const {
-    mkldnn::primitive_attr conv_attr;
-    mkldnn::post_ops post_operations;
-    // Fusion with ReLU layer is executed through the PostOps feature. Create a
-    // PostOps object and configure it to execute an eltwise relu operation.
-    if (fuse_relu) {
-      constexpr float scale = 1.0f;
-      constexpr float negative_slope = 0.0f;
-      constexpr float placeholder = 0.0f;
-      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
-                                     negative_slope, placeholder);
-    }
-    conv_attr.set_post_ops(post_operations);
-    return conv_attr;
-  }
-  std::unique_ptr<mkldnn::deconvolution_forward::primitive_desc>
-  ConvTransposeFwdPrimitiveDesc(
-      const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights,
-      const mkldnn::memory::desc& dst, const std::vector<int>& strides,
-      const std::vector<int>& paddings, const mkldnn::engine& engine,
-      const bool fuse_relu, mkldnn::prop_kind fwd_prop_kind) const {
-    mkldnn::memory::dims stride_dims = {strides[0], strides[1]};
-    mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]};
-    auto deconv_desc = mkldnn::deconvolution_forward::desc(
-        fwd_prop_kind, mkldnn::deconvolution_direct, src, weights, dst,
-        stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero);
-    mkldnn::primitive_attr deconv_attr = CreatePostOps(fuse_relu);
-    auto p_conv_transpose_pd =
-        new mkldnn::deconvolution_forward::primitive_desc(deconv_desc,
-                                                          deconv_attr, engine);
-    return std::unique_ptr<mkldnn::deconvolution_forward::primitive_desc>(
-        p_conv_transpose_pd);
-  }
-  std::unique_ptr<mkldnn::deconvolution_forward::primitive_desc>
-  ConvTransposeFwdPrimitiveDesc(
-      const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights,
-      const mkldnn::memory::desc& bias, const mkldnn::memory::desc& dst,
-      const std::vector<int>& strides, const std::vector<int>& paddings,
-      const mkldnn::engine& engine, const bool fuse_relu,
-      mkldnn::prop_kind fwd_prop_kind) const {
-    mkldnn::memory::dims stride_dims = {strides[0], strides[1]};
-    mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]};
-    auto deconv_desc = mkldnn::deconvolution_forward::desc(
-        fwd_prop_kind, mkldnn::deconvolution_direct, src, weights, bias, dst,
-        stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero);
-    mkldnn::primitive_attr deconv_attr = CreatePostOps(fuse_relu);
-    auto p_conv_transpose_pd =
-        new mkldnn::deconvolution_forward::primitive_desc(deconv_desc,
-                                                          deconv_attr, engine);
-    return std::unique_ptr<mkldnn::deconvolution_forward::primitive_desc>(
-        p_conv_transpose_pd);
-  }
 };
 }  // namespace operators
...
@@ -34,12 +34,9 @@ using platform::to_void_cast;
 class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
  public:
-  SoftmaxMKLDNNHandler(
-      std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
-      const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
-      const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        softmax_pd_(softmax_pd) {}
+  SoftmaxMKLDNNHandler(const platform::MKLDNNDeviceContext& dev_ctx,
+                       mkldnn::engine engine, const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key) {}
   SoftmaxMKLDNNHandler(
       std::shared_ptr<mkldnn::softmax_forward::primitive_desc> softmax_pd,
@@ -54,6 +51,26 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
     key_ += "-BWD";
   }
+  std::shared_ptr<softmax_forward::primitive_desc>
+  AcquireSoftmaxPrimitiveDescriptor(const softmax_forward::desc& softmax_desc,
+                                    const mkldnn::engine& engine) {
+    const std::string key_softmax_pd = key_ + "@softmax_pd";
+    auto softmax_pd = std::static_pointer_cast<softmax_forward::primitive_desc>(
+        dev_ctx_.GetBlob(key_softmax_pd));
+    if (softmax_pd == nullptr) {
+      softmax_pd_.reset(
+          new softmax_forward::primitive_desc(softmax_desc, engine));
+      dev_ctx_.SetBlob(key_softmax_pd, softmax_pd_);
+    } else {
+      softmax_pd_ = softmax_pd;
+      is_reusing_ = true;
+    }
+    return softmax_pd_;
+  }
   std::shared_ptr<mkldnn::softmax_forward> AcquireSoftmax(
       std::shared_ptr<mkldnn::memory> dst_memory_p,
       std::shared_ptr<mkldnn::memory> src_memory_p) {
@@ -138,19 +155,18 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     // Generate keys for storing/retrieving primitives for this operator
     const std::string key =
         platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out"));
-    const std::string key_softmax_pd = key + "@softmax_pd";
+    SoftmaxMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
     // Currently only NC data format is supported
     auto softmax_md = MKLDNNMemDesc(
         {softmax_tz}, platform::MKLDNNGetDataType<T>(), memory::format::nc);
     // Normalization is made after innermost dimension eg. C out of NC
     auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
                                               softmax_md, 1 /*dim: C*/);
-    auto softmax_pd = std::make_shared<mkldnn::softmax_forward::primitive_desc>(
-        softmax_desc, mkldnn_engine);
-    dev_ctx.SetBlob(key_softmax_pd, softmax_pd);
-    SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key);
+    auto softmax_pd =
+        handler.AcquireSoftmaxPrimitiveDescriptor(softmax_desc, mkldnn_engine);
     auto softmax_src_memory_p =
         handler.AcquireSrcMemory(softmax_md, to_void_cast<T>(input_data));
     auto softmax_dst_memory_p =
...
@@ -483,8 +483,10 @@ class Pad2dOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           paddings_dim.size(), 1,
           "Size of Input(Paddings)'s dimension should be equal to 1.");
-      PADDLE_ENFORCE_EQ(paddings_dim[0], 4,
-                        "Shape of Input(Paddings) should be equal to [4].");
+      if (ctx->IsRuntime()) {
+        PADDLE_ENFORCE_EQ(paddings_dim[0], 4,
+                          "Shape of Input(Paddings) should be equal to [4].");
+      }
       out_dims[1] = x_dim[1];
       out_dims[2] = x_dim[2];
       out_dims[3] = x_dim[3];
@@ -504,11 +506,7 @@ class Pad2dOp : public framework::OperatorWithKernel {
       }
     }
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
-    if (out_dims[0] == x_dim[0]) {
-      // Only pass LoD when the first dimension is equal between
-      // output and input.
-      ctx->ShareLoD("X", /*->*/ "Out");
-    }
+    ctx->ShareLoD("X", /*->*/ "Out");
   }
  protected:
...
@@ -37,9 +37,11 @@ class ROIAlignOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(rois_dims.size() == 2,
                    "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
                    "given as [[x1, y1, x2, y2], ...].");
-    PADDLE_ENFORCE(rois_dims[1] == 4,
-                   "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
-                   "given as [[x1, y1, x2, y2], ...].");
+    if (ctx->IsRuntime()) {
+      PADDLE_ENFORCE(rois_dims[1] == 4,
+                     "ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
+                     "given as [[x1, y1, x2, y2], ...].");
+    }
     int pooled_height = ctx->Attrs().Get<int>("pooled_height");
     int pooled_width = ctx->Attrs().Get<int>("pooled_width");
     float spatial_scale = ctx->Attrs().Get<float>("spatial_scale");
...
@@ -45,9 +45,12 @@ class RowConvOp : public framework::OperatorWithKernel {
     auto filter_dims = ctx->GetInputDim("Filter");
     PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
     PADDLE_ENFORCE_EQ(filter_dims.size(), 2, "Input(Y)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(
-        x_dims[1], filter_dims[1],
-        "The 2nd dimension of Input(X) and Input(Filter) should be same.");
+    if (ctx->IsRuntime() || (x_dims[1] > 0 && filter_dims[1] > 0)) {
+      PADDLE_ENFORCE_EQ(
+          x_dims[1], filter_dims[1],
+          "The 2nd dimension of Input(X) and Input(Filter) should be same.");
+    }
     ctx->SetOutputDim("Out", x_dims);
     ctx->ShareLoD("X", "Out");
   }
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/sample_logits_op.h"
+#include <memory>
 #include "paddle/fluid/operators/math/sample_prob.h"
 namespace paddle {
@@ -59,6 +60,10 @@ class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor, default: Tensor<float>), A 2-D tensor with shape [N, NT + S]."
               "The probabilities of sampled positive and negative labels.")
         .AsIntermediate();
AddOutput("LogitsDim", "Store dim information of Logits for gradient op")
.AsIntermediate();
AddOutput("LabelsDim", "Store dim information of Logits for gradient op")
.AsIntermediate();
AddOutput("SampledLogits", AddOutput("SampledLogits",
"(Tensor, default: Tensor<float>), A 2-D tensor with shape" "(Tensor, default: Tensor<float>), A 2-D tensor with shape"
"[N, NT + S]. The outputs value of sampled logits, which will be" "[N, NT + S]. The outputs value of sampled logits, which will be"
...@@ -120,6 +125,10 @@ class SampleLogitsOp : public framework::OperatorWithKernel { ...@@ -120,6 +125,10 @@ class SampleLogitsOp : public framework::OperatorWithKernel {
"Output(SampledLogits) should be not null."); "Output(SampledLogits) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("SampledLabels"), PADDLE_ENFORCE(ctx->HasOutput("SampledLabels"),
"Output(SampledLabels) should be not null."); "Output(SampledLabels) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("LogitsDim"),
"Output(LogitsDim) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("LabelsDim"),
"Output(LabelsDim) should be not null.");
auto logits_dims = ctx->GetInputDim("Logits"); auto logits_dims = ctx->GetInputDim("Logits");
auto labels_dims = ctx->GetInputDim("Labels"); auto labels_dims = ctx->GetInputDim("Labels");
...@@ -139,6 +148,15 @@ class SampleLogitsOp : public framework::OperatorWithKernel { ...@@ -139,6 +148,15 @@ class SampleLogitsOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Probabilities", {logits_dims[0], num_sampled_classes}); ctx->SetOutputDim("Probabilities", {logits_dims[0], num_sampled_classes});
ctx->SetOutputDim("SampledLogits", {logits_dims[0], num_sampled_classes}); ctx->SetOutputDim("SampledLogits", {logits_dims[0], num_sampled_classes});
ctx->SetOutputDim("SampledLabels", {logits_dims[0], labels_dims[1]}); ctx->SetOutputDim("SampledLabels", {logits_dims[0], labels_dims[1]});
// append 0 to shape variable to avoid optimized by memory optimize pass
auto logits_dim_vec = framework::vectorize(logits_dims);
logits_dim_vec.push_back(0);
ctx->SetOutputDim("LogitsDim", framework::make_ddim(logits_dim_vec));
auto labels_dim_vec = framework::vectorize(labels_dims);
labels_dim_vec.push_back(0);
ctx->SetOutputDim("LabelsDim", framework::make_ddim(labels_dim_vec));
} }
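A compact sketch of the LogitsDim/LabelsDim trick, assuming nothing beyond standard C++: the forward op smuggles a tensor's shape through a dummy output whose dims are the original dims plus a trailing 0 (per the comment above, the zero-sized variable survives the memory optimize pass), and the gradient op strips the sentinel to recover the shape without keeping the real tensors alive.

    #include <cassert>
    #include <vector>

    std::vector<long> EncodeDims(std::vector<long> dims) {
      dims.push_back(0);  // sentinel: carries shape info, holds no data
      return dims;
    }

    std::vector<long> DecodeDims(std::vector<long> dims) {
      dims.pop_back();  // drop the sentinel in the grad op's InferShape
      return dims;
    }

    int main() {
      std::vector<long> logits{64, 1001};
      assert(DecodeDims(EncodeDims(logits)) == logits);
    }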
  protected:
@@ -157,28 +175,27 @@ class SampleLogitsOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Logits"),
-                   "Input(Logits) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Labels"),
-                   "Input(Labels) should be not null.");
+    PADDLE_ENFORCE(ctx->HasInput("LogitsDim"),
+                   "Input(LogitsDim) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LabelsDim"),
+                   "Input(LabelsDim) should be not null.");
     PADDLE_ENFORCE(ctx->HasInput("Samples"),
                    "Input(Samples) should be not null.");
-    PADDLE_ENFORCE(ctx->HasInput("SampledLogits"),
-                   "Input(SampledLogits) should be not null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("SampledLogits")),
                    "Input(SampledLogits@Grad) should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Logits")),
                    "Output(Logits@Grad) should be not null.");
-    auto logit_dims = ctx->GetInputDim("Logits");
-    auto label_dims = ctx->GetInputDim("Labels");
-    PADDLE_ENFORCE_EQ(label_dims.size(), 2UL,
+    auto logits_dims = ctx->GetInputDim("LogitsDim");
+    logits_dims = framework::DDim(logits_dims.Get(), logits_dims.size() - 1);
+    auto labels_dims = ctx->GetInputDim("LabelsDim");
+    labels_dims = framework::DDim(labels_dims.Get(), labels_dims.size() - 1);
+    PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL,
                       "The label should be a 2-D tensor.");
-    PADDLE_ENFORCE_EQ(logit_dims.size(), 2UL,
+    PADDLE_ENFORCE_EQ(logits_dims.size(), 2UL,
                       "The logits should be a 2-D tensor.");
-    ctx->SetOutputDim(framework::GradVarName("Logits"),
-                      ctx->GetInputDim("Logits"));
+    ctx->SetOutputDim(framework::GradVarName("Logits"), logits_dims);
   }
  protected:
@@ -201,10 +218,9 @@ class SampleLogitsGradMaker : public framework::SingleGradOpDescMaker {
   std::unique_ptr<framework::OpDesc> Apply() const override {
     auto* grad_op = new framework::OpDesc();
     grad_op->SetType("sample_logits_grad");
-    grad_op->SetInput("Logits", Input("Logits"));
-    grad_op->SetInput("Labels", Input("Labels"));
+    grad_op->SetInput("LogitsDim", Output("LogitsDim"));
+    grad_op->SetInput("LabelsDim", Output("LabelsDim"));
     grad_op->SetInput("Samples", Output("Samples"));
-    grad_op->SetInput("SampledLogits", Output("SampledLogits"));
     grad_op->SetInput(framework::GradVarName("SampledLogits"),
                       OutputGrad("SampledLogits"));
     grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
...
@@ -42,10 +42,6 @@ class ScatterOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(ctx->GetInputDim("Updates")[0],
                       ctx->GetInputDim("Ids")[0],
                       "Updates and Ids should have same batch-size.");
-    framework::DDim data_dim(updates_dims);
-    for (int i = 1; i < data_dim.size(); ++i) {
-      PADDLE_ENFORCE_EQ(data_dim[i], updates_dims[i]);
-    }
     ctx->SetOutputDim("Out", ref_dims);
   }
...
@@ -34,15 +34,22 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("X");
     auto labels_dims = ctx->GetInputDim("Label");
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(labels_dims.size(), 2,
-                      "Input(Label)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(x_dims[0], labels_dims[0],
-                      "The 1st dimension of Input(X) and Input(Label) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(x_dims[1], labels_dims[1],
-                      "The 2nd dimension of Input(X) and Input(Label) should "
-                      "be equal.");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, labels_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(labels_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank),
+                        framework::slice_ddim(labels_dims, 0, rank),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
     ctx->ShareDim("X", /*->*/ "Out");
     ctx->ShareLoD("X", /*->*/ "Out");
@@ -65,23 +72,24 @@ class SigmoidCrossEntropyWithLogitsGradOp
     auto x_dims = ctx->GetInputDim("X");
     auto labels_dims = ctx->GetInputDim("Label");
     auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(labels_dims.size(), 2,
-                      "Input(Label)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(dout_dims.size(), 2,
-                      "Input(Out@Grad)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(x_dims[0], labels_dims[0],
-                      "The 1st dimension of Input(X) and Input(Label) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(x_dims[1], labels_dims[1],
-                      "The 2nd dimension of Input(X) and Input(Label) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(x_dims[0], dout_dims[0],
-                      "The 1st dimension of Input(X) and Input(Out@Grad) "
-                      "should be equal.");
-    PADDLE_ENFORCE_EQ(x_dims[1], dout_dims[1],
-                      "The 2nd dimension of Input(X) and Input(Out@Grad) "
-                      "should be equal.");
+    int rank = x_dims.size();
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(labels_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank),
+                        framework::slice_ddim(labels_dims, 0, rank),
+                        "Input(X) and Input(Label) shall have the same shape.");
+
+      PADDLE_ENFORCE_EQ(
+          framework::slice_ddim(x_dims, 0, rank),
+          framework::slice_ddim(dout_dims, 0, rank),
+          "Input(X) and Input(Out@Grad) shall have the same shape.");
+    }
     ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
   }
...
@@ -56,13 +56,19 @@ class SpectralNormOp : public framework::OperatorWithKernel {
     }
     auto dim_u = ctx->GetInputDim("U");
     auto dim_v = ctx->GetInputDim("V");
-    PADDLE_ENFORCE_EQ(dim_u[0], h,
-                      "Input(U) dims[0] should be equal to "
-                      "Input(Weight) dims[Attr(dim)]");
-    PADDLE_ENFORCE_EQ(
-        dim_v[0], w,
-        "Input(V) dims[0] should be equal to "
-        "the product of Input(Weight) dims except dims[Attr(dim)]");
+    if (ctx->IsRuntime() || (dim_u[0] > 0 && h > 0)) {
+      PADDLE_ENFORCE_EQ(dim_u[0], h,
+                        "Input(U) dims[0] should be equal to "
+                        "Input(Weight) dims[Attr(dim)]");
+    }
+
+    if (ctx->IsRuntime() || (dim_v[0] > 0 && w > 0)) {
+      PADDLE_ENFORCE_EQ(
+          dim_v[0], w,
+          "Input(V) dims[0] should be equal to "
+          "the product of Input(Weight) dims except dims[Attr(dim)]");
+    }
     ctx->SetOutputDim("Out", dim_weight);
     ctx->ShareLoD("Weight", /*->*/ "Out");
...
@@ -65,7 +65,21 @@ class SumOp : public framework::OperatorWithKernel {
       if (framework::product(in_dim) == 0) {
         in_dim = x_dim;
       } else {
-        PADDLE_ENFORCE_EQ(in_dim, x_dim, "Input tensors must have same shape");
+        if (ctx->IsRuntime()) {
+          PADDLE_ENFORCE_EQ(in_dim, x_dim,
+                            "Input tensors must have same shape");
+        } else {
+          PADDLE_ENFORCE_EQ(in_dim.size(), x_dim.size(),
+                            "Input tensors must have same shape size");
+          // if in_dim or x_dim has -1, skip the equality check for that dim
+          for (int i = 0; i < x_dim.size(); ++i) {
+            if (x_dim[i] == -1 || in_dim[i] == -1) {
+              continue;
+            }
+            PADDLE_ENFORCE_EQ(in_dim[i], x_dim[i],
+                              "Input tensors must have same shape if not -1");
+          }
+        }
       }
     }
     ctx->SetOutputDim("Out", in_dim);
...
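The relaxed SumOp check above boils down to an elementwise comparison that treats -1 as the compile-time "unknown" wildcard. A standalone sketch of that comparison in plain C++:

    #include <cassert>
    #include <vector>

    // Ranks must match; dims must match elementwise except where either side
    // is -1, which means "unknown" during compile-time shape inference.
    bool ShapesCompatible(const std::vector<long>& a, const std::vector<long>& b) {
      if (a.size() != b.size()) return false;  // "same shape size"
      for (size_t i = 0; i < a.size(); ++i) {
        if (a[i] == -1 || b[i] == -1) continue;  // unknown dim: skip
        if (a[i] != b[i]) return false;          // "same shape if not -1"
      }
      return true;
    }

    int main() {
      assert(ShapesCompatible({-1, 128}, {32, 128}));
      assert(!ShapesCompatible({-1, 128}, {32, 64}));
    }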
@@ -99,10 +99,15 @@ class UnpoolOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(in_x_dims.size() == 4,
                    "Unpooling input must be 4-dimensional.");
     PADDLE_ENFORCE_EQ(in_x_dims, in_y_dims);
     std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
     for (size_t i = 0; i < ksize.size(); ++i) {
-      output_shape.push_back(UnpoolOutputSize(in_x_dims[i + 2], ksize[i],
-                                              paddings[i], strides[i]));
+      if (!ctx->IsRuntime() && in_x_dims[i + 2] <= 0) {
+        output_shape.push_back(-1);
+      } else {
+        output_shape.push_back(UnpoolOutputSize(in_x_dims[i + 2], ksize[i],
+                                                paddings[i], strides[i]));
+      }
     }
     ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
   }
...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
+#include "boost/optional.hpp"
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
@@ -395,9 +396,28 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
   std::vector<int> logical_axis_;
 };
+template <typename T>
+struct convolutional_algorithm;
+
+template <>
+struct convolutional_algorithm<mkldnn::convolution_forward> {
+  static constexpr mkldnn::algorithm T = mkldnn::algorithm::convolution_direct;
+};
+
+template <>
+struct convolutional_algorithm<mkldnn::deconvolution_forward> {
+  static constexpr mkldnn::algorithm T =
+      mkldnn::algorithm::deconvolution_direct;
+};
 template <class forward_t, class backward_data_t, class backward_weights_t>
 class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
  public:
+  ConvMKLDNNTemplateHandler(const platform::MKLDNNDeviceContext& dev_ctx,
+                            mkldnn::engine engine, const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key) {}
+
+  // TODO(jczaja): remove after conv int8 is adapted
   ConvMKLDNNTemplateHandler(
       std::shared_ptr<typename forward_t::primitive_desc> conv_pd,
       const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
@@ -542,6 +562,73 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
                                 scale_data, mask);
   }
+  mkldnn::primitive_attr CreatePostOps(bool fuse_relu,
+                                       bool fuse_residual_conn = false) const {
+    mkldnn::primitive_attr conv_attr;
+    mkldnn::post_ops post_operations;
+    // Fusion with Elementwise layer relies on adding a sum post-operation with
+    // the scale parameter. It is assumed that when fuse_residual_connection is
+    // true, the output tensor contains the data coming from residual
+    // connection. The result of this post_op is:
+    // Output = scale * Output + Conv_Out.
+    if (fuse_residual_conn) {
+      post_operations.append_sum(1.0f);
+    }
+    // Fusion with ReLU layer is executed through the PostOps feature. Create a
+    // PostOps object and configure it to execute an eltwise relu operation.
+    if (fuse_relu) {
+      constexpr float scale = 1.0f;
+      constexpr float negative_slope = 0.0f;
+      constexpr float placeholder = 0.0f;
+      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
+                                     negative_slope, placeholder);
+    }
+    conv_attr.set_post_ops(post_operations);
+    return conv_attr;
+  }
+  std::shared_ptr<typename forward_t::primitive_desc>
+  AcquireConvolutionPrimitiveDescriptor(
+      const mkldnn::memory::desc& src, const mkldnn::memory::desc& weights,
+      boost::optional<const mkldnn::memory::desc&> bias,
+      const mkldnn::memory::desc& dst, const std::vector<int>& strides,
+      const std::vector<int>& paddings, const mkldnn::engine& engine,
+      const bool fuse_relu, const bool fuse_residual_conn,
+      mkldnn::prop_kind fwd_prop_kind) {
+    const std::string key_conv_pd = key_ + "@conv_pd";
+    auto conv_pd = std::static_pointer_cast<typename forward_t::primitive_desc>(
+        dev_ctx_.GetBlob(key_conv_pd));
+    if (conv_pd == nullptr) {
+      mkldnn::memory::dims stride_dims = strides;
+      mkldnn::memory::dims padding_dims = paddings;
+      auto conv_desc =
+          bias ? typename forward_t::desc(
+                     fwd_prop_kind, convolutional_algorithm<forward_t>::T, src,
+                     weights, *bias, dst, stride_dims, padding_dims,
+                     padding_dims, mkldnn::padding_kind::zero)
+               : typename forward_t::desc(
+                     fwd_prop_kind, convolutional_algorithm<forward_t>::T, src,
+                     weights, dst, stride_dims, padding_dims, padding_dims,
+                     mkldnn::padding_kind::zero);
+      mkldnn::primitive_attr conv_attr =
+          CreatePostOps(fuse_relu, fuse_residual_conn);
+      conv_pd_.reset(
+          new typename forward_t::primitive_desc(conv_desc, conv_attr, engine));
+      // Save conv_pd/src_memory/weights_memory for backward pass
+      dev_ctx_.SetBlob(key_conv_pd, conv_pd_);
+    } else {
+      conv_pd_ = conv_pd;
+      is_reusing_ = true;
+    }
+    return conv_pd_;
+  }
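The boost::optional<const T&> parameter above lets one entry point cover both the biased and bias-free desc constructors, with boost::none at call sites that have no bias, as the conv-transpose kernel earlier in this commit now does. A small sketch of that dispatch, assuming only Boost and a hypothetical Desc type:

    #include <iostream>
    #include "boost/optional.hpp"

    struct Desc { int id; };

    int BuildDesc(boost::optional<const Desc&> bias) {
      // The real code picks between two forward_t::desc constructors here.
      return bias ? bias->id : -1;
    }

    int main() {
      Desc bias_desc{7};
      std::cout << BuildDesc(bias_desc) << "\n";    // 7: biased variant
      std::cout << BuildDesc(boost::none) << "\n";  // -1: bias-free variant
    }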
   std::shared_ptr<forward_t> AcquireConvolution(
       std::shared_ptr<mkldnn::memory> src_memory_p,
       std::shared_ptr<mkldnn::memory> weights_memory_p,
...
@@ -446,7 +446,8 @@ function assert_api_spec_approvals() {
     BRANCH="develop"
   fi
-  API_FILES=("paddle/fluid/API.spec"
+  API_FILES=("CMakeLists.txt"
+             "paddle/fluid/API.spec"
              "paddle/fluid/op_use_default_grad_op_maker.spec"
              "python/paddle/fluid/parallel_executor.py"
              "paddle/fluid/framework/operator.h"
@@ -469,24 +470,29 @@ function assert_api_spec_approvals() {
       echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
       if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
           # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
-          # approval_user_list: velconia 1979255,panyx0718 2887803,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308.
+          # approval_user_list: velconia 1979255,XiaoguangHu01 46782768,chengduoZH 30176695,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,tensor-tang 21351065,jacquesqiao 3048612,typhoonzero 13348433,shanyi15 35982308.
          if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
            APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-           python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308 46782768 30176695`
+           python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 35982308 46782768 30176695`
            if [ "${APPROVALS}" == "TRUE" ];then
              APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
              python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308`
            fi
+         elif [ "$API_FILE" == "CMakeLists.txt" ];then
+           APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
+           python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 6836917 46782768 30176695`
          else
            APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-           python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
+           python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
          fi
          echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
          if [ "${APPROVALS}" == "FALSE" ]; then
            if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
-             echo "You must have one RD (panyx0718 or chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}"
+             echo "You must have one RD (chengduoZH or XiaoguangHu01) and one PM (shanyi15) approval for the api change! ${API_FILE}"
+           elif [ "$API_FILE" == "CMakeLists.txt" ];then
+             echo "You must have one RD (luotao1 or chengduoZH or XiaoguangHu01) approval for the cmakelist change! ${API_FILE}"
            else
-             echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
+             echo "You must have one RD (velconia,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
            fi
            exit 1
          fi
@@ -496,10 +502,10 @@ function assert_api_spec_approvals() {
   HAS_CONST_CAST=`git diff -U0 upstream/$BRANCH |grep -o -m 1 "const_cast" || true`
   if [ ${HAS_CONST_CAST} ] && [ "${GIT_PR_ID}" != "" ]; then
       APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-      python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
+      python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 1979255 21351065 3048612 13348433 46782768 30176695 12538138 6836917 32832641`
       echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
       if [ "${APPROVALS}" == "FALSE" ]; then
-          echo "You must have one RD (velconia,panyx0718,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
+          echo "You must have one RD (velconia,XiaoguangHu01,chengduoZH,Xreki,luotao1,sneaxiy,tensor-tang,jacquesqiao,typhoonzero) approval for the api change! ${API_FILE}"
           exit 1
       fi
   fi
...
@@ -48,6 +48,12 @@ class Layer(core.Layer):
         self._helper = LayerObjectHelper(self._full_name)
+    def train(self):
+        framework._dygraph_tracer()._train_mode()
+
+    def eval(self):
+        framework._dygraph_tracer()._eval_mode()
+
     def full_name(self):
         """Full name for this layers.
@@ -254,6 +260,12 @@ class PyLayer(core.PyLayer):
     def __init__(self):
         super(PyLayer, self).__init__()
+    def train(self):
+        framework._dygraph_tracer()._train_mode()
+
+    def eval(self):
+        framework._dygraph_tracer()._eval_mode()
+
     @classmethod
     def _do_forward(cls, inputs):
         return cls._to_tuple(cls.forward(inputs))
...
@@ -24,7 +24,9 @@ __all__ = ['Tracer']
 def release_op(op):
-    del framework._dygraph_tracer()._ops[op._trace_id]
+    del framework._dygraph_tracer()._ops[op._trace_id].inputs
+    del framework._dygraph_tracer()._ops[op._trace_id].outputs
+    del framework._dygraph_tracer()._ops[op._trace_id].backward_refs
 class Tracer(core.Tracer):
@@ -38,6 +40,7 @@ class Tracer(core.Tracer):
         self._ops = defaultdict()
         self._vars = defaultdict()
         self._trace_id = 0
+        self._train_mode = True
     def trace_var(self, name, var):
         self._vars[name] = var
@@ -46,15 +49,57 @@ class Tracer(core.Tracer):
         return list((item for name, item in six.iteritems(self._vars)
                      if isinstance(item, framework.Parameter)))
-    def trace_op(self, op, stop_gradient=False):
+    def trace_op(self, op, inputs, outputs, stop_gradient=False):
+        # TODO(minqiyang): remove this line after we take apart all
+        # backward grads and forward variables
+        if self._train_mode:
+            op.inputs = inputs
+            inps = defaultdict(list)
+            for k, vars in six.iteritems(inputs):
+                if isinstance(vars, framework.Variable):
+                    inps[k].append(vars._ivar)
+                elif isinstance(vars, list) or isinstance(vars, tuple):
+                    for var in vars:
+                        inps[k].append(var._ivar)
+
+            op.outputs = outputs
+            outs = defaultdict(list)
+            for k, vars in six.iteritems(outputs):
+                if isinstance(vars, framework.Variable):
+                    outs[k].append(vars._ivar)
+                elif isinstance(vars, list) or isinstance(vars, tuple):
+                    for var in vars:
+                        outs[k].append(var._ivar)
+        else:
+            inps = defaultdict(list)
+            for k, vars in six.iteritems(inputs):
+                if isinstance(vars, framework.Variable):
+                    op.previous_ops.append(vars.op)
+                    inps[k].append(vars._ivar)
+                elif isinstance(vars, list) or isinstance(vars, tuple):
+                    for var in vars:
+                        op.previous_ops.append(var.op)
+                        inps[k].append(var._ivar)
+
+            op.outputs = outputs
+            outs = defaultdict(list)
+            for k, vars in six.iteritems(outputs):
+                if isinstance(vars, framework.Variable):
+                    vars.op = op
+                    outs[k].append(vars._ivar)
+                elif isinstance(vars, list) or isinstance(vars, tuple):
+                    for var in vars:
+                        var.op = op
+                        outs[k].append(var._ivar)
+
         # record op's trace id
         op.iop._trace_id = self._trace_id
-        backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.attrs,
+        backward_refs = self.trace(op.iop, inps, outs, op.attrs,
                                    framework._current_expected_place(),
                                    stop_gradient)
-        if not stop_gradient:
+        if not stop_gradient and self._train_mode:
             self._trace_id += 1
             self._ops[op.iop._trace_id] = op
@@ -65,10 +110,16 @@ class Tracer(core.Tracer):
             # TODO(minqiyang): remove all inputs and outputs after separate
             # var and grad
             op.backward_refs = defaultdict(list)
-            for k, v in six.iteritems(op.inputs):
+            for k, v in six.iteritems(inputs):
                 if k in backward_refs:
-                    op.backward_refs[k] = op.inputs[k]
+                    op.backward_refs[k] = inputs[k]
-            for k, v in six.iteritems(op.outputs):
+            for k, v in six.iteritems(outputs):
                 if k in backward_refs:
-                    op.backward_refs[k] = op.outputs[k]
+                    op.backward_refs[k] = outputs[k]
+
+    def _train_mode(self):
+        self._train_mode = True
+
+    def _eval_mode(self):
+        self._train_mode = False
@@ -411,6 +411,7 @@ class Variable(object):
                 if persistable else False)
             if persistable:
                 _dygraph_tracer().trace_var(name, self)
+            self.op = None
         else:
             self.error_clip = error_clip
@@ -939,24 +940,7 @@ class Operator(object):
                 raise ValueError(
                     "`type` to initialized an Operator can not be None.")
             self.iop = core.OpBase(type)
+            self.previous_ops = []
-            # TODO(minqiyang): remove these lines after we take apart all
-            # backward grads and forward variables
-            self.inputs = defaultdict(list)
-            if inputs is not None:
-                for k, v in six.iteritems(inputs):
-                    if isinstance(v, Variable):
-                        self.inputs[k].append(v._ivar)
-                    elif isinstance(v, list) or isinstance(v, tuple):
-                        self.inputs[k].extend([var._ivar for var in v])
-            self.outputs = defaultdict(list)
-            if outputs is not None:
-                for k, v in six.iteritems(outputs):
-                    if isinstance(v, Variable):
-                        self.outputs[k].append(v._ivar)
-                    elif isinstance(v, list) or isinstance(v, tuple):
-                        self.outputs[k].extend([var._ivar for var in v])
             self.attrs = attrs if attrs else {}
         else:
@@ -1647,15 +1631,18 @@ class Block(object):
                 block=self,
                 desc=None,
                 type=kwargs.get("type", None),
-                inputs=kwargs.get("inputs", None),
-                outputs=kwargs.get("outputs", None),
-                attrs=kwargs.get("attrs", None))
+                inputs=None,
+                outputs=None,
+                attrs=kwargs.get("attrs", {}))
             # record ops in tracer rather than blocks
             #
             # TODO(minqiyang): add op stop_gradient support in static mode too.
             # currently, we only support stop_gradient in dygraph mode.
-            _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False))
+            _dygraph_tracer().trace_op(op,
+                                       kwargs.get("inputs", {}),
+                                       kwargs.get("outputs", {}),
+                                       kwargs.get("stop_gradient", False))
         else:
             op_desc = self.desc.append_op()
             op = Operator(
@@ -1719,10 +1706,14 @@ class Block(object):
                 self,
                 None,
                 type=kwargs.get("type", None),
-                inputs=kwargs.get("inputs", None),
-                outputs=kwargs.get("outputs", None),
-                attrs=kwargs.get("attrs", None))
+                inputs=None,
+                outputs=None,
+                attrs=kwargs.get("attrs", {}))
-            _dygraph_tracer().trace_op(op, kwargs.get("stop_gradient", False))
+            _dygraph_tracer().trace_op(op,
+                                       kwargs.get("inputs", {}),
+                                       kwargs.get("outputs", {}),
+                                       kwargs.get("stop_gradient", False))
         else:
             op_desc = self.desc._prepend_op()
             op = Operator(
...
@@ -29,7 +29,8 @@ from functools import reduce
 __all__ = [
     'While', 'Switch', 'increment', 'array_write', 'create_array', 'less_than',
-    'equal', 'array_read', 'array_length', 'IfElse', 'DynamicRNN', 'StaticRNN',
+    'less_equal', 'greater_than', 'greater_equal', 'equal', 'not_equal',
+    'array_read', 'array_length', 'IfElse', 'DynamicRNN', 'StaticRNN',
     'reorder_lod_tensor_by_rank', 'Print', 'is_empty'
 ]
@@ -189,6 +190,7 @@ def Print(input,
         'print_tensor_lod': print_tensor_lod,
         'print_phase': print_phase.upper()
     })
+    return input
 class BlockGuard(object):
@@ -971,6 +973,114 @@ def less_than(x, y, force_cpu=None, cond=None):
     return cond
+@templatedoc()
+def less_equal(x, y, cond=None):
+    """
+    This layer returns the truth value of :math:`x <= y` elementwise, which is equivalent to the overloaded operator `<=`.
+
+    Args:
+        x(Variable): First operand of *less_equal*
+        y(Variable): Second operand of *less_equal*
+        cond(Variable|None): Optional output variable to store the result of *less_equal*
+
+    Returns:
+        Variable: The tensor variable storing the output of *less_equal*.
+
+    Examples:
+        .. code-block:: python
+
+          out = fluid.layers.less_equal(x=label, y=limit)
+    """
+    helper = LayerHelper("less_equal", **locals())
+    if cond is None:
+        cond = helper.create_variable_for_type_inference(dtype='bool')
+        cond.stop_gradient = True
+
+    attrs = dict()
+    if force_init_on_cpu():
+        attrs['force_cpu'] = force_init_on_cpu()
+
+    helper.append_op(
+        type='less_equal',
+        inputs={'X': [x],
+                'Y': [y]},
+        outputs={'Out': [cond]},
+        attrs=attrs)
+    return cond
+
+
+@templatedoc()
+def greater_than(x, y, cond=None):
+    """
+    This layer returns the truth value of :math:`x > y` elementwise, which is equivalent to the overloaded operator `>`.
+
+    Args:
+        x(Variable): First operand of *greater_than*
+        y(Variable): Second operand of *greater_than*
+        cond(Variable|None): Optional output variable to store the result of *greater_than*
+
+    Returns:
+        Variable: The tensor variable storing the output of *greater_than*.
+
+    Examples:
+        .. code-block:: python
+
+          out = fluid.layers.greater_than(x=label, y=limit)
+    """
+    helper = LayerHelper("greater_than", **locals())
+    if cond is None:
+        cond = helper.create_variable_for_type_inference(dtype='bool')
+        cond.stop_gradient = True
+
+    attrs = dict()
+    if force_init_on_cpu():
+        attrs['force_cpu'] = force_init_on_cpu()
+
+    helper.append_op(
+        type='greater_than',
+        inputs={'X': [x],
+                'Y': [y]},
+        outputs={'Out': [cond]},
+        attrs=attrs)
+    return cond
+
+
+@templatedoc()
+def greater_equal(x, y, cond=None):
+    """
+    This layer returns the truth value of :math:`x >= y` elementwise, which is equivalent to the overloaded operator `>=`.
+
+    Args:
+        x(Variable): First operand of *greater_equal*
+        y(Variable): Second operand of *greater_equal*
+        cond(Variable|None): Optional output variable to store the result of *greater_equal*
+
+    Returns:
+        Variable: The tensor variable storing the output of *greater_equal*.
+
+    Examples:
+        .. code-block:: python
+
+          out = fluid.layers.greater_equal(x=label, y=limit)
+    """
+    helper = LayerHelper("greater_equal", **locals())
+    if cond is None:
+        cond = helper.create_variable_for_type_inference(dtype='bool')
+        cond.stop_gradient = True
+
+    attrs = dict()
+    if force_init_on_cpu():
+        attrs['force_cpu'] = force_init_on_cpu()
+
+    helper.append_op(
+        type='greater_equal',
+        inputs={'X': [x],
+                'Y': [y]},
+        outputs={'Out': [cond]},
+        attrs=attrs)
+    return cond
 def equal(x, y, cond=None):
     """
     This layer returns the truth value of :math:`x == y` elementwise.
@@ -999,6 +1109,34 @@ def equal(x, y, cond=None):
     return cond
+def not_equal(x, y, cond=None):
+    """
+    This layer returns the truth value of :math:`x != y` elementwise, which is equivalent to the overloaded operator `!=`.
+
+    Args:
+        x(Variable): First operand of *not_equal*
+        y(Variable): Second operand of *not_equal*
+        cond(Variable|None): Optional output variable to store the result of *not_equal*
+
+    Returns:
+        Variable: The tensor variable storing the output of *not_equal*.
+
+    Examples:
+        .. code-block:: python
+
+          out = fluid.layers.not_equal(x=label, y=limit)
+    """
+    helper = LayerHelper("not_equal", **locals())
+    if cond is None:
+        cond = helper.create_variable_for_type_inference(dtype='bool')
+        cond.stop_gradient = True
+
+    helper.append_op(
+        type='not_equal', inputs={'X': [x],
+                                  'Y': [y]}, outputs={'Out': [cond]})
+    return cond
 def array_read(array, i):
     """
     This function performs the operation to read the data in as an
...
@@ -509,14 +509,14 @@ def polygon_box_transform(input, name=None):
 @templatedoc(op_type="yolov3_loss")
 def yolov3_loss(x,
-                gtbox,
-                gtlabel,
+                gt_box,
+                gt_label,
                 anchors,
                 anchor_mask,
                 class_num,
                 ignore_thresh,
                 downsample_ratio,
-                gtscore=None,
+                gt_score=None,
                 use_label_smooth=True,
                 name=None):
     """
@@ -524,12 +524,12 @@ def yolov3_loss(x,
     Args:
         x (Variable): ${x_comment}
-        gtbox (Variable): groud truth boxes, should be in shape of [N, B, 4],
+        gt_box (Variable): ground truth boxes, should be in shape of [N, B, 4],
                           in the third dimension, x, y, w, h should be stored
                           and x, y, w, h should be relative value of input image.
                           N is the batch number and B is the max box number in
                           an image.
-        gtlabel (Variable): class id of ground truth boxes, shoud be in shape
+        gt_label (Variable): class id of ground truth boxes, should be in shape
                             of [N, B].
         anchors (list|tuple): ${anchors_comment}
         anchor_mask (list|tuple): ${anchor_mask_comment}
@@ -537,7 +537,7 @@ def yolov3_loss(x,
         ignore_thresh (float): ${ignore_thresh_comment}
         downsample_ratio (int): ${downsample_ratio_comment}
         name (string): the name of yolov3 loss. Default None.
-        gtscore (Variable): mixup score of ground truth boxes, shoud be in shape
+        gt_score (Variable): mixup score of ground truth boxes, should be in shape
                             of [N, B]. Default None.
         use_label_smooth (bool): ${use_label_smooth_comment}
@@ -558,13 +558,13 @@ def yolov3_loss(x,
     .. code-block:: python
         x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
-        gtbox = fluid.layers.data(name='gtbox', shape=[6, 4], dtype='float32')
-        gtlabel = fluid.layers.data(name='gtlabel', shape=[6], dtype='int32')
-        gtscore = fluid.layers.data(name='gtscore', shape=[6], dtype='float32')
+        gt_box = fluid.layers.data(name='gt_box', shape=[6, 4], dtype='float32')
+        gt_label = fluid.layers.data(name='gt_label', shape=[6], dtype='int32')
+        gt_score = fluid.layers.data(name='gt_score', shape=[6], dtype='float32')
         anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
         anchor_mask = [0, 1, 2]
-        loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel,
-                                        gtscore=gtscore, anchors=anchors,
+        loss = fluid.layers.yolov3_loss(x=x, gt_box=gt_box, gt_label=gt_label,
+                                        gt_score=gt_score, anchors=anchors,
                                         anchor_mask=anchor_mask, class_num=80,
                                         ignore_thresh=0.7, downsample_ratio=32)
     """
@@ -572,11 +572,11 @@ def yolov3_loss(x,
     if not isinstance(x, Variable):
         raise TypeError("Input x of yolov3_loss must be Variable")
-    if not isinstance(gtbox, Variable):
+    if not isinstance(gt_box, Variable):
         raise TypeError("Input gtbox of yolov3_loss must be Variable")
-    if not isinstance(gtlabel, Variable):
+    if not isinstance(gt_label, Variable):
         raise TypeError("Input gtlabel of yolov3_loss must be Variable")
-    if gtscore is not None and not isinstance(gtscore, Variable):
+    if gt_score is not None and not isinstance(gt_score, Variable):
         raise TypeError("Input gtscore of yolov3_loss must be Variable")
     if not isinstance(anchors, list) and not isinstance(anchors, tuple):
         raise TypeError("Attr anchors of yolov3_loss must be list or tuple")
@@ -602,11 +602,11 @@ def yolov3_loss(x,
     inputs = {
         "X": x,
-        "GTBox": gtbox,
-        "GTLabel": gtlabel,
+        "GTBox": gt_box,
+        "GTLabel": gt_label,
     }
-    if gtscore:
-        inputs["GTScore"] = gtscore
+    if gt_score:
+        inputs["GTScore"] = gt_score
     attrs = {
         "anchors": anchors,
@@ -1542,7 +1542,7 @@ def multi_box_head(inputs,
     .. code-block:: python
         mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
-            inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
+            inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
             image=images,
             num_classes=21,
             min_ratio=20,
...
...@@ -5721,12 +5721,21 @@ def hsigmoid(input, ...@@ -5721,12 +5721,21 @@ def hsigmoid(input,
raise ValueError( raise ValueError(
"num_classes must not be less than 2 with default tree") "num_classes must not be less than 2 with default tree")
if (not is_custom) and (is_sparse):
print("Sparse mode should not be used without custom tree")
is_sparse = False
if (not is_custom) and ((path_table is not None) or
(path_code is not None)):
raise ValueError(
"only num_classes should be passed without custom tree")
if (is_custom) and (path_code is None): if (is_custom) and (path_code is None):
raise ValueError("path_code should not be None with costum tree") raise ValueError("path_code should not be None with custom tree")
elif (is_custom) and (path_table is None): elif (is_custom) and (path_table is None):
raise ValueError("path_table should not be None with costum tree") raise ValueError("path_table should not be None with custom tree")
elif (is_custom) and (num_classes is None): elif (is_custom) and (num_classes is None):
raise ValueError("num_classes should not be None with costum tree") raise ValueError("num_classes should not be None with custom tree")
else: else:
pass pass
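The branch logic above is easier to audit outside diff form. Here is a standalone sketch that mirrors the same argument checks; check_hsigmoid_args is a hypothetical helper written only to show which combinations hsigmoid rejects, not part of the fluid API.
.. code-block:: python

    def check_hsigmoid_args(num_classes, path_table, path_code, is_custom,
                            is_sparse):
        """Mirror of the hsigmoid argument checks above (illustrative only)."""
        # (a preceding check, above this hunk, rejects num_classes < 2
        # when the default tree is used)
        if (not is_custom) and is_sparse:
            # sparse updates need a user-provided tree, so fall back to dense
            print("Sparse mode should not be used without custom tree")
            is_sparse = False
        if (not is_custom) and ((path_table is not None) or
                                (path_code is not None)):
            raise ValueError(
                "only num_classes should be passed without custom tree")
        if is_custom and path_code is None:
            raise ValueError("path_code should not be None with custom tree")
        if is_custom and path_table is None:
            raise ValueError("path_table should not be None with custom tree")
        if is_custom and num_classes is None:
            raise ValueError("num_classes should not be None with custom tree")
        return is_sparse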
...@@ -6269,6 +6278,8 @@ def sampled_softmax_with_cross_entropy(logits, ...@@ -6269,6 +6278,8 @@ def sampled_softmax_with_cross_entropy(logits,
sampled_label = helper.create_variable_for_type_inference(dtype='int64') sampled_label = helper.create_variable_for_type_inference(dtype='int64')
sampled_softlabel = helper.create_variable_for_type_inference( sampled_softlabel = helper.create_variable_for_type_inference(
dtype=logits.dtype) dtype=logits.dtype)
logits_dim = helper.create_variable_for_type_inference(dtype=logits.dtype)
labels_dim = helper.create_variable_for_type_inference(dtype=label.dtype)
helper.append_op( helper.append_op(
type='sample_logits', type='sample_logits',
...@@ -6282,7 +6293,9 @@ def sampled_softmax_with_cross_entropy(logits, ...@@ -6282,7 +6293,9 @@ def sampled_softmax_with_cross_entropy(logits,
'Samples': samples, 'Samples': samples,
'Probabilities': probabilities, 'Probabilities': probabilities,
'SampledLabels': sampled_label, 'SampledLabels': sampled_label,
'SampledLogits': sampled_logits 'SampledLogits': sampled_logits,
'LogitsDim': logits_dim,
'LabelsDim': labels_dim
}, },
attrs={ attrs={
'use_customized_samples': use_customized_samples, 'use_customized_samples': use_customized_samples,
......
...@@ -27,6 +27,7 @@ __activations_noattr__ = [ ...@@ -27,6 +27,7 @@ __activations_noattr__ = [
'tanh_shrink', 'tanh_shrink',
'softshrink', 'softshrink',
'sqrt', 'sqrt',
'rsqrt',
'abs', 'abs',
'ceil', 'ceil',
'floor', 'floor',
...@@ -81,8 +82,8 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0): ...@@ -81,8 +82,8 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0):
Examples: Examples:
.. code-block:: python .. code-block:: python
result = fluid.layers.uniform_random(shape=[32, 784]) result = fluid.layers.uniform_random(shape=[32, 784])
""" """
locals_var = locals().keys() locals_var = locals().keys()
......
...@@ -28,7 +28,7 @@ __all__ = [ ...@@ -28,7 +28,7 @@ __all__ = [
'tensor_array_to_tensor', 'concat', 'sums', 'assign', 'tensor_array_to_tensor', 'concat', 'sums', 'assign',
'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax',
'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite',
'range', 'linspace' 'range', 'linspace', 'zeros_like'
] ]
...@@ -853,3 +853,34 @@ def linspace(start, stop, num, dtype): ...@@ -853,3 +853,34 @@ def linspace(start, stop, num, dtype):
'Num': num}, 'Num': num},
outputs={'Out': [out]}) outputs={'Out': [out]})
return out return out
def zeros_like(x, out=None):
"""
**zeros_like**
This function creates a tensor of zeros with the same shape and dtype
as `x`.
Args:
x(Variable): The input tensor which specifies shape and dtype.
out(Variable, optional): The output tensor. If None, a new tensor is created.
Returns:
Variable: The tensor variable storing the output.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', dtype='float32', shape=[3], append_batch_size=False)
data = fluid.layers.zeros_like(x) # [0.0, 0.0, 0.0]
"""
helper = LayerHelper("zeros_like", **locals())
if out is None:
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type='fill_zeros_like', inputs={'X': [x]}, outputs={'Out': [out]})
out.stop_gradient = True
return out
...@@ -88,3 +88,19 @@ def train10(batch_size=None): ...@@ -88,3 +88,19 @@ def train10(batch_size=None):
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch', 'data_batch',
batch_size=batch_size) batch_size=batch_size)
def test10(batch_size=None):
"""
CIFAR-10 test set creator.
It returns a reader creator; each sample in the reader is image pixels in
[0, 1] and a label in [0, 9].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator(
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch',
batch_size=batch_size)
...@@ -89,9 +89,11 @@ def train(use_cuda, train_program, parallel, params_dirname): ...@@ -89,9 +89,11 @@ def train(use_cuda, train_program, parallel, params_dirname):
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10), cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
batch_size=BATCH_SIZE, batch_size=BATCH_SIZE,
drop_last=False) drop_last=False)
# Use only a small subset of the test set to validate the program
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) cifar10_small_test_set.test10(BATCH_SIZE),
batch_size=BATCH_SIZE,
drop_last=False)
def event_handler(event): def event_handler(event):
if isinstance(event, EndStepEvent): if isinstance(event, EndStepEvent):
......
...@@ -474,17 +474,17 @@ class TestYoloDetection(unittest.TestCase): ...@@ -474,17 +474,17 @@ class TestYoloDetection(unittest.TestCase):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
x = layers.data(name='x', shape=[30, 7, 7], dtype='float32') x = layers.data(name='x', shape=[30, 7, 7], dtype='float32')
gtbox = layers.data(name='gtbox', shape=[10, 4], dtype='float32') gt_box = layers.data(name='gt_box', shape=[10, 4], dtype='float32')
gtlabel = layers.data(name='gtlabel', shape=[10], dtype='int32') gt_label = layers.data(name='gt_label', shape=[10], dtype='int32')
gtscore = layers.data(name='gtscore', shape=[10], dtype='float32') gt_score = layers.data(name='gt_score', shape=[10], dtype='float32')
loss = layers.yolov3_loss( loss = layers.yolov3_loss(
x, x,
gtbox, gt_box,
gtlabel, [10, 13, 30, 13], [0, 1], gt_label, [10, 13, 30, 13], [0, 1],
10, 10,
0.7, 0.7,
32, 32,
gtscore=gtscore, gt_score=gt_score,
use_label_smooth=False) use_label_smooth=False)
self.assertIsNotNone(loss) self.assertIsNotNone(loss)
......
...@@ -192,6 +192,23 @@ class TestSqrt(TestActivation): ...@@ -192,6 +192,23 @@ class TestSqrt(TestActivation):
self.check_grad(['X'], 'Out', max_relative_error=0.007) self.check_grad(['X'], 'Out', max_relative_error=0.007)
class TestRsqrt(TestActivation):
def setUp(self):
self.op_type = "rsqrt"
self.init_dtype()
x = np.random.uniform(0.1, 1, [2, 3]).astype(self.dtype)
out = 1.0 / np.sqrt(x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.0005)
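As the reference output in TestRsqrt shows, rsqrt is the reciprocal square root 1 / sqrt(x); a quick NumPy check of that identity:
.. code-block:: python

    import numpy as np

    x = np.array([0.25, 1.0, 4.0], dtype=np.float32)
    print(1.0 / np.sqrt(x))  # [2.  1.  0.5] -- the values rsqrt should return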
class TestAbs(TestActivation): class TestAbs(TestActivation):
def setUp(self): def setUp(self):
self.op_type = "abs" self.op_type = "abs"
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
import six
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
pool_size,
pool_stride,
pool_padding=0,
pool_type='max',
global_pooling=False,
conv_stride=1,
conv_padding=0,
conv_dilation=1,
conv_groups=1,
act=None,
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation,
groups=conv_groups,
param_attr=None,
bias_attr=None,
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
pool_padding=pool_padding,
global_pooling=global_pooling,
use_cudnn=use_cudnn)
def forward(self, inputs):
x = self._conv2d(inputs)
x = self._pool2d(x)
return x
class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
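# e.g. pool_2_shape = 50 * 4 * 4 = 800, SIZE = 10 -> scale = sqrt(2 / 6400000) ~= 5.6e-4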
self._fc = FC(self.full_name(),
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
x = self._fc(x)
return x
class TestDygraphMultiForward(unittest.TestCase):
def test_mnist_forward_float32(self):
seed = 90
epoch_num = 1
with fluid.dygraph.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
dy_param_init_value = {}
mnist.eval()
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(128, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label.stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss.numpy()
if epoch == 0 and batch_id == 0:
for param in mnist.parameters():
dy_param_init_value[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mnist = MNIST("mnist")
sgd = SGDOptimizer(learning_rate=1e-3)
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
img = fluid.layers.data(
name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
# initialize params and fetch them
static_param_init_value = {}
static_param_name_list = []
for param in mnist.parameters():
static_param_name_list.append(param.name)
out = exe.run(fluid.default_startup_program(),
fetch_list=static_param_name_list)
for i in range(len(static_param_name_list)):
static_param_init_value[static_param_name_list[i]] = out[i]
for epoch in range(epoch_num):
for batch_id, data in enumerate(train_reader()):
static_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape([128, 1])
fetch_list = [avg_loss.name]
out = exe.run(
fluid.default_main_program(),
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
static_out = out[0]
self.assertTrue(np.allclose(dy_x_data, static_x_data))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
self.assertTrue(np.allclose(static_out, dy_out))
if __name__ == '__main__':
unittest.main()
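The comparison above hinges on the dygraph round trip between NumPy arrays and Variables; a minimal sketch using the same APIs the test imports (guard, to_variable, numpy()):
.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable

    with fluid.dygraph.guard():
        x = to_variable(np.ones([2, 2], dtype='float32'))  # NumPy -> Variable
        y = fluid.layers.mean(x)                           # op runs eagerly
        print(y.numpy())                                   # Variable -> NumPy: [1.]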
...@@ -65,7 +65,9 @@ class ModelHyperParams(object): ...@@ -65,7 +65,9 @@ class ModelHyperParams(object):
# number of head used in multi-head attention. # number of head used in multi-head attention.
n_head = 8 n_head = 8
# number of sub-layers to be stacked in the encoder and decoder. # number of sub-layers to be stacked in the encoder and decoder.
n_layer = 6 # NOTE(zcd): the original number of layers is 6; to make this unit test
# faster, we reduce it to 4.
n_layer = 4
# dropout rate used by all dropout layers. # dropout rate used by all dropout layers.
dropout = 0.1 dropout = 0.1
......
...@@ -149,5 +149,98 @@ class TestSigmoidCrossEntropyWithNorm(OpTest): ...@@ -149,5 +149,98 @@ class TestSigmoidCrossEntropyWithNorm(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
class TestSigmoidCrossEntropyWithLogitsOp5(OpTest):
"""Test sigmoid_cross_entropy_with_logit_op with probabalistic label
"""
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
batch_size = [10, 10]
num_classes = 20
self.inputs = {
'X': logit(
np.random.uniform(0, 1, tuple(batch_size + [num_classes]))
.astype("float32")),
'Label': np.random.uniform(0, 1, tuple(batch_size + [num_classes]))
.astype("float32")
}
# Fw Pass is implemented as elementwise sigmoid followed by
# elementwise logistic loss
# Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X))
sigmoid_X = expit(self.inputs['X'])
term1 = self.inputs['Label'] * np.log(sigmoid_X)
term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
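The commented formula is the elementwise logistic loss; a small worked NumPy example with probabilistic labels, using the same scipy helpers the test relies on:
.. code-block:: python

    import numpy as np
    from scipy.special import expit, logit

    label = np.array([0.2, 0.8], dtype=np.float32)     # probabilistic labels
    x = logit(np.array([0.5, 0.9], dtype=np.float32))  # logits; expit(x) gives 0.5, 0.9
    sigmoid_x = expit(x)
    loss = -(label * np.log(sigmoid_x) + (1 - label) * np.log(1 - sigmoid_x))
    print(loss)  # Label * -log(sigmoid(X)) + (1 - Label) * -log(1 - sigmoid(X))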
class TestSigmoidCrossEntropyWithNorm2(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
batch_size = [10, 10]
num_classes = 20
ignore_index = -1
self.inputs = {
'X': logit(
np.random.uniform(0, 1, tuple(batch_size + [num_classes]))
.astype("float32")),
'Label': np.random.randint(-1, 2, tuple(batch_size + [num_classes]))
.astype("float32")
}
self.attrs = {'ignore_index': ignore_index, 'normalize': True}
sigmoid_X = expit(self.inputs['X'])
term1 = self.inputs['Label'] * np.log(sigmoid_X)
term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
out = -term1 - term2
out[np.where(self.inputs['Label'] == ignore_index)] = 0
if self.attrs['normalize']:
out = out / float(
np.where(self.inputs['Label'] != ignore_index)[0].size)
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
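With normalize=True, the per-element loss is zeroed at ignore_index positions and then divided by the number of non-ignored labels; a sketch of that bookkeeping on a small array:
.. code-block:: python

    import numpy as np

    ignore_index = -1
    label = np.array([1., 0., -1., 1.], dtype=np.float32)
    out = np.array([0.3, 0.7, 0.5, 0.2], dtype=np.float32)   # per-element loss
    out[label == ignore_index] = 0                           # drop ignored entries
    out /= float(np.count_nonzero(label != ignore_index))    # 3 labels kept
    print(out)  # [0.1  0.2333  0.  0.0667]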
class TestSigmoidCrossEntropyWithLogitsOp6(OpTest):
"""Test sigmoid_cross_entropy_with_logit_op with binary label
"""
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
batch_size = [10, 10]
num_classes = 20
self.inputs = {
'X': logit(
np.random.uniform(0, 1, tuple(batch_size + [num_classes]))
.astype("float32")),
'Label': np.random.randint(0, 2, tuple(batch_size + [num_classes]))
.astype("float32")
}
# Fw Pass is implemented as elementwise sigmoid followed by
# elementwise logistic loss
# Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X))
sigmoid_X = expit(self.inputs['X'])
term1 = self.inputs['Label'] * np.log(sigmoid_X)
term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()